-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rakefile
84 lines (71 loc) · 2.09 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# http://data.stackexchange.com/stackoverflow/query/new
# requires ruby >= 2.0
require 'rubygems'
require 'bundler/setup'
require 'yaml'
require_relative 'lib/importer'
require_relative 'lib/exporter'
# Directory the importer reads from.
# Uses the `source` environment variable when it is set; otherwise falls back
# to data/stackoverflow.com located next to this Rakefile.
def source_directory
  ENV.fetch('source') { File.join(__dir__, 'data', 'stackoverflow.com') }
end
# Run mode passed to the importer and exporter, as a Symbol.
# Read from the `mode` environment variable; :forked when it is not set.
def mode
  ENV.fetch('mode', 'forked').to_sym
end
# Tag used to select the questions to export.
# Read from the `tag` environment variable; 'ios' when it is not set.
def tag
  ENV.fetch('tag', 'ios')
end
# Human-readable name of the docset.
# The `docset` environment variable wins when set; otherwise the name is the
# last path component of the source directory followed by a space and the tag.
def docset_name
  explicit_name = ENV['docset']
  return explicit_name if explicit_name

  [File.basename(source_directory), tag].join(' ')
end
# Output name handed to the exporter: the docset name with every whitespace
# character replaced by an underscore.
def output
  whitespace = /\s/
  docset_name.gsub(whitespace, '_')
end
# Loads the list of files from config/files.yml (resolved relative to this
# Rakefile) and passes it, together with the source directory and the run
# mode, to Importer, then runs the import.
desc 'Imports XML files into the database'
task :import do
  puts "Importing"
  puts "------------"

  files = YAML.load_file(File.join(__dir__, 'config', 'files.yml'))
  importer = Importer.new(files: files, source_directory: source_directory, mode: mode)
  importer.import

  puts "\n"
end
# Issues one CREATE INDEX statement per listed table/column pair on a fresh
# database connection and prints how long each statement took.
desc 'Add indices to database columns where needed'
task :index do
  puts "Indexing"
  puts "------------"

  connection = Database.new_connection

  # Flatten to [table, column] pairs, keeping the order in which the
  # indexes are created: all posts columns first, then comments.
  targets = [
    ['posts', %w[Id ParentId AcceptedAnswerId export]],
    ['comments', %w[Id PostId]]
  ].flat_map { |table, columns| columns.map { |column| [table, column] } }

  targets.each do |table, column|
    began_at = Time.now
    puts "Adding index on #{table}.#{column}..."
    connection.exec "CREATE INDEX ON #{table} (#{column})"
    elapsed = Time.now - began_at
    puts "Total time indexing #{table}.#{column}: #{elapsed} s"
  end

  puts "\n"
end
# Clears the export flag on every row of posts, then sets it on rows that have
# no ParentId, have an AcceptedAnswerId, and whose Tags column contains the
# configured tag. Prints the elapsed time of each statement in seconds.
desc 'Marks which questions are to be exported'
task :mark_for_export do
  puts "Marking"
  puts "------------"

  db = Database.new_connection

  started_at = Time.now
  puts "Resetting export flag..."
  db.exec 'UPDATE posts SET export = false'
  puts "Total time resetting export flag: #{Time.now - started_at} s"

  # The tag is spliced into a SQL string literal, so double any single quote
  # to keep a value such as "o'reilly" from terminating the literal early.
  # NOTE(review): the pattern matches the tag as a substring of Tags, so 'ios'
  # also selects rows whose Tags contain e.g. 'ios7' - confirm this is intended.
  sql_tag = tag.gsub("'", "''")

  started_at = Time.now
  puts "Marking questions with tag #{tag}..."
  db.exec "UPDATE posts SET export = true WHERE ParentId IS NULL AND AcceptedAnswerId IS NOT NULL AND Tags LIKE '%#{sql_tag}%'"
  puts "Total time marking #{tag} tagged questions: #{Time.now - started_at} s"

  puts "\n"
end
# Builds an Exporter from the docset name, the output name and the run mode,
# then runs its export. The description makes the task show up in `rake -T`
# alongside the other pipeline tasks.
desc 'Exports the questions marked for export'
task :export do
  puts "Exporting"
  puts "------------"

  exporter = Exporter.new(name: docset_name, output: output, mode: mode)
  exporter.export
end
# Running `rake` without a task name runs the whole pipeline in this order:
# import, index, mark_for_export, export.
task default: %i[import index mark_for_export export]