Skip to content

Commit f785414

Browse files
committed
Also support importing likes
1 parent fbb3e15 commit f785414

File tree

4 files changed

+145
-23
lines changed

4 files changed

+145
-23
lines changed

config.json.sample

+1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@
33
"api_key": "API_KEY",
44
"topics_query_id": "QUERY_ID",
55
"posts_query_id": "QUERY_ID",
6+
"likes_query_id": "QUERY_ID",
67
"domain": "YOUR_DISCOURSE.com"
78
}

download_topics.rb

+86-23
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
API_USERNAME = config["api_username"]
1818
TOPIC_QUERY_ID = config["topics_query_id"]
1919
POST_QUERY_ID = config["posts_query_id"]
20+
LIKES_QUERY_ID = config["likes_query_id"]
2021

2122
sqlite_conn = SQLite3::Database.new("dump.db")
2223
conn = MiniSql::Connection.get(sqlite_conn)
@@ -37,11 +38,17 @@ def run_report(query_id:, min_id: 0, limit:)
3738
request["Api-Username"] = API_USERNAME
3839

3940
response = http.request(request)
41+
if response.code != "200"
42+
puts "Error: #{response.code} #{response.message}"
43+
puts response.body
44+
exit 1
45+
end
46+
4047
JSON.parse(response.body)
4148
end
4249

4350
def create_schema(conn)
44-
conn.exec <<-SQL
51+
conn.exec <<~SQL
4552
CREATE TABLE IF NOT EXISTS topics (
4653
id INTEGER PRIMARY KEY,
4754
category,
@@ -52,15 +59,15 @@ def create_schema(conn)
5259
)
5360
SQL
5461

55-
conn.exec <<-SQL
62+
conn.exec <<~SQL
5663
CREATE TABLE IF NOT EXISTS users(
5764
id INTEGER PRIMARY KEY,
5865
username,
5966
name
6067
)
6168
SQL
6269

63-
conn.exec <<-SQL
70+
conn.exec <<~SQL
6471
CREATE TABLE IF NOT EXISTS posts(
6572
id INTEGER PRIMARY KEY,
6673
raw,
@@ -71,7 +78,21 @@ def create_schema(conn)
7178
)
7279
SQL
7380

74-
conn.exec("create index idxTopic on posts(topic_id,post_number)")
81+
conn.exec <<~SQL
82+
CREATE TABLE IF NOT EXISTS likes(
83+
post_id,
84+
user_id,
85+
created_at
86+
)
87+
SQL
88+
89+
conn.exec(
90+
"create unique index IF NOT EXISTS idxLikes on likes(post_id,user_id)"
91+
)
92+
93+
conn.exec(
94+
"create index IF NOT EXISTS idxTopic on posts(topic_id,post_number)"
95+
)
7596
end
7697

7798
def load_posts(conn, rows)
@@ -139,32 +160,74 @@ def load_users_from_json(conn, json)
139160
end
140161
end
141162

142-
create_schema(conn)
163+
# Inserts one page of likes into the local `likes` table inside a single
# transaction.
#
# Each row in json["rows"] is expected to be [id, post_id, user_id, created_at]
# (the column order of likes_query.sql). The remote id is not stored; it is
# only used to report the highest id seen so the caller can request the next
# page.
#
# @param conn [#exec] connection accepting `exec(sql, *params)`
# @param json [Hash] parsed report response with a "rows" array
# @return [Hash] `{ highest_id: Integer, likes_loaded: Integer }`
def load_likes(conn, json)
  result = { highest_id: 0, likes_loaded: 0 }

  conn.exec "BEGIN TRANSACTION"

  json["rows"].each do |id, post_id, user_id, created_at|
    # Bind exactly the three stored columns. The remote id is deliberately not
    # passed, so the statement never needs a dummy placeholder to absorb it.
    conn.exec <<~SQL, post_id, user_id, created_at
      INSERT OR IGNORE INTO likes(post_id, user_id, created_at)
      VALUES (?, ?, ?)
    SQL

    result[:highest_id] = id if id > result[:highest_id]
    # Counts rows processed, including ones INSERT OR IGNORE skipped.
    result[:likes_loaded] += 1
  end

  conn.exec "COMMIT TRANSACTION"

  result
end
182+
183+
# Downloads topics page by page and stores them through `conn`.
#
# Each iteration runs the TOPIC_QUERY_ID report with `limit: 10_000`, loads
# the users contained in the response, then loads the topic rows. The next
# request uses the highest id reported by load_topics as `min_id`; the loop
# stops as soon as a page loads no topics.
#
# @param conn connection handed to load_users_from_json and load_topics
# @return [nil]
def download_topics(conn)
  min_id = 0

  loop do
    response_data =
      run_report(query_id: TOPIC_QUERY_ID, min_id: min_id, limit: 10_000)

    load_users_from_json(conn, response_data)

    result = load_topics(conn, response_data["rows"])
    puts "Loaded #{result[:topics_loaded]} topics (highest id is #{result[:highest_id]})"

    break if result[:topics_loaded] == 0

    min_id = result[:highest_id]
  end
end
148198

149-
load_users_from_json(conn, response_data)
199+
# Downloads posts page by page and stores them through `conn`.
#
# Each iteration runs the POST_QUERY_ID report with `limit: 10_000`, loads the
# users contained in the response, then loads the post rows. The next request
# uses the highest id reported by load_posts as `min_id`; the loop stops as
# soon as a page loads no posts.
#
# @param conn connection handed to load_users_from_json and load_posts
# @return [nil]
def download_posts(conn)
  min_id = 0

  loop do
    response_data =
      run_report(query_id: POST_QUERY_ID, min_id: min_id, limit: 10_000)

    load_users_from_json(conn, response_data)

    result = load_posts(conn, response_data["rows"])
    puts "Loaded #{result[:posts_loaded]} posts (highest id is #{result[:highest_id]})"

    break if result[:posts_loaded] == 0

    min_id = result[:highest_id]
  end
end
157214

158-
min_id = 0
159-
while true
160-
response_data =
161-
run_report(query_id: POST_QUERY_ID, min_id: min_id, limit: 10_000)
215+
# Downloads likes page by page and stores them through `conn`.
#
# Each iteration runs the LIKES_QUERY_ID report with `limit: 10_000` and hands
# the whole parsed response to load_likes. The next request uses the highest
# id reported by load_likes as `min_id`; the loop stops as soon as a page
# loads no likes.
#
# @param conn connection handed to load_likes
# @return [nil]
def download_likes(conn)
  min_id = 0

  loop do
    response_data =
      run_report(query_id: LIKES_QUERY_ID, min_id: min_id, limit: 10_000)

    result = load_likes(conn, response_data)

    puts "Loaded #{result[:likes_loaded]} likes (highest id is #{result[:highest_id]})"

    break if result[:likes_loaded] == 0

    min_id = result[:highest_id]
  end
end
229+
230+
# Script entry point: create the local tables and indexes, then download
# topics, posts and likes, in that order, into the same connection.
create_schema(conn)
231+
download_topics(conn)
232+
download_posts(conn)
233+
download_likes(conn)

import_db.rb

+36
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,45 @@ def import_posts(conn)
123123
end
124124
end
125125

126+
# Recreates likes from the local `likes` table as PostAction records.
#
# Rows are read from `conn` and processed in slices of 100, each slice inside
# one PostAction.transaction. A like is only created when no post_actions row
# already exists for the same post, user and action type; new records are
# saved with validations skipped and keep the original created_at for both
# timestamps.
#
# @param conn [#query] connection whose `query(sql)` returns rows exposing
#   `post_id`, `user_id` and `created_at`
def import_likes(conn)
  # NOTE(review): `bang` is not defined anywhere in the visible source.
  # Confirm it is an intentional helper and not a leftover debugging stop.
  bang
  puts "creating likes..."

  # post_action_type_id used for likes by this import (also the value
  # likes_query.sql filters on).
  like_type_id = 2
  processed = 0

  # The likes table is created with only post_id, user_id and created_at, so
  # there is no `id` column to sort by; SQLite's implicit rowid preserves the
  # order in which the rows were inserted.
  likes =
    conn.query("SELECT post_id,user_id,created_at FROM likes order by rowid asc")

  likes.each_slice(100) do |slice|
    PostAction.transaction do
      slice.each do |row|
        existing =
          DB.query(
            "SELECT 1 FROM post_actions where (post_id = ? and user_id = ? and post_action_type_id = ?)",
            row.post_id,
            row.user_id,
            like_type_id
          )

        if existing.blank?
          action =
            PostAction.new(
              post_id: row.post_id,
              user_id: row.user_id,
              post_action_type_id: like_type_id,
              created_at: row.created_at,
              updated_at: row.created_at
            )
          action.save!(validate: false)
          print "."
        end

        # Counts every row processed, whether or not a record was created.
        processed += 1
        puts "#{processed} likes created" if processed % 500 == 0
      end
    end
  end
end
160+
126161
import_users(conn)
127162
import_topics(conn)
128163
import_posts(conn)
164+
import_likes(conn)
129165

130166
Jobs::EnsureDbConsistency.new.execute(nil)
131167
Topic.reset_all_highest!

likes_query.sql

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- [params]
2+
-- int :min_id = 0
3+
4+
-- Returns post_actions rows of type 2 (the type this project imports as
-- likes) that are not deleted and whose id is greater than :min_id, in
-- ascending id order so a caller can page by passing the highest id it has
-- already received.
SELECT id, post_id pid, user_id uid, created_at
5+
FROM post_actions
6+
WHERE post_action_type_id = 2
7+
AND deleted_at is NULL
8+
AND id > :min_id
9+
-- Restrict to actions on posts with post_type = 1 that are neither hidden nor
-- deleted, in topics that are not deleted and whose category is not
-- read-restricted.
AND post_id IN (
10+
SELECT p.id
11+
FROM topics t
12+
JOIN posts p ON p.topic_id = t.id
13+
JOIN categories c ON c.id = t.category_id
14+
WHERE
15+
NOT c.read_restricted
16+
AND t.deleted_at IS NULL
17+
AND p.deleted_at IS NULL
18+
AND p.post_type = 1
19+
AND NOT p.hidden
20+
)
21+
ORDER BY id ASC
22+

0 commit comments

Comments
 (0)