FEATURE: StackOverflow importer
This commit is contained in:
parent
bc75cfe4b5
commit
685083491e
1
Gemfile
1
Gemfile
|
@ -184,6 +184,7 @@ if ENV["IMPORT"] == "1"
|
|||
gem 'sqlite3', '~> 1.3.13'
|
||||
gem 'ruby-bbcode-to-md', github: 'nlalonde/ruby-bbcode-to-md'
|
||||
gem 'reverse_markdown'
|
||||
gem 'tiny_tds'
|
||||
end
|
||||
|
||||
gem 'webpush', require: false
|
||||
|
|
|
@ -505,4 +505,4 @@ DEPENDENCIES
|
|||
webpush
|
||||
|
||||
BUNDLED WITH
|
||||
1.16.1
|
||||
1.16.2
|
||||
|
|
|
@ -0,0 +1,290 @@
|
|||
# cf. https://github.com/rails-sqlserver/tiny_tds#install
|
||||
require "tiny_tds"
|
||||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||
|
||||
class ImportScripts::StackOverflow < ImportScripts::Base
|
||||
|
||||
BATCH_SIZE ||= 1000
|
||||
|
||||
def initialize
|
||||
super
|
||||
|
||||
@client = TinyTds::Client.new(
|
||||
host: ENV["DB_HOST"],
|
||||
username: ENV["DB_USERNAME"],
|
||||
password: ENV["DB_PASSWORD"],
|
||||
database: ENV["DB_NAME"],
|
||||
)
|
||||
end
|
||||
|
||||
def execute
|
||||
SiteSetting.tagging_enabled = true
|
||||
|
||||
# TODO: import_groups
|
||||
import_users
|
||||
|
||||
import_topics
|
||||
import_posts
|
||||
|
||||
import_likes
|
||||
|
||||
mark_topics_as_solved
|
||||
end
|
||||
|
||||
def import_users
|
||||
puts "", "Importing users..."
|
||||
|
||||
last_user_id = -1
|
||||
total = query("SELECT COUNT(*) count FROM Users WHERE Id > 0").first["count"]
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
users = query(<<~SQL
|
||||
SELECT TOP #{BATCH_SIZE}
|
||||
Id
|
||||
, UserTypeId
|
||||
, CreationDate
|
||||
, LastLoginDate
|
||||
, LastLoginIP
|
||||
, Email
|
||||
, DisplayName
|
||||
, WebsiteUrl
|
||||
, RealName
|
||||
, Location
|
||||
, Birthday
|
||||
, ProfileImageUrl
|
||||
FROM Users
|
||||
WHERE Id > 0
|
||||
AND Id > #{last_user_id}
|
||||
ORDER BY Id
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if users.empty?
|
||||
|
||||
last_user_id = users[-1]["Id"]
|
||||
user_ids = users.map { |u| u["Id"] }
|
||||
|
||||
next if all_records_exist?(:users, user_ids)
|
||||
|
||||
create_users(users, total: total, offset: offset) do |u|
|
||||
{
|
||||
id: u["Id"],
|
||||
admin: u["UserTypeId"] == 4,
|
||||
created_at: u["CreationDate"],
|
||||
last_seen_at: u["LastLoginDate"],
|
||||
ip_address: u["LastLoginIP"],
|
||||
email: u["Email"],
|
||||
username: u["DisplayName"],
|
||||
website: u["WebsiteUrl"],
|
||||
name: u["RealName"],
|
||||
location: u["Location"],
|
||||
date_of_birth: u["Birthday"],
|
||||
post_create_action: proc do |user|
|
||||
if u["ProfileImageUrl"].present?
|
||||
UserAvatar.import_url_for_user(u["ProfileImageUrl"], user) rescue nil
|
||||
end
|
||||
end
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def import_topics
|
||||
puts "", "Importing topics..."
|
||||
|
||||
last_post_id = -1
|
||||
total = query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId IN (1,3)").first["count"]
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
posts = query(<<~SQL
|
||||
SELECT TOP #{BATCH_SIZE}
|
||||
Id
|
||||
, PostTypeId
|
||||
, CreationDate
|
||||
, Body
|
||||
, OwnerUserId
|
||||
, Title
|
||||
, Tags
|
||||
, DeletionDate
|
||||
, CASE WHEN (ClosedDate IS NOT NULL OR LockedDate IS NOT NULL) THEN 1 ELSE 0 END AS Closed
|
||||
FROM Posts
|
||||
WHERE PostTypeId IN (1,3)
|
||||
AND Id > #{last_post_id}
|
||||
ORDER BY Id
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if posts.empty?
|
||||
|
||||
last_post_id = posts[-1]["Id"]
|
||||
post_ids = posts.map { |p| p["Id"] }
|
||||
|
||||
next if all_records_exist?(:posts, post_ids)
|
||||
|
||||
create_posts(posts, total: total, offset: offset) do |p|
|
||||
{
|
||||
id: p["Id"],
|
||||
wiki: p["PostTypeId"] == 3,
|
||||
created_at: p["CreationDate"],
|
||||
raw: HtmlToMarkdown.new(p["Body"]).to_markdown,
|
||||
user_id: user_id_from_imported_user_id(p["OwnerUserId"]) || -1,
|
||||
title: p["Title"],
|
||||
tags: p["Tags"].split("|"),
|
||||
deleted_at: p["DeletionDate"],
|
||||
closed: p["Closed"] == 1,
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def import_posts
|
||||
puts "", "Importing posts..."
|
||||
|
||||
last_post_id = -1
|
||||
total = query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId = 2").first["count"] +
|
||||
query("SELECT COUNT(*) count FROM PostComments WHERE PostId IN (SELECT Id FROM Posts WHERE PostTypeId = 2)").first["count"]
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
posts = query(<<~SQL
|
||||
SELECT TOP #{BATCH_SIZE}
|
||||
Id
|
||||
, CreationDate
|
||||
, Body
|
||||
, OwnerUserId AS UserId
|
||||
, ParentId
|
||||
, IsAcceptedAnswer
|
||||
FROM Posts
|
||||
WHERE PostTypeId = 2
|
||||
AND Id > #{last_post_id}
|
||||
ORDER BY Id
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if posts.empty?
|
||||
|
||||
last_post_id = posts[-1]["Id"]
|
||||
post_ids = posts.map { |p| p["Id"] }
|
||||
|
||||
comments = query(<<~SQL
|
||||
SELECT CONCAT('Comment-', Id) AS Id
|
||||
, PostId AS ParentId
|
||||
, Text AS Body
|
||||
, CreationDate
|
||||
, UserId
|
||||
FROM PostComments
|
||||
WHERE PostId IN (#{post_ids.join(",")})
|
||||
ORDER BY Id
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
posts_and_comments = (posts + comments).sort_by { |p| p["CreationDate"] }
|
||||
post_and_comment_ids = posts_and_comments.map { |p| p["Id"] }
|
||||
|
||||
next if all_records_exist?(:posts, post_and_comment_ids)
|
||||
|
||||
create_posts(posts_and_comments) do |p|
|
||||
next unless t = topic_lookup_from_imported_post_id(p["ParentId"])
|
||||
|
||||
post = {
|
||||
id: p["Id"],
|
||||
created_at: p["CreationDate"],
|
||||
raw: HtmlToMarkdown.new(p["Body"]).to_markdown,
|
||||
user_id: user_id_from_imported_user_id(p["UserId"]) || -1,
|
||||
topic_id: t[:topic_id],
|
||||
reply_to_post_number: t[:post_number],
|
||||
}
|
||||
|
||||
post[:custom_fields] = { is_accepted_answer: true } if p["IsAcceptedAnswer"]
|
||||
|
||||
post
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
LIKE ||= PostActionType.types[:like]
|
||||
|
||||
def import_likes
|
||||
puts "", "Importing likes..."
|
||||
|
||||
last_like_id = -1
|
||||
total = query("SELECT COUNT(*) count FROM Posts2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL").first["count"]
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
likes = query(<<~SQL
|
||||
SELECT TOP #{BATCH_SIZE}
|
||||
Id
|
||||
, PostId
|
||||
, UserId
|
||||
, CreationDate
|
||||
FROM Posts2Votes
|
||||
WHERE VoteTypeId = 2
|
||||
AND DeletionDate IS NULL
|
||||
AND Id > #{last_like_id}
|
||||
ORDER BY Id
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if likes.empty?
|
||||
|
||||
last_like_id = likes[-1]["Id"]
|
||||
|
||||
likes.each do |l|
|
||||
next unless user_id = user_id_from_imported_user_id(l["UserId"])
|
||||
next unless post_id = post_id_from_imported_post_id(l["PostId"])
|
||||
next unless user = User.find_by(id: user_id)
|
||||
next unless post = Post.find_by(id: post_id)
|
||||
PostAction.act(user, post, LIKE) rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
last_like_id = -1
|
||||
total = query("SELECT COUNT(*) count FROM Comments2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL").first["count"]
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
likes = query(<<~SQL
|
||||
SELECT TOP #{BATCH_SIZE}
|
||||
Id
|
||||
, CONCAT('Comment-', PostCommentId) AS PostCommentId
|
||||
, UserId
|
||||
, CreationDate
|
||||
FROM Comments2Votes
|
||||
WHERE VoteTypeId = 2
|
||||
AND DeletionDate IS NULL
|
||||
AND Id > #{last_like_id}
|
||||
ORDER BY Id
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if likes.empty?
|
||||
|
||||
last_like_id = likes[-1]["Id"]
|
||||
|
||||
likes.each do |l|
|
||||
next unless user_id = user_id_from_imported_user_id(l["UserId"])
|
||||
next unless post_id = post_id_from_imported_post_id(l["PostCommentId"])
|
||||
next unless user = User.find_by(id: user_id)
|
||||
next unless post = Post.find_by(id: post_id)
|
||||
PostAction.act(user, post, LIKE) rescue nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def mark_topics_as_solved
|
||||
puts "", "Marking topics as solved..."
|
||||
|
||||
Topic.exec_sql <<~SQL
|
||||
INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
|
||||
SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
|
||||
FROM post_custom_fields pcf
|
||||
JOIN posts p ON p.id = pcf.post_id
|
||||
WHERE pcf.name = 'is_accepted_answer'
|
||||
SQL
|
||||
end
|
||||
|
||||
def query(sql)
|
||||
@client.execute(sql)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
ImportScripts::StackOverflow.new.perform
|
Loading…
Reference in New Issue