2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2018-06-04 10:57:12 -04:00
|
|
|
# cf. https://github.com/rails-sqlserver/tiny_tds#install
|
|
|
|
require "tiny_tds"
|
|
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
|
|
|
|
class ImportScripts::StackOverflow < ImportScripts::Base
|
2024-11-05 17:27:49 -05:00
|
|
|
BATCH_SIZE = 1000
|
2018-06-04 10:57:12 -04:00
|
|
|
|
|
|
|
def initialize
|
|
|
|
super
|
|
|
|
|
|
|
|
@client =
|
|
|
|
TinyTds::Client.new(
|
|
|
|
host: ENV["DB_HOST"],
|
|
|
|
username: ENV["DB_USERNAME"],
|
|
|
|
password: ENV["DB_PASSWORD"],
|
|
|
|
database: ENV["DB_NAME"],
|
|
|
|
)
|
|
|
|
end
|
|
|
|
|
|
|
|
def execute
|
|
|
|
SiteSetting.tagging_enabled = true
|
|
|
|
|
|
|
|
# TODO: import_groups
|
|
|
|
import_users
|
|
|
|
import_posts
|
|
|
|
import_likes
|
|
|
|
mark_topics_as_solved
|
|
|
|
end
|
|
|
|
|
|
|
|
def import_users
|
|
|
|
puts "", "Importing users..."
|
|
|
|
|
|
|
|
last_user_id = -1
|
|
|
|
total = query("SELECT COUNT(*) count FROM Users WHERE Id > 0").first["count"]
|
|
|
|
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
|
|
users = query(<<~SQL).to_a
|
|
|
|
SELECT TOP #{BATCH_SIZE}
|
|
|
|
Id
|
|
|
|
, UserTypeId
|
|
|
|
, CreationDate
|
|
|
|
, LastLoginDate
|
|
|
|
, LastLoginIP
|
|
|
|
, Email
|
|
|
|
, DisplayName
|
|
|
|
, WebsiteUrl
|
|
|
|
, RealName
|
|
|
|
, Location
|
|
|
|
, Birthday
|
|
|
|
, ProfileImageUrl
|
|
|
|
FROM Users
|
|
|
|
WHERE Id > 0
|
|
|
|
AND Id > #{last_user_id}
|
|
|
|
ORDER BY Id
|
|
|
|
SQL
|
|
|
|
|
|
|
|
break if users.empty?
|
|
|
|
|
|
|
|
last_user_id = users[-1]["Id"]
|
|
|
|
user_ids = users.map { |u| u["Id"] }
|
|
|
|
|
|
|
|
next if all_records_exist?(:users, user_ids)
|
|
|
|
|
|
|
|
create_users(users, total: total, offset: offset) do |u|
|
|
|
|
{
|
|
|
|
id: u["Id"],
|
|
|
|
admin: u["UserTypeId"] == 4,
|
|
|
|
created_at: u["CreationDate"],
|
|
|
|
last_seen_at: u["LastLoginDate"],
|
|
|
|
ip_address: u["LastLoginIP"],
|
|
|
|
email: u["Email"],
|
|
|
|
username: u["DisplayName"],
|
|
|
|
website: u["WebsiteUrl"],
|
|
|
|
name: u["RealName"],
|
|
|
|
location: u["Location"],
|
|
|
|
date_of_birth: u["Birthday"],
|
|
|
|
post_create_action:
|
|
|
|
proc do |user|
|
|
|
|
if u["ProfileImageUrl"].present?
|
2023-01-07 06:53:14 -05:00
|
|
|
begin
|
2018-06-04 10:57:12 -04:00
|
|
|
UserAvatar.import_url_for_user(u["ProfileImageUrl"], user)
|
|
|
|
rescue StandardError
|
|
|
|
nil
|
|
|
|
end
|
|
|
|
end
|
2023-01-07 06:53:14 -05:00
|
|
|
end,
|
2018-06-04 10:57:12 -04:00
|
|
|
}
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2018-06-05 12:22:42 -04:00
|
|
|
def import_posts
|
|
|
|
puts "", "Importing posts..."
|
2018-06-04 10:57:12 -04:00
|
|
|
|
|
|
|
last_post_id = -1
|
2018-06-05 12:22:42 -04:00
|
|
|
total =
|
|
|
|
query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId IN (1,2,3)").first["count"] +
|
|
|
|
query(
|
|
|
|
"SELECT COUNT(*) count FROM PostComments WHERE PostId IN (SELECT Id FROM Posts WHERE PostTypeId IN (1,2,3))",
|
|
|
|
).first[
|
|
|
|
"count"
|
|
|
|
]
|
2018-06-04 10:57:12 -04:00
|
|
|
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
|
|
posts = query(<<~SQL).to_a
|
|
|
|
SELECT TOP #{BATCH_SIZE}
|
|
|
|
Id
|
|
|
|
, PostTypeId
|
|
|
|
, CreationDate
|
|
|
|
, Body
|
2018-06-05 12:22:42 -04:00
|
|
|
, OwnerUserId AS UserId
|
2018-06-04 10:57:12 -04:00
|
|
|
, Title
|
|
|
|
, Tags
|
|
|
|
, DeletionDate
|
|
|
|
, ParentId
|
|
|
|
, IsAcceptedAnswer
|
2018-06-05 12:22:42 -04:00
|
|
|
, CASE WHEN (ClosedDate IS NOT NULL OR LockedDate IS NOT NULL) THEN 1 ELSE 0 END AS Closed
|
2018-06-04 10:57:12 -04:00
|
|
|
FROM Posts
|
2018-06-05 12:22:42 -04:00
|
|
|
WHERE PostTypeId IN (1,2,3)
|
2018-06-04 10:57:12 -04:00
|
|
|
AND Id > #{last_post_id}
|
|
|
|
ORDER BY Id
|
|
|
|
SQL
|
|
|
|
|
|
|
|
break if posts.empty?
|
|
|
|
|
|
|
|
last_post_id = posts[-1]["Id"]
|
|
|
|
post_ids = posts.map { |p| p["Id"] }
|
|
|
|
|
|
|
|
comments = query(<<~SQL).to_a
|
|
|
|
SELECT CONCAT('Comment-', Id) AS Id
|
|
|
|
, PostId AS ParentId
|
2018-06-05 12:22:42 -04:00
|
|
|
, Text
|
2018-06-04 10:57:12 -04:00
|
|
|
, CreationDate
|
|
|
|
, UserId
|
|
|
|
FROM PostComments
|
|
|
|
WHERE PostId IN (#{post_ids.join(",")})
|
|
|
|
ORDER BY Id
|
|
|
|
SQL
|
|
|
|
|
|
|
|
posts_and_comments = (posts + comments).sort_by { |p| p["CreationDate"] }
|
|
|
|
post_and_comment_ids = posts_and_comments.map { |p| p["Id"] }
|
|
|
|
|
|
|
|
next if all_records_exist?(:posts, post_and_comment_ids)
|
|
|
|
|
2018-06-05 12:22:42 -04:00
|
|
|
create_posts(posts_and_comments, total: total, offset: offset) do |p|
|
|
|
|
raw = p["Body"].present? ? HtmlToMarkdown.new(p["Body"]).to_markdown : p["Text"]
|
2018-06-04 10:57:12 -04:00
|
|
|
|
|
|
|
post = {
|
|
|
|
id: p["Id"],
|
|
|
|
created_at: p["CreationDate"],
|
2018-06-05 12:22:42 -04:00
|
|
|
raw: raw,
|
2018-06-04 10:57:12 -04:00
|
|
|
user_id: user_id_from_imported_user_id(p["UserId"]) || -1,
|
|
|
|
}
|
|
|
|
|
2018-06-05 12:22:42 -04:00
|
|
|
if p["Title"].present?
|
|
|
|
post[:wiki] = p["PostTypeId"] = 3
|
|
|
|
post[:title] = p["Title"]
|
|
|
|
post[:tags] = p["Tags"].split("|")
|
|
|
|
post[:deleted_at] = p["DeletionDate"]
|
|
|
|
post[:closed] = p["Closed"] == 1
|
|
|
|
elsif t = topic_lookup_from_imported_post_id(p["ParentId"])
|
|
|
|
post[:custom_fields] = { is_accepted_answer: true } if p["IsAcceptedAnswer"]
|
|
|
|
post[:topic_id] = t[:topic_id]
|
|
|
|
post[:reply_to_post_number] = t[:post_number]
|
|
|
|
else
|
|
|
|
puts "", "", "#{p["Id"]} was not imported", "", ""
|
|
|
|
next
|
|
|
|
end
|
2018-06-04 10:57:12 -04:00
|
|
|
|
|
|
|
post
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def import_likes
|
2018-06-05 12:22:42 -04:00
|
|
|
puts "", "Importing post likes..."
|
2018-06-04 10:57:12 -04:00
|
|
|
|
|
|
|
last_like_id = -1
|
|
|
|
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
|
|
likes = query(<<~SQL).to_a
|
|
|
|
SELECT TOP #{BATCH_SIZE}
|
|
|
|
Id
|
|
|
|
, PostId
|
|
|
|
, UserId
|
|
|
|
, CreationDate
|
|
|
|
FROM Posts2Votes
|
|
|
|
WHERE VoteTypeId = 2
|
|
|
|
AND DeletionDate IS NULL
|
|
|
|
AND Id > #{last_like_id}
|
|
|
|
ORDER BY Id
|
|
|
|
SQL
|
|
|
|
|
|
|
|
break if likes.empty?
|
|
|
|
|
|
|
|
last_like_id = likes[-1]["Id"]
|
|
|
|
|
|
|
|
likes.each do |l|
|
|
|
|
next unless user_id = user_id_from_imported_user_id(l["UserId"])
|
|
|
|
next unless post_id = post_id_from_imported_post_id(l["PostId"])
|
|
|
|
next unless user = User.find_by(id: user_id)
|
|
|
|
next unless post = Post.find_by(id: post_id)
|
2023-01-07 06:53:14 -05:00
|
|
|
begin
|
2019-01-03 12:03:01 -05:00
|
|
|
PostActionCreator.like(user, post)
|
|
|
|
rescue StandardError
|
|
|
|
nil
|
2023-01-07 06:53:14 -05:00
|
|
|
end
|
2018-06-04 10:57:12 -04:00
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2018-06-05 12:22:42 -04:00
|
|
|
puts "", "Importing comment likes..."
|
|
|
|
|
2018-06-04 10:57:12 -04:00
|
|
|
last_like_id = -1
|
|
|
|
total =
|
|
|
|
query(
|
|
|
|
"SELECT COUNT(*) count FROM Comments2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL",
|
|
|
|
).first[
|
|
|
|
"count"
|
|
|
|
]
|
|
|
|
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
|
|
likes = query(<<~SQL).to_a
|
|
|
|
SELECT TOP #{BATCH_SIZE}
|
|
|
|
Id
|
|
|
|
, CONCAT('Comment-', PostCommentId) AS PostCommentId
|
|
|
|
, UserId
|
|
|
|
, CreationDate
|
|
|
|
FROM Comments2Votes
|
|
|
|
WHERE VoteTypeId = 2
|
|
|
|
AND DeletionDate IS NULL
|
|
|
|
AND Id > #{last_like_id}
|
|
|
|
ORDER BY Id
|
|
|
|
SQL
|
|
|
|
|
|
|
|
break if likes.empty?
|
|
|
|
|
|
|
|
last_like_id = likes[-1]["Id"]
|
|
|
|
|
|
|
|
likes.each do |l|
|
|
|
|
next unless user_id = user_id_from_imported_user_id(l["UserId"])
|
|
|
|
next unless post_id = post_id_from_imported_post_id(l["PostCommentId"])
|
|
|
|
next unless user = User.find_by(id: user_id)
|
|
|
|
next unless post = Post.find_by(id: post_id)
|
2023-01-07 06:53:14 -05:00
|
|
|
begin
|
2019-01-03 12:03:01 -05:00
|
|
|
PostActionCreator.like(user, post)
|
|
|
|
rescue StandardError
|
|
|
|
nil
|
2023-01-07 06:53:14 -05:00
|
|
|
end
|
2018-06-04 10:57:12 -04:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
def mark_topics_as_solved
|
|
|
|
puts "", "Marking topics as solved..."
|
|
|
|
|
2018-06-19 02:13:14 -04:00
|
|
|
DB.exec <<~SQL
|
2018-06-04 10:57:12 -04:00
|
|
|
INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
|
|
|
|
SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
|
|
|
|
FROM post_custom_fields pcf
|
|
|
|
JOIN posts p ON p.id = pcf.post_id
|
|
|
|
WHERE pcf.name = 'is_accepted_answer'
|
|
|
|
SQL
|
|
|
|
end
|
|
|
|
|
|
|
|
def query(sql)
|
|
|
|
@client.execute(sql)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
ImportScripts::StackOverflow.new.perform
|