mirror of
https://github.com/discourse/discourse.git
synced 2025-02-21 03:19:10 +00:00
DEV: Improve q2a import script (#10346)
The changes here are largely pulled from the below gist by RGJ on meta.discourse.org. Thanks much for the changes! https://gist.github.com/discoursehosting/769eff2014d5482f0ab776de03dc3349
This commit is contained in:
parent
da7e6b1f7e
commit
d266baff2b
@ -3,17 +3,7 @@
|
||||
require 'mysql2'
|
||||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||
require 'htmlentities'
|
||||
begin
|
||||
require 'php_serialize' # https://github.com/jqr/php-serialize
|
||||
rescue LoadError
|
||||
puts
|
||||
puts 'php_serialize not found.'
|
||||
puts 'Add to Gemfile, like this: '
|
||||
puts
|
||||
puts "echo gem \\'php-serialize\\' >> Gemfile"
|
||||
puts "bundle install"
|
||||
exit
|
||||
end
|
||||
require 'php_serialize' # https://github.com/jqr/php-serialize
|
||||
|
||||
class ImportScripts::Question2Answer < ImportScripts::Base
|
||||
BATCH_SIZE = 1000
|
||||
@ -21,14 +11,11 @@ class ImportScripts::Question2Answer < ImportScripts::Base
|
||||
# CHANGE THESE BEFORE RUNNING THE IMPORTER
|
||||
|
||||
DB_HOST ||= ENV['DB_HOST'] || "localhost"
|
||||
DB_NAME ||= ENV['DB_NAME']
|
||||
DB_PW ||= ENV['DB_PW']
|
||||
DB_USER ||= ENV['DB_USER']
|
||||
DB_NAME ||= ENV['DB_NAME'] || "qa_db"
|
||||
DB_PW ||= ENV['DB_PW'] || ""
|
||||
DB_USER ||= ENV['DB_USER'] || "root"
|
||||
TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles"
|
||||
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "qa_"
|
||||
MAIN_APP_DB_NAME = "primary_db"
|
||||
|
||||
puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}"
|
||||
|
||||
def initialize
|
||||
super
|
||||
@ -45,28 +32,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base
|
||||
password: DB_PW,
|
||||
database: DB_NAME
|
||||
)
|
||||
rescue Exception => e
|
||||
puts '=' * 50
|
||||
puts e.message
|
||||
puts <<EOM
|
||||
Cannot connect in to database.
|
||||
|
||||
Hostname: #{DB_HOST}
|
||||
Username: #{DB_USER}
|
||||
Password: #{DB_PW}
|
||||
database: #{DB_NAME}
|
||||
|
||||
Edit the script or set these environment variables:
|
||||
|
||||
export DB_HOST="localhost"
|
||||
export DB_NAME=""
|
||||
export DB_PW='password'
|
||||
export DB_USER="root"
|
||||
export TABLE_PREFIX="qa_"
|
||||
|
||||
Exiting.
|
||||
EOM
|
||||
exit
|
||||
end
|
||||
|
||||
def execute
|
||||
@ -75,6 +40,7 @@ EOM
|
||||
import_topics
|
||||
import_posts
|
||||
import_likes
|
||||
import_bestanswer
|
||||
|
||||
post_process_posts
|
||||
create_permalinks
|
||||
@ -83,23 +49,21 @@ EOM
|
||||
def import_users
|
||||
puts "", "importing users"
|
||||
|
||||
#only import users that have posted or voted on Q2A
|
||||
user_count = mysql_query("SELECT COUNT(id) count FROM #{MAIN_APP_DB_NAME}.users u WHERE EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.id) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes u WHERE u.userid=u.id)").first["count"]
|
||||
# only import users that have posted or voted on Q2A
|
||||
# if you want to import all users, just leave out the WHERE and everything after it (and remove line 95 as well)
|
||||
user_count = mysql_query("SELECT COUNT(userid) count FROM #{TABLE_PREFIX}users u WHERE EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.userid) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes uv WHERE u.userid=uv.userid)").first["count"]
|
||||
last_user_id = -1
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
users = mysql_query(<<-SQL
|
||||
SELECT u.id, u.email, first_name, last_name, u.created_at, last_sign_in_at, u.custom_field_1, u.website website, u.city city, u.state state, i.cdn_slug cdn_slug
|
||||
FROM #{MAIN_APP_DB_NAME}.users u
|
||||
LEFT JOIN #{MAIN_APP_DB_NAME}.images i
|
||||
ON i.id=u.image_id
|
||||
WHERE u.id > #{last_user_id} AND
|
||||
(EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.id) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes u WHERE u.userid=u.id))
|
||||
ORDER BY u.id
|
||||
LIMIT #{BATCH_SIZE}
|
||||
SELECT u.userid AS id, u.email, u.handle AS username, u.created AS created_at, u.loggedin AS last_sign_in_at, u.avatarblobid
|
||||
FROM #{TABLE_PREFIX}users u
|
||||
WHERE u.userid > #{last_user_id}
|
||||
AND (EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.userid) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes uv WHERE u.userid=uv.userid))
|
||||
ORDER BY u.userid
|
||||
LIMIT #{BATCH_SIZE}
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if users.empty?
|
||||
|
||||
last_user_id = users[-1]["id"]
|
||||
@ -112,13 +76,10 @@ EOM
|
||||
avatar_url = "https://your_image_bucket/#{user['cdn_slug']}" if user['cdn_slug']
|
||||
{
|
||||
id: user["id"],
|
||||
name: "#{user['first_name']} #{user['last_name']}",
|
||||
username: username,
|
||||
website: user['website'],
|
||||
name: "#{user['username']}",
|
||||
username: "#{user['username']}",
|
||||
password: user['password'],
|
||||
email: email,
|
||||
avatar_url: avatar_url,
|
||||
custom_fields: user["custom_field_1"] ? { user_field_1: user["custom_field_1"] } : {},
|
||||
location: user["city"] && user["state"] ? "#{user['city']}, #{user['state']}" : nil,
|
||||
created_at: user["created_at"],
|
||||
last_seen_at: user["last_sign_in_at"],
|
||||
post_create_action: proc do |u|
|
||||
@ -169,7 +130,7 @@ EOM
|
||||
def import_topics
|
||||
puts "", "importing topics..."
|
||||
|
||||
topic_count = mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}posts WHERE type in ('Q', 'Q_HIDDEN')").first["count"]
|
||||
topic_count = mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}posts WHERE type = 'Q'").first["count"]
|
||||
|
||||
last_topic_id = -1
|
||||
|
||||
@ -177,9 +138,8 @@ EOM
|
||||
topics = mysql_query(<<-SQL
|
||||
SELECT p.postid, p.type, p.categoryid, p.closedbyid, p.userid postuserid, p.views, p.created, p.title, p.content raw
|
||||
FROM #{TABLE_PREFIX}posts p
|
||||
WHERE p.postid > #{last_topic_id}
|
||||
and p.parentid IS NULL
|
||||
and type IN ('Q', 'Q_HIDDEN')
|
||||
WHERE type = 'Q'
|
||||
AND p.postid > #{last_topic_id}
|
||||
ORDER BY p.postid
|
||||
LIMIT #{BATCH_SIZE}
|
||||
SQL
|
||||
@ -189,6 +149,7 @@ EOM
|
||||
|
||||
last_topic_id = topics[-1]["postid"]
|
||||
topics.reject! { |t| @lookup.post_already_imported?("thread-#{t["postid"]}") }
|
||||
topics.reject! { |t| t["type"] == "Q_HIDDEN" }
|
||||
|
||||
create_posts(topics, total: topic_count, offset: offset) do |topic|
|
||||
begin
|
||||
@ -205,7 +166,7 @@ EOM
|
||||
category: category_id_from_imported_category_id(topic["categoryid"]),
|
||||
raw: raw,
|
||||
created_at: topic["created"],
|
||||
visible: topic["closedbyid"].to_i == 0 && topic["type"] != 'Q_HIDDEN',
|
||||
visible: topic["closedbyid"].to_i == 0,
|
||||
views: topic["views"],
|
||||
}
|
||||
t
|
||||
@ -217,7 +178,7 @@ EOM
|
||||
topic = topic_lookup_from_imported_post_id(topic_id)
|
||||
if topic.present?
|
||||
title_slugified = slugify(thread["title"], false, 50) if thread["title"].present?
|
||||
url_slug = "#{thread["postid"]}/#{title_slugified}" if thread["title"].present?
|
||||
url_slug = "qa/#{thread["postid"]}/#{title_slugified}" if thread["title"].present?
|
||||
Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present?
|
||||
end
|
||||
end
|
||||
@ -261,12 +222,14 @@ EOM
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
posts = mysql_query(<<-SQL
|
||||
SELECT p.postid, p.type, p.parentid, p.categoryid, p.closedbyid, p.userid, p.views, p.created, p.title, p.content
|
||||
SELECT p.postid, p.type, p.parentid, p.categoryid, p.closedbyid, p.userid, p.views, p.created, p.title, p.content,
|
||||
parent.type AS parenttype, parent.parentid AS qid
|
||||
FROM #{TABLE_PREFIX}posts p
|
||||
LEFT JOIN qa_posts parent ON parent.postid = p.parentid
|
||||
WHERE p.parentid IS NOT NULL
|
||||
AND p.postid > #{last_post_id}
|
||||
AND type in ('A')
|
||||
AND closedbyid IS NULL
|
||||
AND p.type in ('A','C')
|
||||
AND p.closedbyid IS NULL
|
||||
ORDER BY p.postid
|
||||
LIMIT #{BATCH_SIZE}
|
||||
SQL
|
||||
@ -283,7 +246,23 @@ EOM
|
||||
puts e.message
|
||||
end
|
||||
next if raw.blank?
|
||||
next unless topic = topic_lookup_from_imported_post_id("thread-#{post["parentid"]}")
|
||||
|
||||
# this works as long as comments can not have a comment as parent
|
||||
# it's always Q-A Q-C or A-C
|
||||
|
||||
if post['type'] == 'A' # for answers the question/topic is always the parent
|
||||
topic = topic_lookup_from_imported_post_id("thread-#{post["parentid"]}")
|
||||
next if topic.nil?
|
||||
else
|
||||
if post['parenttype'] == 'Q' # for comments to questions, the question/topic is the parent as well
|
||||
topic = topic_lookup_from_imported_post_id("thread-#{post["parentid"]}")
|
||||
next if topic.nil?
|
||||
else # for comments to answers, the question/topic is the parent of the parent
|
||||
topic = topic_lookup_from_imported_post_id("thread-#{post["qid"]}")
|
||||
next if topic.nil?
|
||||
end
|
||||
end
|
||||
next if topic.nil?
|
||||
|
||||
p = {
|
||||
id: post["postid"],
|
||||
@ -300,6 +279,24 @@ EOM
|
||||
end
|
||||
end
|
||||
|
||||
def import_bestanswer
|
||||
puts "", "importing best answers..."
|
||||
ans = mysql_query("select postid, selchildid from qa_posts where selchildid is not null").to_a
|
||||
ans.each do |answer|
|
||||
begin
|
||||
post = Post.find_by(id: post_id_from_imported_post_id("#{answer['selchildid']}"))
|
||||
post.custom_fields["is_accepted_answer"] = "true"
|
||||
post.save
|
||||
topic = Topic.find(post.topic_id)
|
||||
topic.custom_fields["accepted_answer_post_id"] = post.id
|
||||
topic.save
|
||||
rescue => e
|
||||
puts "error acting on post #{e}"
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
def import_likes
|
||||
puts "", "importing likes..."
|
||||
likes = mysql_query(<<-SQL
|
||||
@ -343,6 +340,11 @@ EOM
|
||||
def preprocess_post_raw(raw)
|
||||
return "" if raw.blank?
|
||||
|
||||
raw.gsub!(/<a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a>/i, '[\2](\1)')
|
||||
raw.gsub!(/<p>(.+?)<\/p>/im) { "#{$1}\n\n" }
|
||||
raw.gsub!('<br />', "\n")
|
||||
raw.gsub!(/<strong>(.*?)<\/strong>/im, '[b]\1[/b]')
|
||||
|
||||
# decode HTML entities
|
||||
raw = @htmlentities.decode(raw)
|
||||
raw = ActionView::Base.full_sanitizer.sanitize raw
|
||||
@ -351,8 +353,6 @@ EOM
|
||||
raw.gsub!(/(\\r)?\\n/, "\n")
|
||||
raw.gsub!("\\t", "\t")
|
||||
|
||||
raw.gsub!('<br />', "\n")
|
||||
|
||||
# [HTML]...[/HTML]
|
||||
raw.gsub!(/\[html\]/i, "\n```html\n")
|
||||
raw.gsub!(/\[\/html\]/i, "\n```\n")
|
||||
@ -537,8 +537,20 @@ EOM
|
||||
end
|
||||
|
||||
def create_permalinks
|
||||
puts '', 'Creating Permalink File...', ''
|
||||
#creates permalinks for q2a category links
|
||||
puts '', 'Creating permalinks...'
|
||||
|
||||
# topics
|
||||
Topic.find_each do |topic|
|
||||
tcf = topic.custom_fields
|
||||
|
||||
if tcf && tcf["import_id"]
|
||||
question_id = tcf["import_id"][/thread-(\d)/, 0]
|
||||
url = "#{question_id}"
|
||||
Permalink.create(url: url, topic_id: topic.id) rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
# categories
|
||||
Category.find_each do |category|
|
||||
ccf = category.custom_fields
|
||||
|
||||
@ -547,6 +559,7 @@ EOM
|
||||
Permalink.create(url: url, category_id: category.id) rescue nil
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
def parse_timestamp(timestamp)
|
||||
|
Loading…
x
Reference in New Issue
Block a user