# frozen_string_literal: true

require File.expand_path(File.dirname(__FILE__) + "/base.rb")

require "pg"

require_relative "base/uploader"

=begin
If you want to create mock users for posts made by anonymous participants,
run the following SQL prior to importing.

-- first attribute any anonymous posts to existing users (if any)

UPDATE node
SET owner_id = p.user_id, anonymous_name = NULL
FROM ( SELECT lower(name) AS name, user_id FROM user_ ) p
WHERE p.name = lower(node.anonymous_name)
AND owner_id IS NULL;

-- then create mock users

INSERT INTO user_ (email, name, joined, registered)
SELECT lower(anonymous_name) || '@dummy.com', MIN(anonymous_name), MIN(when_created), MIN(when_created)
FROM node
WHERE anonymous_name IS NOT NULL
GROUP BY lower(anonymous_name);

-- then move these posts to the new users
-- (this is intentionally the same query as the first one)

UPDATE node
SET owner_id = p.user_id, anonymous_name = NULL
FROM ( SELECT lower(name) AS name, user_id FROM user_ ) p
WHERE p.name = lower(node.anonymous_name)
AND owner_id IS NULL;
=end

class ImportScripts::Nabble < ImportScripts::Base
  # CHANGE THESE BEFORE RUNNING THE IMPORTER
  BATCH_SIZE = 1000
  DB_NAME = "nabble"
  CATEGORY_ID = 6

  def initialize
    super

    @tagmap = []
    @td = PG::TextDecoder::TimestampWithTimeZone.new
    @client = PG.connect(dbname: DB_NAME)
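    # avatar/attachment blobs are written under /tmp/nab before being
    # uploaded; create the directory up front so the File.open calls below
    # don't fail when it is missing (the script otherwise assumes it exists)
    FileUtils.mkdir_p("/tmp/nab")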
    @uploader = ImportScripts::Uploader.new
  end

  def execute
    import_users
    create_forum_topics
    import_replies
  end

  def import_users
    puts "", "importing users"

    total_count = @client.exec("SELECT COUNT(user_id) FROM user_")[0]["count"]

    batches(BATCH_SIZE) do |offset|
      users = @client.query(<<-SQL)
        SELECT user_id, name, email, joined
        FROM user_
        ORDER BY joined
        LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if users.ntuples < 1

      next if all_records_exist? :users, users.map { |u| u["user_id"].to_i }

      create_users(users, total: total_count, offset: offset) do |row|
        {
          id: row["user_id"],
          email: row["email"] || fake_email,
          created_at: Time.zone.at(@td.decode(row["joined"])),
          name: row["name"],
          post_create_action: proc { |user| import_avatar(user, row["user_id"]) },
        }
      end
    end
  end
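
  # Fetch the user's avatar100.png blob from Nabble's file_avatar table,
  # write it to /tmp/nab and attach it as the user's Discourse avatar.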
  def import_avatar(user, org_id)
    filename = "avatar" + org_id.to_s
    path = File.join("/tmp/nab", filename)
    res =
      @client.exec(
        "SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1",
      )
    return if res.ntuples < 1

    binary = res[0]["content"]
    File.open(path, "wb") { |f| f.write(PG::Connection.unescape_bytea(binary)) }

    upload = @uploader.create_upload(user.id, path, filename)

    if upload.persisted?
      user.import_mode = false
      user.create_user_avatar
      user.import_mode = true
      user.user_avatar.update(custom_upload_id: upload.id)
      user.update(uploaded_avatar_id: upload.id)
    else
      Rails.logger.error("Could not persist avatar for user #{user.username}")
    end
  end
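
  # Mail-sourced posts are stored as raw RFC822 messages; reuse Discourse's
  # Email::Receiver to pick the best body part and normalize it to UTF-8.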
  def parse_email(msg)
    receiver = Email::Receiver.new(msg)

    body, _elided = receiver.select_body
    body.force_encoding(body.encoding).encode("UTF-8")
  end
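
  # Topics are the direct children of the forum's application node;
  # everything deeper in the tree is imported later as a reply.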
  def create_forum_topics
    puts "", "creating forum topics"

    app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"]
    topic_count =
      @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0][
        "count"
      ]

    batches(BATCH_SIZE) do |offset|
      topics = @client.exec <<-SQL
        SELECT n.node_id, n.subject, n.owner_id, n.when_created, nm.message, n.msg_fmt
        FROM node AS n
        INNER JOIN node_msg AS nm ON nm.node_id = n.node_id
        WHERE n.parent_id = #{app_node_id}
        ORDER BY n.when_created
        LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if topics.ntuples < 1

      next if all_records_exist? :posts, topics.map { |t| t["node_id"].to_i }

      create_posts(topics, total: topic_count, offset: offset) do |t|
        raw = body_from(t)
        next unless raw
        raw = process_content(raw)
        raw = process_attachments(raw, t["node_id"])

        {
          id: t["node_id"],
          title: t["subject"],
          user_id: user_id_from_imported_user_id(t["owner_id"]) || Discourse::SYSTEM_USER_ID,
          created_at: Time.zone.at(@td.decode(t["when_created"])),
          category: CATEGORY_ID,
          raw: raw,
          cook_method: Post.cook_methods[:regular],
        }
      end
    end
  end
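
  # Nodes with msg_fmt "m" or "s" hold email messages and need full parsing;
  # anything else is already plain text. Posts with empty emails are skipped.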
  def body_from(p)
    %w[m s].include?(p["msg_fmt"]) ? parse_email(p["message"]) : p["message"]
  rescue Email::Receiver::EmptyEmailError
    puts "Skipped #{p["node_id"]}"
  end
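
  # Convert Nabble's <quote>/<raw> markup into Discourse equivalents and
  # escape leading "#" so lines starting with # aren't rendered as headings.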
  def process_content(txt)
    txt.gsub! /\<quote author="(.*?)"\>/, '[quote="\1"]'
    txt.gsub! %r{\</quote\>}, "[/quote]"
    txt.gsub!(%r{\<raw\>(.*?)\</raw\>}m) do |match|
      c = Regexp.last_match[1].indent(4)
      "\n#{c}\n"
    end

    # lines starting with # are comments, not headings, insert a space to prevent markdown
    txt.gsub! /\n#/m, " #"

    # in the languagetool forum, quite a lot of XML was not marked as raw,
    # so we treat <rule...>...</rule> and <category...>...</category> as raw;
    # uncomment below if you want to use this

    #txt.gsub!(/<rule(.*?)>(.*?<\/rule>)/m) do |match|
    #  c = Regexp.last_match[2].indent(4)
    #  "\n    <rule#{Regexp.last_match[1]}>#{c}\n"
    #end
    #txt.gsub!(/<category(.*?)>(.*?<\/category>)/m) do |match|
    #  c = Regexp.last_match[2].indent(4)
    #  "\n    <category#{Regexp.last_match[1]}>#{c}\n"
    #end
    txt
  end
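
  # Replace <nabble_img> and <nabble_a> placeholders with real uploads pulled
  # from Nabble's file_node table.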
  def process_attachments(txt, postid)
    txt.gsub!(/<nabble_img src="(.*?)" (.*?)>/m) do |match|
      basename = Regexp.last_match[1]
      get_attachment_upload(basename, postid) { |upload| @uploader.embedded_image_html(upload) }
    end

    txt.gsub!(%r{<nabble_a href="(.*?)">(.*?)</nabble_a>}m) do |match|
      basename = Regexp.last_match[1]
      get_attachment_upload(basename, postid) do |upload|
        @uploader.attachment_html(upload, basename)
      end
    end
    txt
  end
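
  # Look up the attachment blob for this post, write it to /tmp/nab and yield
  # the resulting upload. Note that basename comes straight from the post
  # markup and is interpolated into the SQL unescaped, which assumes Nabble
  # file names are trusted input.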
  def get_attachment_upload(basename, postid)
    contents =
      @client.exec("SELECT content FROM file_node WHERE name='#{basename}' AND node_id = #{postid}")
    if contents.any?
      binary = contents[0]["content"]
      fn = File.join("/tmp/nab", basename)
      File.open(fn, "wb") { |f| f.write(PG::Connection.unescape_bytea(binary)) }
      yield @uploader.create_upload(0, fn, basename)
    end
  end
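
  # Replies reference their topic via parent_id; topic_ids caches the mapping
  # so nested replies can find their topic without a lookup per post.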
  def import_replies
    puts "", "creating topic replies"

    app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"]
    post_count =
      @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0][
        "count"
      ]

    topic_ids = {}

    batches(BATCH_SIZE) do |offset|
      posts = @client.exec <<-SQL
        SELECT n.node_id, n.parent_id, n.subject, n.owner_id, n.when_created, nm.message, n.msg_fmt
        FROM node AS n
        INNER JOIN node_msg AS nm ON nm.node_id = n.node_id
        WHERE n.parent_id != #{app_node_id}
        ORDER BY n.when_created
        LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
      SQL

      break if posts.ntuples < 1

      next if all_records_exist? :posts, posts.map { |p| p["node_id"].to_i }

      create_posts(posts, total: post_count, offset: offset) do |p|
        parent_id = p["parent_id"]
        id = p["node_id"]

        topic_id = topic_ids[parent_id]
        unless topic_id
          topic = topic_lookup_from_imported_post_id(parent_id)
          topic_id = topic[:topic_id] if topic
        end
        next unless topic_id

        topic_ids[id] = topic_id

        raw = body_from(p)
        next unless raw
        raw = process_content(raw)
        raw = process_attachments(raw, id)
        {
          id: id,
          topic_id: topic_id,
          user_id: user_id_from_imported_user_id(p["owner_id"]) || Discourse::SYSTEM_USER_ID,
          created_at: Time.zone.at(@td.decode(p["when_created"])),
          raw: raw,
          cook_method: Post.cook_methods[:regular],
        }
      end
    end
  end
end
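
# Minimal String#indent used by process_content to turn <raw> blocks into
# markdown code blocks (every line indented by `count` copies of `char`).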
class String
  def indent(count, char = " ")
    gsub(/([^\n]*)(\n|$)/) do |match|
      last_iteration = ($1 == "" && $2 == "")
      line = +""
      line << (char * count) unless last_iteration
      line << $1
      line << $2
      line
    end
  end
end

ImportScripts::Nabble.new.perform