Importer for Nabble
This commit is contained in:
parent
3677c56267
commit
ab50d039bb
|
@ -24,8 +24,9 @@ module Email
|
||||||
|
|
||||||
attr_reader :body, :email_log
|
attr_reader :body, :email_log
|
||||||
|
|
||||||
def initialize(raw)
|
def initialize(raw, opts=nil)
|
||||||
@raw = raw
|
@raw = raw
|
||||||
|
@opts = opts || {}
|
||||||
end
|
end
|
||||||
|
|
||||||
def process
|
def process
|
||||||
|
@ -135,6 +136,8 @@ module Email
|
||||||
body = fix_charset message
|
body = fix_charset message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
return body if @opts[:skip_sanity_check]
|
||||||
|
|
||||||
# Certain trigger phrases that means we didn't parse correctly
|
# Certain trigger phrases that means we didn't parse correctly
|
||||||
if body =~ /Content\-Type\:/ || body =~ /multipart\/alternative/ || body =~ /text\/plain/
|
if body =~ /Content\-Type\:/ || body =~ /multipart\/alternative/ || body =~ /text\/plain/
|
||||||
raise EmptyEmailError
|
raise EmptyEmailError
|
||||||
|
|
|
@ -0,0 +1,146 @@
|
||||||
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||||
|
require 'pg'
|
||||||
|
|
||||||
|
class ImportScripts::MyAskBot < ImportScripts::Base
|
||||||
|
# CHANGE THESE BEFORE RUNNING THE IMPORTER
|
||||||
|
|
||||||
|
BATCH_SIZE = 1000
|
||||||
|
|
||||||
|
DB_NAME = "nabble"
|
||||||
|
CATEGORY_ID = 6
|
||||||
|
|
||||||
|
# Prepares the importer: runs the base-class initializer, then sets up the
# PostgreSQL connection and the helpers the import steps rely on.
def initialize
  super

  # Connection to the database named by DB_NAME.
  @client = PG.connect(dbname: DB_NAME)
  # Decoder applied to the timestamp columns read from query results.
  @td = PG::TextDecoder::TimestampWithTimeZone.new
  # Not referenced by any other method visible in this file.
  @tagmap = []
end
|
||||||
|
|
||||||
|
# Runs the import phases in order: users first, then topics, then replies.
# The result of the final phase is returned, as with a plain call sequence.
def execute
  phases = %i(import_users create_forum_topics import_replies)
  phases.map { |phase| send(phase) }.last
end
|
||||||
|
|
||||||
|
# Imports every row of the `user_` table, BATCH_SIZE rows at a time.
#
# Rows are paged with LIMIT/OFFSET and each one is mapped to the attribute
# hash handed to `create_users` (provided outside the code visible here).
def import_users
  puts "", "importing users"

  # to_i keeps the total numeric even when the driver returns the count as text.
  total_count = @client.exec("SELECT COUNT(user_id) FROM user_")[0]["count"].to_i

  batches(BATCH_SIZE) do |offset|
    # `joined` alone does not give a unique order, so rows sharing a timestamp
    # could move between LIMIT/OFFSET pages and be skipped or imported twice.
    # user_id is added as a tie-breaker (assumes user_id is unique - confirm
    # against the schema).
    users = @client.exec(<<-SQL)
      SELECT user_id, name, email, joined
      FROM user_
      ORDER BY joined, user_id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if users.ntuples < 1

    create_users(users, total: total_count, offset: offset) do |user|
      {
        id: user["user_id"],
        # Rows without an email get a random placeholder address.
        email: user["email"] || (SecureRandom.hex << "@domain.com"),
        created_at: Time.zone.at(@td.decode(user["joined"])),
        name: user["name"]
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Extracts the body of a raw email message.
#
# The receiver is created with skip_sanity_check: true and is then asked to
# parse the Mail object built from the same raw string.
def parse_email(msg)
  Email::Receiver
    .new(msg, skip_sanity_check: true)
    .parse_body(Mail.read_from_string(msg))
end
|
||||||
|
|
||||||
|
# Imports every node whose parent is the application node as a topic in
# CATEGORY_ID, BATCH_SIZE rows at a time.
def create_forum_topics
  puts "", "creating forum topics"

  # The first node flagged is_app; its direct children are imported as topics.
  app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id']
  # to_i keeps the total numeric even when the driver returns the count as text.
  topic_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0]["count"].to_i

  batches(BATCH_SIZE) do |offset|
    # `when_created` alone does not give a unique order, so rows sharing a
    # timestamp could move between LIMIT/OFFSET pages. node_id is added as a
    # tie-breaker (assumes node_id is unique - confirm against the schema).
    topics = @client.exec(<<-SQL)
      SELECT n.node_id, n.subject, n.owner_id, n.when_created, nm.message, n.msg_fmt
      FROM node AS n
      INNER JOIN node_msg AS nm ON nm.node_id = n.node_id
      WHERE n.parent_id = #{app_node_id}
      ORDER BY n.when_created, n.node_id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if topics.ntuples < 1

    create_posts(topics, total: topic_count, offset: offset) do |t|
      raw = body_from(t)
      # body_from yields nil for rows it decided to skip.
      next unless raw

      {
        id: t['node_id'],
        title: t['subject'],
        # Fall back to the system user when the owner was not imported.
        user_id: user_id_from_imported_user_id(t["owner_id"]) || Discourse::SYSTEM_USER_ID,
        created_at: Time.zone.at(@td.decode(t["when_created"])),
        category: CATEGORY_ID,
        raw: raw
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Returns the text to import for a node row, or nil when the row should be
# skipped.
#
# Rows whose msg_fmt is "m" or "s" are run through parse_email; any other
# format is used verbatim. When parse_email raises
# Email::Receiver::EmptyEmailError the node id is printed and nil is returned.
def body_from(p)
  message = p['message']
  # Nothing to import for a NULL message; returning here also keeps nil out
  # of the mail parser for email-format rows.
  return if message.nil?

  %w(m s).include?(p['msg_fmt']) ? parse_email(message) : message
rescue Email::Receiver::EmptyEmailError
  puts "Skipped #{p['node_id']}"
  # Explicit nil: callers test the result with `next unless raw`.
  nil
end
|
||||||
|
|
||||||
|
# Imports every node whose parent is not the application node as a reply,
# BATCH_SIZE rows at a time, attaching each one to the topic of its parent.
def import_replies
  puts "", "creating topic replies"

  app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id']
  # to_i keeps the total numeric even when the driver returns the count as text.
  post_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0]["count"].to_i

  # node_id => topic_id for replies already seen in this run, so a reply to
  # one of them resolves its topic without another lookup.
  topic_ids = {}

  batches(BATCH_SIZE) do |offset|
    # `when_created` alone does not give a unique order, so rows sharing a
    # timestamp could move between LIMIT/OFFSET pages. node_id is added as a
    # tie-breaker (assumes node_id is unique - confirm against the schema).
    posts = @client.exec(<<-SQL)
      SELECT n.node_id, n.parent_id, n.subject, n.owner_id, n.when_created, nm.message, n.msg_fmt
      FROM node AS n
      INNER JOIN node_msg AS nm ON nm.node_id = n.node_id
      WHERE n.parent_id != #{app_node_id}
      ORDER BY n.when_created, n.node_id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if posts.ntuples < 1

    create_posts(posts, total: post_count, offset: offset) do |post|
      parent_id = post['parent_id']
      id = post['node_id']

      topic_id = topic_ids[parent_id]
      unless topic_id
        topic = topic_lookup_from_imported_post_id(parent_id)
        topic_id = topic[:topic_id] if topic
      end
      # No known topic for the parent: the reply cannot be attached anywhere.
      next unless topic_id

      # Recorded before the body check so that replies to this node still
      # resolve their topic even when this node itself is skipped.
      topic_ids[id] = topic_id

      raw = body_from(post)
      # body_from yields nil for rows it decided to skip.
      next unless raw

      {
        id: id,
        topic_id: topic_id,
        # Fall back to the system user when the owner was not imported.
        user_id: user_id_from_imported_user_id(post['owner_id']) || Discourse::SYSTEM_USER_ID,
        created_at: Time.zone.at(@td.decode(post["when_created"])),
        raw: raw
      }
    end
  end
end
|
||||||
|
end
|
||||||
|
|
||||||
|
ImportScripts::MyAskBot.new.perform
|
Loading…
Reference in New Issue