2013-12-12 15:35:55 -05:00
|
|
|
require 'nokogiri'
|
|
|
|
|
|
|
|
# SAX handler that collects threads and posts from a Disqus XML export.
#
# While parsing it tracks which element names are currently open and appends
# character data to the post or thread being built. Afterwards:
#
# * +threads+ maps a thread's dsq:id to a hash with :id, :posts and whatever
#   :link / :title / :created_at text was seen inside it.
# * +posts+ maps a post's dsq:id to a hash with :id and whatever author,
#   :created_at, :cooked and :parent_id values were seen inside it.
class DisqusSAX < Nokogiri::XML::SAX::Document
  attr_accessor :posts, :threads

  # options - optional hash-like object. Only options[:strip] is read here
  #           (by #normalize); nil is treated as an empty hash.
  def initialize(options = nil)
    @inside = {}
    @posts = {}
    @threads = {}
    @options = options || {}
  end

  # SAX callback for an opening tag.
  #
  # name  - element name.
  # attrs - array of [attribute_name, value] pairs.
  def start_element(name, attrs = [])
    case name
    when 'post'
      @post = { id: Hash[attrs]['dsq:id'] }
    when 'thread'
      id = Hash[attrs]['dsq:id']
      if @post
        # A <thread> inside a <post> refers to a thread stored earlier; attach
        # the post to it. An unknown id is ignored instead of raising on nil.
        thread = @threads[id]
        thread[:posts] << @post if thread
      else
        @thread = { id: id, posts: [] }
      end
    when 'parent'
      @post[:parent_id] = Hash[attrs]['dsq:id'] if @post
    end

    @inside[name] = true
  end

  # SAX callback for a closing tag; stores the finished post or thread.
  def end_element(name)
    case name
    when 'post'
      @posts[@post[:id]] = @post
      @post = nil
    when 'thread'
      # Only a thread definition (not a reference nested in a post) is stored.
      unless @post
        @threads[@thread[:id]] = @thread
        @thread = nil
      end
    end

    @inside[name] = false
  end

  # SAX callback for text nodes. May be invoked several times for a single
  # element, which is why #record appends rather than assigns.
  def characters(str)
    record(@post, :author_email, str, 'author', 'email')
    record(@post, :author_name, str, 'author', 'name')
    record(@post, :author_anonymous, str, 'author', 'isAnonymous')
    record(@post, :created_at, str, 'createdAt')

    record(@thread, :link, str, 'link')
    record(@thread, :title, str, 'title')
    record(@thread, :created_at, str, 'createdAt')
  end

  # SAX callback for CDATA sections; the post body lives in <message>.
  def cdata_block(str)
    record(@post, :cooked, str, 'message')
  end

  # Appends +str+ to target[sym] when every element named in +params+ is
  # currently open. Does nothing when +target+ is nil.
  def record(target, sym, str, *params)
    return if target.nil?
    return unless inside?(*params)

    target[sym] ||= ""
    target[sym] << str
  end

  # Returns true when all of the given element names are currently open
  # (vacuously true for an empty list).
  def inside?(*params)
    params.all? { |element| @inside[element] }
  end

  # Post-processes the parsed threads in place:
  #
  # 1. drops threads that received no posts,
  # 2. removes options[:strip] from titles and trims surrounding whitespace,
  # 3. merges threads that end up with the same title into the first one seen.
  #
  # Threads without a title are left untouched and are never merged.
  def normalize
    # Remove any threads that have no posts
    @threads.delete_if { |_id, thread| thread[:posts].empty? }

    # Normalize titles
    strip = @options[:strip]
    strip = nil unless strip.present?
    @threads.each_value do |thread|
      title = thread[:title]
      next if title.nil?

      title.gsub!(strip, '') if strip
      title.strip!
    end

    # Merge any threads that have the same title. delete_if is used so the
    # hash is never modified behind the back of a running iteration.
    first_with_title = {}
    @threads.delete_if do |_id, thread|
      title = thread[:title]
      next false if title.nil?

      keeper = first_with_title[title]
      if keeper.nil?
        first_with_title[title] = thread
        false
      else
        keeper[:posts].concat(thread[:posts])
        true
      end
    end
  end
end
|
|
|
|
|
|
|
|
# Thor task that imports the comments of a Disqus XML export.
#
# Each parsed thread is passed to TopicEmbed.import_remote, and each of its
# comments is then created through PostCreator on the resulting topic.
class Disqus < Thor
  desc "import", "Imports posts from a Disqus XML export"
  method_option :file, aliases: '-f', required: true, desc: "The disqus XML file to import"
  method_option :dry_run, required: false, desc: "Just output what will be imported rather than doing it"
  method_option :post_as, aliases: '-p', required: true, desc: "The Discourse username to post as"
  method_option :strip, aliases: '-s', required: false, desc: "Text to strip from titles"
  # Parses options[:file] and, unless --dry_run is given, creates the posts.
  #
  # Exits with status 1 when the --post_as user or the file cannot be found.
  # The rate limiter and the email domain blacklist are switched off only for
  # the duration of the import and are restored afterwards, even on failure.
  def import
    require './config/environment'
    require 'time'

    user = User.where(username_lower: options[:post_as].downcase).first
    if user.nil?
      puts "No user found named: '#{options[:post_as]}'"
      exit 1
    end

    unless File.exist?(options[:file])
      puts "File '#{options[:file]}' not found"
      exit 1
    end

    parser = DisqusSAX.new(options)
    doc = Nokogiri::XML::SAX::Parser.new(parser)
    doc.parse_file(options[:file])
    parser.normalize

    # Remember the setting before touching anything so the ensure below can
    # only ever restore a value that was really read.
    email_blacklist = SiteSetting.email_domains_blacklist

    begin
      RateLimiter.disable
      SiteSetting.email_domains_blacklist = ""

      parser.threads.each_value do |t|
        puts "Creating #{t[:title]}... (#{t[:posts].size} posts)"
        next unless options[:dry_run].blank?

        # NOTE(review): presumably the first post of the embedded topic;
        # only #topic_id and #topic are used from it here.
        topic_post = TopicEmbed.import_remote(user, t[:link], title: t[:title])
        next unless topic_post.present?

        t[:posts].each do |comment|
          # Post as the comment's author when an email is known (creating the
          # user if needed); otherwise fall back to the --post_as user.
          post_user = user
          if comment[:author_email]
            email = Email.downcase(comment[:author_email])
            post_user = User.where(email: email).first
            if post_user.blank?
              post_user = User.create!(email: email, username: UserNameSuggester.suggest(email))
            end
          end

          attrs = {
            topic_id: topic_post.topic_id,
            raw: comment[:cooked],
            cooked: comment[:cooked],
            # Time.parse keeps the time of day that Date.parse would drop.
            created_at: Time.parse(comment[:created_at])
          }

          if comment[:parent_id]
            parent = parser.posts[comment[:parent_id]]
            if parent && parent[:discourse_number]
              attrs[:reply_to_post_number] = parent[:discourse_number]
            end
          end

          # Kept in its own variable: reusing the topic post's variable made
          # every later comment depend on the previously created one.
          created = PostCreator.new(post_user, attrs).create
          if created && created.post_number
            # Remembered so that replies to this comment can reference it.
            comment[:discourse_number] = created.post_number
          else
            puts "Could not create a post for Disqus comment '#{comment[:id]}'"
          end
        end

        TopicFeaturedUsers.new(topic_post.topic).choose
      end
    ensure
      RateLimiter.enable
      SiteSetting.email_domains_blacklist = email_blacklist
    end
  end
end
|
|
|
|
|
|
|
|
|