Update Disqus importer.

This commit is contained in:
Guo Xiang Tan 2015-07-01 09:16:52 +08:00
parent 0c403272e2
commit efff3351bf
1 changed files with 91 additions and 81 deletions

View File

@ -1,17 +1,84 @@
require 'nokogiri' require 'nokogiri'
require 'optparse'
require File.expand_path(File.dirname(__FILE__) + "/base")
# Imports a Disqus XML export into Discourse.
#
# Every thread found by the SAX parser is imported through
# TopicEmbed.import_remote, and every comment of that thread is then created
# as a post in the resulting topic via the base importer's create_post.
class ImportScripts::Disqus < ImportScripts::Base
  # @param options [Hash] command-line options. Keys read here:
  #   :file    - path of the Disqus XML export (required)
  #   :post_as - username that owns topics and comments without an author email (required)
  #   :dry_run - when truthy, only print what would be imported
  #   :strip   - text passed to DisqusSAX, which removes it from thread titles
  #
  # Aborts the process when the file or the user cannot be found.
  def initialize(options)
    verify_file(options[:file])
    @post_as_user = get_post_as_user(options[:post_as])
    @dry_run = options[:dry_run]

    @parser = DisqusSAX.new(options[:strip])
    doc = Nokogiri::XML::SAX::Parser.new(@parser)
    doc.parse_file(options[:file])
    @parser.normalize

    super()
  end

  # Prints one line per parsed thread and, unless this is a dry run,
  # imports the thread together with its comments.
  def execute
    @parser.threads.each_value do |thread|
      puts "Creating #{thread[:title]}... (#{thread[:posts].size} posts)"
      import_thread(thread) unless @dry_run
    end
  end

  private

  # Creates the embedded topic for one thread and imports all of its comments.
  # Does nothing further when TopicEmbed.import_remote yields no post.
  def import_thread(thread)
    topic_post = TopicEmbed.import_remote(@post_as_user, thread[:link], title: thread[:title])
    return unless topic_post.present?

    thread[:posts].each { |comment| import_comment(topic_post, comment) }

    # Always refer to the topic through its first post; the posts created for
    # individual comments are kept in a separate variable so a failed comment
    # cannot break this call.
    TopicFeaturedUsers.new(topic_post.topic).choose
  end

  # Creates one Disqus comment as a post in the topic of +topic_post+ and
  # records the resulting post number on the comment hash so that later
  # comments can reply to it.
  def import_comment(topic_post, comment)
    author = @post_as_user
    if comment[:author_email]
      author = create_user({ id: nil, email: comment[:author_email] }, nil)
    end

    attrs = {
      user_id: author.id,
      topic_id: topic_post.topic_id,
      raw: comment[:cooked],
      cooked: comment[:cooked],
      # Time.zone.parse keeps the time of day; Date.parse would collapse every
      # comment written on the same day to midnight and lose their order.
      created_at: Time.zone.parse(comment[:created_at])
    }

    parent = comment[:parent_id] && @parser.posts[comment[:parent_id]]
    if parent && parent[:discourse_number]
      attrs[:reply_to_post_number] = parent[:discourse_number]
    end

    reply = create_post(attrs, comment[:id])

    # NOTE(review): create_post comes from ImportScripts::Base; it presumably
    # returns error messages instead of a post when creation fails - confirm.
    # Guard so one bad comment does not abort the rest of the thread.
    if reply.respond_to?(:post_number)
      comment[:discourse_number] = reply.post_number
    else
      puts "Failed to import comment #{comment[:id]}: #{Array(reply).join(', ')}"
    end
  end

  # Aborts unless +file+ names an existing file. A missing --file option
  # (nil) is reported the same way instead of raising a TypeError.
  def verify_file(file)
    abort("File '#{file}' not found") if file.nil? || !File.exist?(file)
  end

  # Looks up the user that imported content is attributed to.
  # Aborts when +username+ is nil (option not given) or matches no user.
  def get_post_as_user(username)
    user = username && User.find_by_username_lower(username.downcase)
    abort("No user found named: '#{username}'") if user.nil?
    user
  end
end
class DisqusSAX < Nokogiri::XML::SAX::Document class DisqusSAX < Nokogiri::XML::SAX::Document
attr_accessor :posts, :threads attr_accessor :posts, :threads
def initialize(options=nil) def initialize(strip)
@inside = {} @inside = {}
@posts = {} @posts = {}
@threads = {} @threads = {}
@options = options || {} @strip = strip
end end
def start_element(name, attrs = []) def start_element(name, attrs = [])
case name case name
when 'post' when 'post'
@post = {} @post = {}
@ -71,7 +138,6 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
target[sym] ||= "" target[sym] ||= ""
target[sym] << str target[sym] << str
end end
end end
def inside?(*params) def inside?(*params)
@ -79,14 +145,13 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
end end
def normalize def normalize
@threads.each do |id, t| @threads.each do |id, t|
if t[:posts].size == 0 if t[:posts].size == 0
# Remove any threads that have no posts # Remove any threads that have no posts
@threads.delete(id) @threads.delete(id)
else else
# Normalize titles # Normalize titles
t[:title].gsub!(@options[:strip], '') if @options[:strip].present? t[:title].gsub!(@strip, '') if @strip.present?
t[:title].strip! t[:title].strip!
end end
end end
@ -103,86 +168,31 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
@threads.delete(t[:id]) @threads.delete(t[:id])
end end
end end
end end
end end
class Disqus < Thor options = {
desc "import", "Imports posts from a Disqus XML export" dry_run: false
method_option :file, aliases: '-f', required: true, desc: "The disqus XML file to import"
method_option :dry_run, required: false, desc: "Just output what will be imported rather than doing it"
method_option :post_as, aliases: '-p', required: true, desc: "The Discourse username to post as"
method_option :strip, aliases: '-s', required: false, desc: "Text to strip from titles"
def import
require './config/environment'
email_blacklist = SiteSetting.email_domains_blacklist
user = User.where(username_lower: options[:post_as].downcase).first
if user.nil?
puts "No user found named: '#{options[:post_as]}'"
exit 1
end
unless File.exist?(options[:file])
puts "File '#{options[:file]}' not found"
exit 1
end
parser = DisqusSAX.new(options)
doc = Nokogiri::XML::SAX::Parser.new(parser)
doc.parse_file(options[:file])
parser.normalize
RateLimiter.disable
SiteSetting.email_domains_blacklist = ""
parser.threads.each do |id, t|
puts "Creating #{t[:title]}... (#{t[:posts].size} posts)"
if options[:dry_run].blank?
post = TopicEmbed.import_remote(user, t[:link], title: t[:title])
if post.present?
t[:posts].each do |p|
post_user = user
if p[:author_email]
email = Email.downcase(p[:author_email])
post_user = User.where(email: email).first
if post_user.blank?
post_user = User.create!(email: email, username: UserNameSuggester.suggest(email))
end
end
attrs = {
topic_id: post.topic_id,
raw: p[:cooked],
cooked: p[:cooked],
created_at: Date.parse(p[:created_at])
} }
if p[:parent_id] OptionParser.new do |opts|
parent = parser.posts[p[:parent_id]] opts.banner = 'Usage: RAILS_ENV=production ruby disqus.rb [OPTIONS]'
if parent && parent[:discourse_number]
attrs[:reply_to_post_number] = parent[:discourse_number] opts.on('-f', '--file=FILE_PATH', 'The disqus XML file to import') do |value|
end options[:file] = value
end end
post = PostCreator.new(post_user, attrs).create opts.on('-d', '--dry_run', 'Just output what will be imported rather than doing it') do
p[:discourse_number] = post.post_number options[:dry_run] = true
end
TopicFeaturedUsers.new(post.topic).choose
end
end
end end
ensure opts.on('-p', '--post_as=USERNAME', 'The Discourse username to post as') do |value|
RateLimiter.enable options[:post_as] = value
SiteSetting.email_domains_blacklist = email_blacklist
end
end end
opts.on('-s', '--strip=TEXT', 'Text to strip from titles') do |value|
options[:strip] = value
end
end.parse!
ImportScripts::Disqus.new(options).perform