improvements to importer

- improve perf of test for existing posts
- always use a system guardian when importing posts
- for the Lithium importer, re-query raw from the source database (the transform is not repeatable)
This commit is contained in:
Sam 2015-10-15 13:25:10 +11:00
parent 06f616792d
commit 606aeb9d55
3 changed files with 37 additions and 15 deletions

View File

@ -57,6 +57,7 @@ class PostCreator
opts[:title] = pg_clean_up(opts[:title]) if opts[:title] && opts[:title].include?("\u0000")
opts[:raw] = pg_clean_up(opts[:raw]) if opts[:raw] && opts[:raw].include?("\u0000")
opts.delete(:reply_to_post_number) unless opts[:topic_id]
@guardian = opts[:guardian] if opts[:guardian]
@spam = false
end

View File

@ -197,13 +197,20 @@ class ImportScripts::Base
# Checks whether every id in +import_ids+ already has an `import_id` custom
# field row for the given record type.
#
# The ids are loaded into a temporary table and joined against the
# "<Type>CustomField" model, so the membership test is done by the join.
#
# type       - record type (e.g. :post); it is classified and suffixed with
#              "CustomField" to find the model to query.
# import_ids - list of import ids to look for; entries are converted with
#              to_s before being escaped.
#
# Returns false when +import_ids+ is empty, true when the number of matching
# custom field rows equals import_ids.length, and nil otherwise.
def all_records_exist?(type, import_ids)
  # Nothing to look up: return before any temp table is created, so there is
  # nothing to clean up afterwards.
  return false if import_ids.empty?

  begin
    Post.exec_sql('create temp table import_ids(val varchar(200) primary key)')

    # to_s first: escape_string is given strings even when callers pass
    # numeric ids.
    import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",")
    Post.exec_sql("insert into import_ids values #{import_id_clause}")

    existing = "#{type.to_s.classify}CustomField".constantize.where(name: 'import_id')
    existing = existing.joins('JOIN import_ids ON val=value')

    if existing.count == import_ids.length
      puts "Skipping #{import_ids.length} already imported #{type}"
      return true
    end
  ensure
    # "if exists" keeps the cleanup from raising (and hiding the original
    # error) when the create statement above is what failed.
    Post.exec_sql('drop table if exists import_ids')
  end
end
# Iterate through a list of user records to be imported.
@ -444,6 +451,8 @@ class ImportScripts::Base
[created, skipped]
end
# Guardian for the user with id -1, looked up once when this class body is
# evaluated. create_post assigns it to opts[:guardian] for every imported post.
# NOTE(review): id -1 is presumably the built-in system user — confirm.
STAFF_GUARDIAN = Guardian.new(User.find(-1))
def create_post(opts, import_id)
user = User.find(opts[:user_id])
post_create_action = opts.delete(:post_create_action)
@ -452,6 +461,7 @@ class ImportScripts::Base
opts[:custom_fields] ||= {}
opts[:custom_fields]['import_id'] = import_id
opts[:guardian] = STAFF_GUARDIAN
if @bbcode_to_md
opts[:raw] = opts[:raw].bbcode_to_md(false) rescue opts[:raw]
end

View File

@ -56,14 +56,14 @@ class ImportScripts::Lithium < ImportScripts::Base
SiteSetting.allow_html_tables = true
import_categories
import_users
import_topics
import_posts
import_likes
import_accepted_answers
import_pms
close_topics
create_permalinks
# import_users
# import_topics
# import_posts
# import_likes
# import_accepted_answers
# import_pms
# close_topics
# create_permalinks
post_process_posts
end
@ -307,11 +307,12 @@ class ImportScripts::Lithium < ImportScripts::Base
end
def import_posts
puts "", "importing posts..."
post_count = mysql_query("SELECT COUNT(*) count FROM message2
WHERE id <> root_id").first["count"]
puts "", "importing posts... (#{post_count})"
batches(BATCH_SIZE) do |offset|
posts = mysql_query <<-SQL
SELECT id, body, deleted, user_id,
@ -629,7 +630,6 @@ class ImportScripts::Lithium < ImportScripts::Base
import_mode: true
}
unless topic_id
msg[:title] = @htmlentities.decode(topic["subject"]).strip[0...255]
msg[:archetype] = Archetype.private_message
@ -739,15 +739,26 @@ SQL
def post_process_posts
puts "", "Postprocessing posts..."
current = 0
max = Post.count
mysql_query("create index idxUniqueId on message2(unique_id)") rescue nil
Post.all.find_each do |post|
begin
new_raw = postprocess_post_raw(post.raw, post.user_id)
id = post.custom_fields["import_unique_id"]
next unless id
raw = mysql_query("select body from message2 where unique_id = '#{id}'").first['body']
unless raw
puts "Missing raw for post: #{post.id}"
next
end
new_raw = postprocess_post_raw(raw, post.user_id)
post.raw = new_raw
post.save
rescue PrettyText::JavaScriptError
puts "GOT A JS error on post: #{post.id}"
nil
ensure
print_status(current += 1, max)
@ -825,7 +836,7 @@ SQL
end
# Sends +sql+ to the client stored in @client and returns the client's result.
#
# The statement is issued exactly once, with the cache_rows: true option, so
# statements with side effects are not executed twice.
# NOTE(review): @client is presumably a Mysql2::Client — confirm where it is
# assigned.
def mysql_query(sql)
  @client.query(sql, cache_rows: true)
end
end