Improve Lithium importer

- grab the unique id for each post
- clean up empty HTML quote lines and lines containing only &nbsp;
This commit is contained in:
Sam 2015-07-15 15:34:16 +10:00
parent b772d96f7a
commit e0eb7f0016
1 changed file with 25 additions and 12 deletions

View File

@ -26,6 +26,9 @@ class ImportScripts::Lithium < ImportScripts::Base
TIMEZONE = "Asia/Kolkata" TIMEZONE = "Asia/Kolkata"
ATTACHMENT_DIR = '/path/to/your/attachment/folder' ATTACHMENT_DIR = '/path/to/your/attachment/folder'
TEMP = ""
def initialize def initialize
super super
@ -45,10 +48,10 @@ class ImportScripts::Lithium < ImportScripts::Base
def execute def execute
# import_groups # import_groups
import_users #import_users
import_categories import_categories
# import_topics import_topics
# import_posts import_posts
# import_attachments # import_attachments
# #
# close_topics # close_topics
@ -222,10 +225,10 @@ class ImportScripts::Lithium < ImportScripts::Base
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
topics = mysql_query <<-SQL topics = mysql_query <<-SQL
SELECT id, subject, body, deleted, user_id, SELECT id, subject, body, deleted, user_id,
post_date, views, node_id post_date, views, node_id, unique_id
FROM message2 FROM message2
WHERE id = root_id WHERE id = root_id #{TEMP}'
ORDER BY id ORDER BY node_id, id
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE}
OFFSET #{offset} OFFSET #{offset}
SQL SQL
@ -236,7 +239,7 @@ class ImportScripts::Lithium < ImportScripts::Base
# @closed_topic_ids << topic_id if topic["open"] == "0" # @closed_topic_ids << topic_id if topic["open"] == "0"
raw = ReverseMarkdown.convert(topic["body"]) raw = to_markdown(topic["body"])
t = { t = {
id: "#{topic["node_id"]} #{topic["id"]}", id: "#{topic["node_id"]} #{topic["id"]}",
@ -246,6 +249,7 @@ class ImportScripts::Lithium < ImportScripts::Base
raw: raw, raw: raw,
created_at: unix_time(topic["post_date"]), created_at: unix_time(topic["post_date"]),
views: topic["views"], views: topic["views"],
custom_fields: {import_unique_id: topic[:unique_id]}
} }
if topic["deleted"] > 0 if topic["deleted"] > 0
@ -266,10 +270,10 @@ class ImportScripts::Lithium < ImportScripts::Base
batches(BATCH_SIZE) do |offset| batches(BATCH_SIZE) do |offset|
posts = mysql_query <<-SQL posts = mysql_query <<-SQL
SELECT id, body, deleted, user_id, SELECT id, body, deleted, user_id,
post_date, parent_id, root_id post_date, parent_id, root_id, node_id, unique_id
FROM message2 FROM message2
WHERE id <> root_id WHERE id <> root_id #{TEMP}'
ORDER BY root_id, id ORDER BY node_id, root_id, id
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE}
OFFSET #{offset} OFFSET #{offset}
SQL SQL
@ -278,9 +282,9 @@ class ImportScripts::Lithium < ImportScripts::Base
create_posts(posts, total: post_count, offset: offset) do |post| create_posts(posts, total: post_count, offset: offset) do |post|
raw = preprocess_post_raw(post["raw"]) rescue nil raw = preprocess_post_raw(post["raw"]) rescue nil
next unless topic = topic_lookup_from_imported_post_id(post["root_id"]) next unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}")
raw = ReverseMarkdown.convert(post["body"]) raw = to_markdown(post["body"])
new_post = { new_post = {
id: "#{post["node_id"]} #{post["root_id"]} #{post["id"]}", id: "#{post["node_id"]} #{post["root_id"]} #{post["id"]}",
@ -288,6 +292,7 @@ class ImportScripts::Lithium < ImportScripts::Base
topic_id: topic[:topic_id], topic_id: topic[:topic_id],
raw: raw, raw: raw,
created_at: unix_time(post["post_date"]), created_at: unix_time(post["post_date"]),
custom_fields: {import_unique_id: post[:unique_id]}
} }
if post["deleted"] > 0 if post["deleted"] > 0
@ -303,6 +308,14 @@ class ImportScripts::Lithium < ImportScripts::Base
end end
end end
# Converts an HTML fragment to Markdown via ReverseMarkdown and blanks out
# two kinds of leftover lines from the converted text:
#
# * lines consisting solely of an "&nbsp;" entity (plus optional whitespace)
# * quote lines that hold nothing but a ">" marker, whitespace and asterisks
#
# Matching lines are replaced with an empty string; all other text is
# returned as produced by the converter.
def to_markdown(html)
  markdown = ReverseMarkdown.convert(html)

  # Drop lines whose only content is a non-breaking-space entity.
  without_nbsp_lines = markdown.gsub(/^\s*&nbsp;\s*$/, "")

  # Drop "ugly" quote lines: a bare ">" followed only by whitespace/asterisks.
  without_nbsp_lines.gsub(/^>[\s\*]*$/, "")
end
# find the uploaded file information from the db # find the uploaded file information from the db
def find_upload(post, attachment_id) def find_upload(post, attachment_id)
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename, sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,