FEATURE: XenForo importer can import categories from the xf_node table and convert sub-categories beyond second level to tags

This commit is contained in:
Neil Lalonde 2018-10-11 12:04:15 -04:00
parent b54d940182
commit a68032835a
1 changed files with 111 additions and 8 deletions

View File

@ -19,6 +19,9 @@ class ImportScripts::XenForo < ImportScripts::Base
password: "pa$$word", password: "pa$$word",
database: XENFORO_DB database: XENFORO_DB
) )
@category_mappings = {}
@prefix_as_category = false
end end
def execute def execute
@ -61,7 +64,72 @@ class ImportScripts::XenForo < ImportScripts::Base
def import_categories def import_categories
puts "", "importing categories..." puts "", "importing categories..."
# Note that this script uses Prefix as Category, you may want to change this as per your requirement categories = mysql_query("
SELECT node_id id,
title,
description,
parent_node_id,
display_order
FROM #{TABLE_PREFIX}node
ORDER BY parent_node_id, display_order
").to_a
top_level_categories = categories.select { |c| c["parent_node_id"] == 0 }
create_categories(top_level_categories) do |c|
{
id: c['id'],
name: c['title'],
description: c['description'],
position: c['display_order']
}
end
top_level_category_ids = Set.new(top_level_categories.map { |c| c["id"] })
subcategories = categories.select { |c| top_level_category_ids.include?(c["parent_node_id"]) }
create_categories(subcategories) do |c|
{
id: c['id'],
name: c['title'],
description: c['description'],
position: c['display_order'],
parent_category_id: category_id_from_imported_category_id(c['parent_node_id'])
}
end
subcategory_ids = Set.new(subcategories.map { |c| c['id'] })
# deeper categories need to be tags
categories.each do |c|
next if c['parent_node_id'] == 0
next if top_level_category_ids.include?(c['id'])
next if subcategory_ids.include?(c['id'])
# Find a subcategory for topics in this category
parent = c
while !parent.nil? && !subcategory_ids.include?(parent['id'])
parent = categories.find { |subcat| subcat['id'] == parent['parent_node_id'] }
end
if parent
tag_name = DiscourseTagging.clean_tag(c['title'])
@category_mappings[c['id']] = {
category_id: category_id_from_imported_category_id(parent['id']),
tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
}
else
puts '', "Couldn't find a category for #{c['id']} '#{c['title']}'!"
end
end
end
# This method is an alternative to import_categories.
# It uses prefixes instead of nodes.
def import_categories_from_thread_prefixes
puts "", "importing categories..."
categories = mysql_query(" categories = mysql_query("
SELECT prefix_id id SELECT prefix_id id
FROM #{TABLE_PREFIX}thread_prefix FROM #{TABLE_PREFIX}thread_prefix
@ -74,6 +142,8 @@ class ImportScripts::XenForo < ImportScripts::Base
name: "Category-#{category["id"]}" name: "Category-#{category["id"]}"
} }
end end
@prefix_as_category = true
end end
def import_posts def import_posts
@ -81,11 +151,10 @@ class ImportScripts::XenForo < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}post").first["count"] total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}post").first["count"]
batches(BATCH_SIZE) do |offset| posts_sql = "
results = mysql_query("
SELECT p.post_id id, SELECT p.post_id id,
t.thread_id topic_id, t.thread_id topic_id,
t.prefix_id category_id, #{@prefix_as_category ? 't.prefix_id' : 't.node_id'} category_id,
t.title title, t.title title,
t.first_post_id first_post_id, t.first_post_id first_post_id,
p.user_id user_id, p.user_id user_id,
@ -95,9 +164,10 @@ class ImportScripts::XenForo < ImportScripts::Base
#{TABLE_PREFIX}thread t #{TABLE_PREFIX}thread t
WHERE p.thread_id = t.thread_id WHERE p.thread_id = t.thread_id
ORDER BY p.post_date ORDER BY p.post_date
LIMIT #{BATCH_SIZE} LIMIT #{BATCH_SIZE}" # needs OFFSET
OFFSET #{offset};
").to_a batches(BATCH_SIZE) do |offset|
results = mysql_query("#{posts_sql} OFFSET #{offset};").to_a
break if results.size < 1 break if results.size < 1
next if all_records_exist? :posts, results.map { |p| p['id'] } next if all_records_exist? :posts, results.map { |p| p['id'] }
@ -115,7 +185,8 @@ class ImportScripts::XenForo < ImportScripts::Base
if m['category_id'].to_i == 0 || m['category_id'].nil? if m['category_id'].to_i == 0 || m['category_id'].nil?
mapped[:category] = SiteSetting.uncategorized_category_id mapped[:category] = SiteSetting.uncategorized_category_id
else else
mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i) mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i) ||
@category_mappings[m['category_id']].try(:[], :category_id)
end end
mapped[:title] = CGI.unescapeHTML(m['title']) mapped[:title] = CGI.unescapeHTML(m['title'])
else else
@ -132,6 +203,22 @@ class ImportScripts::XenForo < ImportScripts::Base
end end
end end
# Apply tags
batches(BATCH_SIZE) do |offset|
results = mysql_query("#{posts_sql} OFFSET #{offset};").to_a
break if results.size < 1
results.each do |m|
next unless m['id'] == m['first_post_id'] && m['category_id'].to_i > 0
next unless tag = @category_mappings[m['category_id']].try(:[], :tag)
next unless topic_mapping = topic_lookup_from_imported_post_id(m['id'])
topic = Topic.find_by_id(topic_mapping[:topic_id])
topic.tags = [tag] if topic
end
end
end end
def process_xenforo_post(raw, import_id) def process_xenforo_post(raw, import_id)
@ -167,6 +254,22 @@ class ImportScripts::XenForo < ImportScripts::Base
# [QUOTE]...[/QUOTE] # [QUOTE]...[/QUOTE]
s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" } s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }
# Nested Quotes
s.gsub!(/(\[\/?QUOTE.*?\])/mi) { |q| "\n#{q}\n" }
# [QUOTE="username, post: 28662, member: 1283"]
s.gsub!(/\[quote="(\w+), post: (\d*), member: (\d*)"\]/i) do
username, imported_post_id, imported_user_id = $1, $2, $3
topic_mapping = topic_lookup_from_imported_post_id(imported_post_id)
if topic_mapping
"\n[quote=\"#{username}, post:#{topic_mapping[:post_number]}, topic:#{topic_mapping[:topic_id]}\"]\n"
else
"\n[quote=\"#{username}\"]\n"
end
end
# [URL=...]...[/URL] # [URL=...]...[/URL]
s.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" } s.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }