From a68032835aee1d4ea07d0d46d0a2ab2fd04b0644 Mon Sep 17 00:00:00 2001 From: Neil Lalonde Date: Thu, 11 Oct 2018 12:04:15 -0400 Subject: [PATCH] FEATURE: XenForo importer can import categories from the xf_node table and convert sub-categories beyond second level to tags --- script/import_scripts/xenforo.rb | 119 ++++++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 8 deletions(-) diff --git a/script/import_scripts/xenforo.rb b/script/import_scripts/xenforo.rb index ac23dcd0976..ed368c5e4a0 100644 --- a/script/import_scripts/xenforo.rb +++ b/script/import_scripts/xenforo.rb @@ -19,6 +19,9 @@ class ImportScripts::XenForo < ImportScripts::Base password: "pa$$word", database: XENFORO_DB ) + + @category_mappings = {} + @prefix_as_category = false end def execute @@ -61,7 +64,72 @@ class ImportScripts::XenForo < ImportScripts::Base def import_categories puts "", "importing categories..." - # Note that this script uses Prefix as Category, you may want to change this as per your requirement + categories = mysql_query(" + SELECT node_id id, + title, + description, + parent_node_id, + display_order + FROM #{TABLE_PREFIX}node + ORDER BY parent_node_id, display_order + ").to_a + + top_level_categories = categories.select { |c| c["parent_node_id"] == 0 } + + create_categories(top_level_categories) do |c| + { + id: c['id'], + name: c['title'], + description: c['description'], + position: c['display_order'] + } + end + + top_level_category_ids = Set.new(top_level_categories.map { |c| c["id"] }) + + subcategories = categories.select { |c| top_level_category_ids.include?(c["parent_node_id"]) } + + create_categories(subcategories) do |c| + { + id: c['id'], + name: c['title'], + description: c['description'], + position: c['display_order'], + parent_category_id: category_id_from_imported_category_id(c['parent_node_id']) + } + end + + subcategory_ids = Set.new(subcategories.map { |c| c['id'] }) + + # deeper categories need to be tags + categories.each do |c| + next if c['parent_node_id'] == 0 + next if top_level_category_ids.include?(c['id']) + next if subcategory_ids.include?(c['id']) + + # Find a subcategory for topics in this category + parent = c + while !parent.nil? && !subcategory_ids.include?(parent['id']) + parent = categories.find { |subcat| subcat['id'] == parent['parent_node_id'] } + end + + if parent + tag_name = DiscourseTagging.clean_tag(c['title']) + @category_mappings[c['id']] = { + category_id: category_id_from_imported_category_id(parent['id']), + tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name) + } + else + puts '', "Couldn't find a category for #{c['id']} '#{c['title']}'!" + end + end + end + + # This method is an alternative to import_categories. + # It uses prefixes instead of nodes. + def import_categories_from_thread_prefixes + puts "", "importing categories..." + categories = mysql_query(" SELECT prefix_id id FROM #{TABLE_PREFIX}thread_prefix @@ -74,6 +142,8 @@ class ImportScripts::XenForo < ImportScripts::Base name: "Category-#{category["id"]}" } end + + @prefix_as_category = true end def import_posts @@ -81,11 +151,10 @@ class ImportScripts::XenForo < ImportScripts::Base total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}post").first["count"] - batches(BATCH_SIZE) do |offset| - results = mysql_query(" + posts_sql = " SELECT p.post_id id, t.thread_id topic_id, - t.prefix_id category_id, + #{@prefix_as_category ? 't.prefix_id' : 't.node_id'} category_id, t.title title, t.first_post_id first_post_id, p.user_id user_id, @@ -95,9 +164,10 @@ class ImportScripts::XenForo < ImportScripts::Base #{TABLE_PREFIX}thread t WHERE p.thread_id = t.thread_id ORDER BY p.post_date - LIMIT #{BATCH_SIZE} - OFFSET #{offset}; - ").to_a + LIMIT #{BATCH_SIZE}" # needs OFFSET + + batches(BATCH_SIZE) do |offset| + results = mysql_query("#{posts_sql} OFFSET #{offset};").to_a break if results.size < 1 next if all_records_exist? :posts, results.map { |p| p['id'] } @@ -115,7 +185,8 @@ class ImportScripts::XenForo < ImportScripts::Base if m['category_id'].to_i == 0 || m['category_id'].nil? mapped[:category] = SiteSetting.uncategorized_category_id else - mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i) + mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i) || + @category_mappings[m['category_id']].try(:[], :category_id) end mapped[:title] = CGI.unescapeHTML(m['title']) else @@ -132,6 +203,22 @@ class ImportScripts::XenForo < ImportScripts::Base end end + # Apply tags + batches(BATCH_SIZE) do |offset| + results = mysql_query("#{posts_sql} OFFSET #{offset};").to_a + break if results.size < 1 + + results.each do |m| + next unless m['id'] == m['first_post_id'] && m['category_id'].to_i > 0 + next unless tag = @category_mappings[m['category_id']].try(:[], :tag) + next unless topic_mapping = topic_lookup_from_imported_post_id(m['id']) + + topic = Topic.find_by_id(topic_mapping[:topic_id]) + + topic.tags = [tag] if topic + end + end + end def process_xenforo_post(raw, import_id) @@ -167,6 +254,22 @@ class ImportScripts::XenForo < ImportScripts::Base # [QUOTE]...[/QUOTE] s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" } + # Nested Quotes + s.gsub!(/(\[\/?QUOTE.*?\])/mi) { |q| "\n#{q}\n" } + + # [QUOTE="username, post: 28662, member: 1283"] + s.gsub!(/\[quote="(\w+), post: (\d*), member: (\d*)"\]/i) do + username, imported_post_id, imported_user_id = $1, $2, $3 + + topic_mapping = topic_lookup_from_imported_post_id(imported_post_id) + + if topic_mapping + "\n[quote=\"#{username}, post:#{topic_mapping[:post_number]}, topic:#{topic_mapping[:topic_id]}\"]\n" + else + "\n[quote=\"#{username}\"]\n" + end + end + # [URL=...]...[/URL] s.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }