From 93ff54e184fcae8a85b4030a15702f0989dd6927 Mon Sep 17 00:00:00 2001 From: Krzysztof Kotlarek Date: Tue, 14 Jul 2020 15:58:27 +1000 Subject: [PATCH] FIX: improvements for vanilla bulk import (#10212) Adjustments to the base: 1. The PG connection doesn't require a host - it was broken on the import droplet 2. Drop `topic_reply_count` - it was removed here - https://github.com/discourse/discourse/blob/master/db/post_migrate/20200513185052_drop_topic_reply_count.rb 3. Fix the error with `backtrace.join("\n")` -> `e.backtrace.join("\n")` 4. Correctly link the user and avatar to the quote block Adjustments to vanilla: 1. Top-level Vanilla categories are valid categories 2. Posts have a `format` column, which should be used to decide whether the format is HTML or Markdown 3. Remove non-UTF-8 characters 4. Remove unsupported HTML elements such as `font`, `span`, `sub`, and `u` --- script/bulk_import/base.rb | 30 +++++++++----- script/bulk_import/vanilla.rb | 74 ++++++++++++++++++++++++++--------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb index 1253c2d2e48..0d7ec65d06e 100644 --- a/script/bulk_import/base.rb +++ b/script/bulk_import/base.rb @@ -76,7 +76,7 @@ class BulkImport::Base charset = ENV["DB_CHARSET"] || "utf8" db = ActiveRecord::Base.connection_config @encoder = PG::TextEncoder::CopyRow.new - @raw_connection = PG.connect(dbname: db[:database], host: db[:host_names]&.first, port: db[:port]) + @raw_connection = PG.connect(dbname: db[:database], port: db[:port]) @uploader = ImportScripts::Uploader.new @html_entities = HTMLEntities.new @encoding = CHARSET_MAP[charset] @@ -283,7 +283,7 @@ class BulkImport::Base USER_STAT_COLUMNS ||= %i{ user_id topics_entered time_read days_visited posts_read_count - likes_given likes_received topic_reply_count new_since read_faq + likes_given likes_received new_since read_faq first_post_created_at post_count topic_count bounce_score reset_bounce_score_after } @@ -441,14 +441,12 @@ class 
BulkImport::Base def process_user_stat(user_stat) user_stat[:user_id] = @users[user_stat[:imported_user_id].to_i] - user_stat[:topic_reply_count] = user_stat[:post_count] - user_stat[:topic_count] user_stat[:topics_entered] ||= 0 user_stat[:time_read] ||= 0 user_stat[:days_visited] ||= 0 user_stat[:posts_read_count] ||= 0 user_stat[:likes_given] ||= 0 user_stat[:likes_received] ||= 0 - user_stat[:topic_reply_count] ||= 0 user_stat[:new_since] ||= NOW user_stat[:post_count] ||= 0 user_stat[:topic_count] ||= 0 @@ -546,7 +544,8 @@ class BulkImport::Base topic_tag end - def process_raw(raw) + def process_raw(original_raw) + raw = original_raw.dup # fix whitespaces raw.gsub!(/(\\r)?\\n/, "\n") raw.gsub!("\\t", "\t") @@ -699,7 +698,7 @@ class BulkImport::Base rescue => e puts "\n" puts "ERROR: #{e.message}" - puts backtrace.join("\n") + puts e.backtrace.join("\n") end end end @@ -782,17 +781,25 @@ class BulkImport::Base quote.gsub!(/^(
\n?)+/, "") quote.gsub!(/(
\n?)+$/, "") + user = User.find_by(username: username) + if post_id.present? && topic_id.present? <<-HTML HTML else <<-HTML -