Enhance bulk import scripts (#5010)
* Enhance bulk import scripts
* Fix: restore running statement of BulkImport::VBulletin
This commit is contained in:
parent
83bb042311
commit
0daa177805
|
@ -176,7 +176,7 @@ class BulkImport::Base
|
||||||
|
|
||||||
POST_COLUMNS ||= %i{
|
POST_COLUMNS ||= %i{
|
||||||
id user_id last_editor_id topic_id post_number sort_order reply_to_post_number
|
id user_id last_editor_id topic_id post_number sort_order reply_to_post_number
|
||||||
raw cooked hidden word_count created_at last_version_at updated_at
|
like_count raw cooked hidden word_count created_at last_version_at updated_at
|
||||||
}
|
}
|
||||||
|
|
||||||
TOPIC_ALLOWED_USER_COLUMNS ||= %i{
|
TOPIC_ALLOWED_USER_COLUMNS ||= %i{
|
||||||
|
@ -306,7 +306,8 @@ class BulkImport::Base
|
||||||
end
|
end
|
||||||
|
|
||||||
def process_category(category)
|
def process_category(category)
|
||||||
@categories[category[:imported_id].to_s] = category[:id] = @last_category_id += 1
|
category[:id] ||= @last_category_id += 1
|
||||||
|
@categories[category[:imported_id].to_s] ||= category[:id]
|
||||||
category[:name] = category[:name][0...50].scrub.strip
|
category[:name] = category[:name][0...50].scrub.strip
|
||||||
# TODO: unique name
|
# TODO: unique name
|
||||||
category[:name_lower] = category[:name].downcase
|
category[:name_lower] = category[:name].downcase
|
||||||
|
@ -347,6 +348,7 @@ class BulkImport::Base
|
||||||
@topic_id_by_post_id[post[:id]] = post[:topic_id]
|
@topic_id_by_post_id[post[:id]] = post[:topic_id]
|
||||||
post[:raw] = (post[:raw] || "").scrub.strip.presence || "<Empty imported post>"
|
post[:raw] = (post[:raw] || "").scrub.strip.presence || "<Empty imported post>"
|
||||||
post[:raw] = process_raw post[:raw]
|
post[:raw] = process_raw post[:raw]
|
||||||
|
post[:like_count] ||= 0
|
||||||
post[:cooked] = pre_cook post[:raw]
|
post[:cooked] = pre_cook post[:raw]
|
||||||
post[:hidden] ||= false
|
post[:hidden] ||= false
|
||||||
post[:word_count] = post[:raw].scan(/[[:word:]]+/).size
|
post[:word_count] = post[:raw].scan(/[[:word:]]+/).size
|
||||||
|
@ -484,7 +486,8 @@ class BulkImport::Base
|
||||||
mapped = yield(row)
|
mapped = yield(row)
|
||||||
next unless mapped
|
next unless mapped
|
||||||
processed = send(process_method_name, mapped)
|
processed = send(process_method_name, mapped)
|
||||||
imported_ids << mapped[:imported_id]
|
imported_ids << mapped[:imported_id] unless mapped[:imported_id].nil?
|
||||||
|
imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil?
|
||||||
@raw_connection.put_copy_data columns.map { |c| processed[c] }
|
@raw_connection.put_copy_data columns.map { |c| processed[c] }
|
||||||
print "\r%7d - %6d/sec".freeze % [imported_ids.size, imported_ids.size.to_f / (Time.now - start)] if imported_ids.size % 5000 == 0
|
print "\r%7d - %6d/sec".freeze % [imported_ids.size, imported_ids.size.to_f / (Time.now - start)] if imported_ids.size % 5000 == 0
|
||||||
end
|
end
|
||||||
|
@ -538,7 +541,18 @@ class BulkImport::Base
|
||||||
end
|
end
|
||||||
|
|
||||||
def pre_cook(raw)
|
def pre_cook(raw)
|
||||||
cooked = @markdown.render(raw).scrub.strip
|
cooked = raw
|
||||||
|
|
||||||
|
# Convert YouTube URLs to lazyYT DOMs before being transformed into links
|
||||||
|
cooked.gsub!(/\nhttps\:\/\/www.youtube.com\/watch\?v=(\w+)\n/) do
|
||||||
|
video_id = $1
|
||||||
|
result = <<-HTML
|
||||||
|
<div class="lazyYT" data-youtube-id="#{video_id}" data-width="480" data-height="270" data-parameters="feature=oembed&wmode=opaque"></div>
|
||||||
|
HTML
|
||||||
|
result.strip
|
||||||
|
end
|
||||||
|
|
||||||
|
cooked = @markdown.render(cooked).scrub.strip
|
||||||
|
|
||||||
cooked.gsub!(/\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[\/QUOTE\]/im) do
|
cooked.gsub!(/\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[\/QUOTE\]/im) do
|
||||||
username, post_id, topic_id = $1, $2, $3
|
username, post_id, topic_id = $1, $2, $3
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
require_relative "base"
|
require_relative "base"
|
||||||
|
require "set"
|
||||||
require "mysql2"
|
require "mysql2"
|
||||||
require "htmlentities"
|
require "htmlentities"
|
||||||
|
|
||||||
|
@ -354,6 +355,8 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
|
|
||||||
posts = mysql_stream <<-SQL
|
posts = mysql_stream <<-SQL
|
||||||
SELECT postid, post.threadid, parentid, userid, post.dateline, post.visible, pagetext
|
SELECT postid, post.threadid, parentid, userid, post.dateline, post.visible, pagetext
|
||||||
|
#{", post_thanks_amount" if @has_post_thanks}
|
||||||
|
|
||||||
FROM post
|
FROM post
|
||||||
JOIN thread ON thread.threadid = post.threadid
|
JOIN thread ON thread.threadid = post.threadid
|
||||||
WHERE postid > #{@last_imported_post_id}
|
WHERE postid > #{@last_imported_post_id}
|
||||||
|
@ -365,7 +368,7 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
replied_post_topic_id = topic_id_from_imported_post_id(row[2])
|
replied_post_topic_id = topic_id_from_imported_post_id(row[2])
|
||||||
reply_to_post_number = topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
|
reply_to_post_number = topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
|
||||||
|
|
||||||
{
|
post = {
|
||||||
imported_id: row[0],
|
imported_id: row[0],
|
||||||
topic_id: topic_id,
|
topic_id: topic_id,
|
||||||
reply_to_post_number: reply_to_post_number,
|
reply_to_post_number: reply_to_post_number,
|
||||||
|
@ -374,6 +377,9 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
hidden: row[5] == 0,
|
hidden: row[5] == 0,
|
||||||
raw: normalize_text(row[6]),
|
raw: normalize_text(row[6]),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
post[:like_count] = row[7] if @has_post_thanks
|
||||||
|
post
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -396,11 +402,10 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
|
|
||||||
next if @imported_topics.has_key?(key)
|
next if @imported_topics.has_key?(key)
|
||||||
@imported_topics[key] = row[0] + PRIVATE_OFFSET
|
@imported_topics[key] = row[0] + PRIVATE_OFFSET
|
||||||
|
|
||||||
{
|
{
|
||||||
archetype: Archetype.private_message,
|
archetype: Archetype.private_message,
|
||||||
imported_id: row[0] + PRIVATE_OFFSET,
|
imported_id: row[0] + PRIVATE_OFFSET,
|
||||||
title: normalize_text(title),
|
title: title,
|
||||||
user_id: user_id_from_imported_id(row[2]),
|
user_id: user_id_from_imported_id(row[2]),
|
||||||
created_at: Time.zone.at(row[4]),
|
created_at: Time.zone.at(row[4]),
|
||||||
}
|
}
|
||||||
|
@ -410,7 +415,7 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
def import_topic_allowed_users
|
def import_topic_allowed_users
|
||||||
puts "Importing topic allowed users..."
|
puts "Importing topic allowed users..."
|
||||||
|
|
||||||
allowed_users = []
|
allowed_users = Set.new
|
||||||
|
|
||||||
mysql_stream(<<-SQL
|
mysql_stream(<<-SQL
|
||||||
SELECT pmtextid, touserarray
|
SELECT pmtextid, touserarray
|
||||||
|
|
Loading…
Reference in New Issue