discourse/script/import_scripts/vbulletin.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1028 lines
30 KiB
Ruby
Raw Permalink Normal View History

# frozen_string_literal: true
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require "htmlentities"
begin
require "php_serialize" # https://github.com/jqr/php-serialize
rescue LoadError
puts
puts "php_serialize not found."
puts "Add to Gemfile, like this: "
puts
puts "echo gem \\'php-serialize\\' >> Gemfile"
puts "bundle install"
exit
end
# See https://meta.discourse.org/t/importing-from-vbulletin-4/54881
# Please update there if substantive changes are made!
2014-08-18 07:04:08 -04:00
class ImportScripts::VBulletin < ImportScripts::Base
BATCH_SIZE = 1000
2014-08-18 07:04:08 -04:00
# CHANGE THESE BEFORE RUNNING THE IMPORTER
2024-11-05 17:27:49 -05:00
DB_HOST = ENV["DB_HOST"] || "localhost"
DB_NAME = ENV["DB_NAME"] || "vbulletin"
DB_PW = ENV["DB_PW"] || ""
DB_USER = ENV["DB_USER"] || "root"
TIMEZONE = ENV["TIMEZONE"] || "America/Los_Angeles"
TABLE_PREFIX = ENV["TABLE_PREFIX"] || "vb_"
ATTACHMENT_DIR = ENV["ATTACHMENT_DIR"] || "/path/to/your/attachment/folder"
puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}"
2014-08-18 07:04:08 -04:00
def initialize
2018-06-12 14:41:21 -04:00
@bbcode_to_md = true
super
2014-08-18 07:04:08 -04:00
2018-06-12 14:41:21 -04:00
@usernames = {}
@tz = TZInfo::Timezone.get(TIMEZONE)
@htmlentities = HTMLEntities.new
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
rescue Exception => e
puts "=" * 50
puts e.message
puts <<~TEXT
Cannot connect in to database.
Hostname: #{DB_HOST}
Username: #{DB_USER}
Password: #{DB_PW}
database: #{DB_NAME}
Edit the script or set these environment variables:
export DB_HOST="localhost"
export DB_NAME="vbulletin"
export DB_PW=""
export DB_USER="root"
export TABLE_PREFIX="vb_"
export ATTACHMENT_DIR '/path/to/your/attachment/folder'
Exiting.
TEXT
exit
2014-08-18 07:04:08 -04:00
end
def execute
begin
2017-02-01 08:33:09 -05:00
mysql_query("CREATE INDEX firstpostid_index ON #{TABLE_PREFIX}thread (firstpostid)")
rescue StandardError
nil
end
2017-02-01 08:33:09 -05:00
2014-08-18 07:04:08 -04:00
import_groups
import_users
create_groups_membership
2014-08-18 07:04:08 -04:00
import_categories
import_topics
import_posts
2016-07-10 05:19:24 -04:00
import_private_messages
import_attachments
2014-08-18 07:04:08 -04:00
close_topics
post_process_posts
2016-07-10 05:19:24 -04:00
create_permalink_file
2016-07-10 05:19:24 -04:00
suspend_users
2014-08-18 07:04:08 -04:00
end
def import_groups
puts "", "importing groups..."
2014-08-18 07:04:08 -04:00
groups = mysql_query <<-SQL
SELECT usergroupid, title
2016-07-10 05:19:24 -04:00
FROM #{TABLE_PREFIX}usergroup
ORDER BY usergroupid
SQL
2014-08-18 07:04:08 -04:00
create_groups(groups) do |group|
{ id: group["usergroupid"], name: @htmlentities.decode(group["title"]).strip }
2014-08-18 07:04:08 -04:00
end
end
2014-08-18 07:04:08 -04:00
2018-06-12 14:41:21 -04:00
def get_username_for_old_username(old_username)
if @usernames.has_key?(old_username)
@usernames[old_username]
else
old_username
end
end
def import_users
puts "", "importing users"
2014-08-18 07:04:08 -04:00
2016-07-10 05:19:24 -04:00
user_count = mysql_query("SELECT COUNT(userid) count FROM #{TABLE_PREFIX}user").first["count"]
2014-08-18 07:04:08 -04:00
2017-02-01 08:33:09 -05:00
last_user_id = -1
batches(BATCH_SIZE) do |offset|
2017-02-01 08:33:09 -05:00
users = mysql_query(<<-SQL).to_a
SELECT userid
, username
, homepage
, usertitle
, usergroupid
, joindate
, email
, password
, salt
2016-07-10 05:19:24 -04:00
FROM #{TABLE_PREFIX}user
2017-02-01 08:33:09 -05:00
WHERE userid > #{last_user_id}
ORDER BY userid
LIMIT #{BATCH_SIZE}
SQL
2014-08-18 07:04:08 -04:00
2017-02-01 08:33:09 -05:00
break if users.empty?
2017-02-01 08:33:09 -05:00
last_user_id = users[-1]["userid"]
users.reject! { |u| @lookup.user_already_imported?(u["userid"]) }
create_users(users, total: user_count, offset: offset) do |user|
2017-02-01 08:33:09 -05:00
email = user["email"].presence || fake_email
email = fake_email if !EmailAddressValidator.valid_value?(email)
2017-02-01 08:33:09 -05:00
password = [user["password"].presence, user["salt"].presence].compact.join(":")
username = @htmlentities.decode(user["username"]).strip
2014-08-18 07:04:08 -04:00
{
id: user["userid"],
name: username,
username: username,
password: password,
2017-02-01 08:33:09 -05:00
email: email,
merge: true,
website: user["homepage"].strip,
title: @htmlentities.decode(user["usertitle"]).strip,
primary_group_id: group_id_from_imported_group_id(user["usergroupid"].to_i),
created_at: parse_timestamp(user["joindate"]),
2016-07-10 05:19:24 -04:00
last_seen_at: parse_timestamp(user["lastvisit"]),
post_create_action:
proc do |u|
import_profile_picture(user, u)
import_profile_background(user, u)
end,
2014-08-18 07:04:08 -04:00
}
end
end
2018-06-12 14:41:21 -04:00
@usernames =
UserCustomField
.joins(:user)
.where(name: "import_username")
.pluck("user_custom_fields.value", "users.username")
.to_h
end
2014-08-18 07:04:08 -04:00
def create_groups_membership
puts "", "Creating groups membership..."
Group.find_each do |group|
begin
next if group.automatic
puts "\t#{group.name}"
next if GroupUser.where(group_id: group.id).count > 0
user_ids_in_group = User.where(primary_group_id: group.id).pluck(:id).to_a
next if user_ids_in_group.size == 0
values =
user_ids_in_group
.map { |user_id| "(#{group.id}, #{user_id}, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)" }
.join(",")
DB.exec <<~SQL
INSERT INTO group_users (group_id, user_id, created_at, updated_at) VALUES #{values}
SQL
rescue Exception => e
puts e.message
puts e.backtrace.join("\n")
end
end
Group.reset_all_counters!
end
def import_profile_picture(old_user, imported_user)
query = mysql_query <<-SQL
SELECT filedata, filename
2016-07-10 05:19:24 -04:00
FROM #{TABLE_PREFIX}customavatar
WHERE userid = #{old_user["userid"]}
ORDER BY dateline DESC
LIMIT 1
SQL
picture = query.first
return if picture.nil?
return if picture["filedata"].nil?
file = Tempfile.new("profile-picture")
file.write(picture["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
file.rewind
upload = UploadCreator.new(file, picture["filename"]).create_for(imported_user.id)
return if !upload.persisted?
imported_user.create_user_avatar
imported_user.user_avatar.update(custom_upload_id: upload.id)
imported_user.update(uploaded_avatar_id: upload.id)
ensure
begin
file.close
rescue StandardError
nil
end
begin
file.unlind
rescue StandardError
nil
end
end
def import_profile_background(old_user, imported_user)
query = mysql_query <<-SQL
SELECT filedata, filename
2016-07-10 05:19:24 -04:00
FROM #{TABLE_PREFIX}customprofilepic
WHERE userid = #{old_user["userid"]}
ORDER BY dateline DESC
LIMIT 1
SQL
background = query.first
return if background.nil?
return if background["filedata"].nil?
file = Tempfile.new("profile-background")
file.write(background["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
file.rewind
upload = UploadCreator.new(file, background["filename"]).create_for(imported_user.id)
return if !upload.persisted?
imported_user.user_profile.upload_profile_background(upload)
ensure
begin
file.close
rescue StandardError
nil
end
begin
file.unlink
rescue StandardError
nil
end
end
def import_categories
puts "", "importing top level categories..."
2016-07-10 05:19:24 -04:00
categories =
mysql_query(
"SELECT forumid, title, description, displayorder, parentid FROM #{TABLE_PREFIX}forum ORDER BY forumid",
).to_a
top_level_categories = categories.select { |c| c["parentid"] == -1 }
create_categories(top_level_categories) do |category|
{
id: category["forumid"],
name: @htmlentities.decode(category["title"]).strip,
position: category["displayorder"],
description: @htmlentities.decode(category["description"]).strip,
}
2014-08-18 07:04:08 -04:00
end
puts "", "importing children categories..."
children_categories = categories.select { |c| c["parentid"] != -1 }
top_level_category_ids = Set.new(top_level_categories.map { |c| c["forumid"] })
# cut down the tree to only 2 levels of categories
children_categories.each do |cc|
while !top_level_category_ids.include?(cc["parentid"])
cc["parentid"] = categories.detect { |c| c["forumid"] == cc["parentid"] }["parentid"]
end
end
create_categories(children_categories) do |category|
{
id: category["forumid"],
name: @htmlentities.decode(category["title"]).strip,
position: category["displayorder"],
description: @htmlentities.decode(category["description"]).strip,
parent_category_id: category_id_from_imported_category_id(category["parentid"]),
}
end
end
2014-08-18 07:04:08 -04:00
def import_topics
puts "", "importing topics..."
2016-07-10 05:19:24 -04:00
topic_count =
mysql_query("SELECT COUNT(threadid) count FROM #{TABLE_PREFIX}thread").first["count"]
2017-02-01 08:33:09 -05:00
last_topic_id = -1
batches(BATCH_SIZE) do |offset|
2017-02-01 08:33:09 -05:00
topics = mysql_query(<<-SQL).to_a
SELECT t.threadid threadid, t.title title, forumid, open, postuserid, t.dateline dateline, views, t.visible visible, sticky,
p.pagetext raw
2016-07-10 05:19:24 -04:00
FROM #{TABLE_PREFIX}thread t
JOIN #{TABLE_PREFIX}post p ON p.postid = t.firstpostid
2017-02-01 08:33:09 -05:00
WHERE t.threadid > #{last_topic_id}
ORDER BY t.threadid
LIMIT #{BATCH_SIZE}
SQL
2017-02-01 08:33:09 -05:00
break if topics.empty?
last_topic_id = topics[-1]["threadid"]
topics.reject! { |t| @lookup.post_already_imported?("thread-#{t["threadid"]}") }
2014-08-18 07:04:08 -04:00
create_posts(topics, total: topic_count, offset: offset) do |topic|
raw =
begin
preprocess_post_raw(topic["raw"])
rescue StandardError
nil
end
next if raw.blank?
topic_id = "thread-#{topic["threadid"]}"
2014-08-18 07:04:08 -04:00
t = {
id: topic_id,
user_id: user_id_from_imported_user_id(topic["postuserid"]) || Discourse::SYSTEM_USER_ID,
title: @htmlentities.decode(topic["title"]).strip[0...255],
category: category_id_from_imported_category_id(topic["forumid"]),
raw: raw,
created_at: parse_timestamp(topic["dateline"]),
visible: topic["visible"].to_i == 1,
views: topic["views"],
2014-08-18 07:04:08 -04:00
}
t[:pinned_at] = t[:created_at] if topic["sticky"].to_i == 1
2014-08-18 07:04:08 -04:00
t
end
2018-06-12 14:41:21 -04:00
# Add the following to permalink_normalizations for this to work:
# /forum\/.*?\/(\d*)\-.*/thread/\1
topics.each do |thread|
topic_id = "thread-#{thread["threadid"]}"
topic = topic_lookup_from_imported_post_id(topic_id)
if topic.present?
url_slug = "thread/#{thread["threadid"]}" if thread["title"].present?
if url_slug.present? && topic[:topic_id].present?
Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i)
end
2018-06-12 14:41:21 -04:00
end
end
2014-08-18 07:04:08 -04:00
end
end
2014-08-18 07:04:08 -04:00
def import_posts
puts "", "importing posts..."
2014-08-18 07:04:08 -04:00
2017-02-01 08:33:09 -05:00
post_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(postid) count
FROM #{TABLE_PREFIX}post p
JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
WHERE t.firstpostid <> p.postid
SQL
2017-02-01 08:33:09 -05:00
last_post_id = -1
2014-08-18 07:04:08 -04:00
batches(BATCH_SIZE) do |offset|
2017-02-01 08:33:09 -05:00
posts = mysql_query(<<-SQL).to_a
SELECT p.postid, p.userid, p.threadid, p.pagetext raw, p.dateline, p.visible, p.parentid
FROM #{TABLE_PREFIX}post p
JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
WHERE t.firstpostid <> p.postid
AND p.postid > #{last_post_id}
ORDER BY p.postid
LIMIT #{BATCH_SIZE}
SQL
2017-02-01 08:33:09 -05:00
break if posts.empty?
2017-02-01 08:33:09 -05:00
last_post_id = posts[-1]["postid"]
posts.reject! { |p| @lookup.post_already_imported?(p["postid"].to_i) }
2014-08-18 07:04:08 -04:00
create_posts(posts, total: post_count, offset: offset) do |post|
raw =
begin
preprocess_post_raw(post["raw"])
rescue StandardError
nil
end
next if raw.blank?
next unless topic = topic_lookup_from_imported_post_id("thread-#{post["threadid"]}")
2014-08-18 07:04:08 -04:00
p = {
id: post["postid"],
user_id: user_id_from_imported_user_id(post["userid"]) || Discourse::SYSTEM_USER_ID,
topic_id: topic[:topic_id],
raw: raw,
created_at: parse_timestamp(post["dateline"]),
hidden: post["visible"].to_i != 1,
2014-08-18 07:04:08 -04:00
}
if parent = topic_lookup_from_imported_post_id(post["parentid"])
2014-08-18 07:04:08 -04:00
p[:reply_to_post_number] = parent[:post_number]
end
p
end
end
end
2014-08-18 07:04:08 -04:00
# find the uploaded file information from the db
def find_upload(post, attachment_id)
sql =
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
2018-06-12 14:41:21 -04:00
LENGTH(fd.filedata) AS dbsize, filedata, a.caption caption
2016-07-10 05:19:24 -04:00
FROM #{TABLE_PREFIX}attachment a
2018-06-12 14:41:21 -04:00
LEFT JOIN #{TABLE_PREFIX}filedata fd ON fd.filedataid = a.filedataid
WHERE a.attachmentid = #{attachment_id}"
results = mysql_query(sql)
2017-02-01 08:33:09 -05:00
unless row = results.first
puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}"
2017-02-01 08:33:09 -05:00
return
end
filename =
File.join(ATTACHMENT_DIR, row["user_id"].to_s.split("").join("/"), "#{row["file_id"]}.attach")
2018-06-12 14:41:21 -04:00
real_filename = row["filename"]
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
unless File.exist?(filename)
2018-06-12 14:41:21 -04:00
if row["dbsize"].to_i == 0
puts "Attachment file #{row["filedataid"]} doesn't exist"
return nil
end
tmpfile = "attach_" + row["filedataid"].to_s
filename = File.join("/tmp/", tmpfile)
File.open(filename, "wb") { |f| f.write(row["filedata"]) }
end
2017-02-01 08:33:09 -05:00
upload = create_upload(post.user.id, filename, real_filename)
if upload.nil? || !upload.valid?
puts "Upload not valid :("
puts upload.errors.inspect if upload
2017-02-01 08:33:09 -05:00
return
end
2017-02-01 08:33:09 -05:00
[upload, real_filename]
rescue Mysql2::Error => e
puts "SQL Error"
puts e.message
puts sql
end
2016-07-10 05:19:24 -04:00
def import_private_messages
puts "", "importing private messages..."
topic_count =
mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]
2017-02-01 08:33:09 -05:00
last_private_message_id = -1
2016-07-10 05:19:24 -04:00
batches(BATCH_SIZE) do |offset|
2017-02-01 08:33:09 -05:00
private_messages = mysql_query(<<-SQL).to_a
2016-07-10 05:19:24 -04:00
SELECT pmtextid, fromuserid, title, message, touserarray, dateline
2017-02-01 08:33:09 -05:00
FROM #{TABLE_PREFIX}pmtext
WHERE pmtextid > #{last_private_message_id}
ORDER BY pmtextid
LIMIT #{BATCH_SIZE}
2016-07-10 05:19:24 -04:00
SQL
2017-02-01 08:33:09 -05:00
break if private_messages.empty?
2016-07-10 05:19:24 -04:00
2017-02-01 08:33:09 -05:00
last_private_message_id = private_messages[-1]["pmtextid"]
private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") }
2016-07-10 05:19:24 -04:00
title_username_of_pm_first_post = {}
create_posts(private_messages, total: topic_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = "pm-#{m["pmtextid"]}"
mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) ||
Discourse::SYSTEM_USER_ID
mapped[:raw] = begin
preprocess_post_raw(m["message"])
rescue StandardError
nil
end
2016-07-10 05:19:24 -04:00
mapped[:created_at] = Time.zone.at(m["dateline"])
title = @htmlentities.decode(m["title"]).strip[0...255]
topic_id = nil
next if mapped[:raw].blank?
# users who are part of this private message.
target_usernames = []
target_userids = []
begin
to_user_array = PHP.unserialize(m["touserarray"])
rescue StandardError
puts "#{m["pmtextid"]} -- #{m["touserarray"]}"
skip = true
end
begin
to_user_array.each do |to_user|
if to_user[0] == "cc" || to_user[0] == "bcc" # not sure if we should include bcc users
to_user[1].each do |to_user_cc|
user_id = user_id_from_imported_user_id(to_user_cc[0])
username = User.find_by(id: user_id).try(:username)
target_userids << user_id || Discourse::SYSTEM_USER_ID
target_usernames << username if username
end
else
user_id = user_id_from_imported_user_id(to_user[0])
username = User.find_by(id: user_id).try(:username)
target_userids << user_id || Discourse::SYSTEM_USER_ID
target_usernames << username if username
end
end
rescue StandardError
puts "skipping pm-#{m["pmtextid"]} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
skip = true
end
participants = target_userids
participants << mapped[:user_id]
begin
participants.sort!
rescue StandardError
puts "one of the participant's id is nil -- #{participants.inspect}"
end
if title =~ /^Re:/
2017-02-01 08:33:09 -05:00
parent_id =
title_username_of_pm_first_post[[title[3..-1], participants]] ||
title_username_of_pm_first_post[[title[4..-1], participants]] ||
title_username_of_pm_first_post[[title[5..-1], participants]] ||
title_username_of_pm_first_post[[title[6..-1], participants]] ||
title_username_of_pm_first_post[[title[7..-1], participants]] ||
title_username_of_pm_first_post[[title[8..-1], participants]]
2016-09-13 22:45:48 -04:00
if parent_id
2016-07-10 05:19:24 -04:00
if t = topic_lookup_from_imported_post_id("pm-#{parent_id}")
topic_id = t[:topic_id]
end
end
else
title_username_of_pm_first_post[[title, participants]] ||= m["pmtextid"]
end
if topic_id
mapped[:topic_id] = topic_id
else
2016-07-10 05:19:24 -04:00
mapped[:title] = title
mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = target_usernames.join(",")
2017-02-01 08:33:09 -05:00
if mapped[:target_usernames].size < 1 # pm with yourself?
2016-07-10 05:19:24 -04:00
# skip = true
mapped[:target_usernames] = "system"
puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})"
end
end
skip ? nil : mapped
end
end
end
def import_attachments
puts "", "importing attachments..."
mapping = {}
attachments = mysql_query(<<-SQL)
SELECT a.attachmentid, a.contentid as postid, p.threadid
FROM #{TABLE_PREFIX}attachment a, #{TABLE_PREFIX}post p
WHERE a.contentid = p.postid
AND contenttypeid = 1 AND state = 'visible'
SQL
attachments.each do |attachment|
post_id = post_id_from_imported_post_id(attachment["postid"])
post_id = post_id_from_imported_post_id("thread-#{attachment["threadid"]}") unless post_id
if post_id.nil?
puts "Post for attachment #{attachment["attachmentid"]} not found"
next
end
mapping[post_id] ||= []
mapping[post_id] << attachment["attachmentid"].to_i
end
current_count = 0
total_count = Post.count
success_count = 0
fail_count = 0
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
Post.find_each do |post|
current_count += 1
print_status current_count, total_count
new_raw = post.raw.dup
new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s)
attachment_id = matches[1]
mapping[post.id].delete(attachment_id.to_i) unless mapping[post.id].nil?
upload, filename = find_upload(post, attachment_id)
unless upload
fail_count += 1
next
end
html_for_upload(upload, filename)
end
# make resumed imports faster
if new_raw == post.raw
unless mapping[post.id].nil? || mapping[post.id].empty?
imported_text = mysql_query(<<-SQL).first["pagetext"]
SELECT p.pagetext
FROM #{TABLE_PREFIX}attachment a, #{TABLE_PREFIX}post p
WHERE a.contentid = p.postid
AND a.attachmentid = #{mapping[post.id][0]}
SQL
imported_text.scan(attachment_regex) do |match|
attachment_id = match[0]
mapping[post.id].delete(attachment_id.to_i)
end
end
end
unless mapping[post.id].nil? || mapping[post.id].empty?
mapping[post.id].each do |attachment_id|
upload, filename = find_upload(post, attachment_id)
unless upload
fail_count += 1
next
end
# internal upload deduplication will make sure that we do not import attachments again
html = html_for_upload(upload, filename)
new_raw += "\n\n#{html}\n\n" if !new_raw[html]
end
end
if new_raw != post.raw
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from vBulletin",
)
end
success_count += 1
end
end
def close_topics
puts "", "Closing topics..."
2016-12-15 07:20:05 -05:00
# keep track of closed topics
closed_topic_ids = []
topics = mysql_query <<-SQL
2016-12-15 07:20:05 -05:00
SELECT t.threadid threadid, firstpostid, open
FROM #{TABLE_PREFIX}thread t
JOIN #{TABLE_PREFIX}post p ON p.postid = t.firstpostid
ORDER BY t.threadid
SQL
2016-12-15 07:20:05 -05:00
topics.each do |topic|
topic_id = "thread-#{topic["threadid"]}"
closed_topic_ids << topic_id if topic["open"] == 0
end
sql = <<-SQL
WITH closed_topic_ids AS (
SELECT t.id AS topic_id
2016-12-03 10:31:10 -05:00
FROM post_custom_fields pcf
JOIN posts p ON p.id = pcf.post_id
JOIN topics t ON t.id = p.topic_id
WHERE pcf.name = 'import_id'
AND pcf.value IN (?)
)
UPDATE topics
SET closed = true
2016-12-03 10:31:10 -05:00
WHERE id IN (SELECT topic_id FROM closed_topic_ids)
SQL
DB.exec(sql, closed_topic_ids)
end
def post_process_posts
puts "", "Postprocessing posts..."
current = 0
max = Post.count
Post.find_each do |post|
begin
2018-06-12 14:41:21 -04:00
old_raw = post.raw.dup
new_raw = postprocess_post_raw(post.raw)
2018-06-12 14:41:21 -04:00
if new_raw != old_raw
post.raw = new_raw
post.save
end
rescue PrettyText::JavaScriptError
nil
ensure
print_status(current += 1, max)
end
end
end
def preprocess_post_raw(raw)
return "" if raw.blank?
2014-08-18 07:04:08 -04:00
# decode HTML entities
raw = @htmlentities.decode(raw)
# fix whitespaces
2016-12-03 10:31:10 -05:00
raw.gsub!(/(\\r)?\\n/, "\n")
2016-12-04 21:16:59 -05:00
raw.gsub!("\\t", "\t")
2014-08-18 07:04:08 -04:00
# [HTML]...[/HTML]
2016-12-03 10:31:10 -05:00
raw.gsub!(/\[html\]/i, "\n```html\n")
2016-12-04 21:16:59 -05:00
raw.gsub!(%r{\[/html\]}i, "\n```\n")
2014-08-18 07:04:08 -04:00
# [PHP]...[/PHP]
2016-12-03 10:31:10 -05:00
raw.gsub!(/\[php\]/i, "\n```php\n")
2016-12-04 21:16:59 -05:00
raw.gsub!(%r{\[/php\]}i, "\n```\n")
2014-08-18 07:04:08 -04:00
# [HIGHLIGHT="..."]
2016-12-03 10:31:10 -05:00
raw.gsub!(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }
2014-08-18 07:04:08 -04:00
# [CODE]...[/CODE]
# [HIGHLIGHT]...[/HIGHLIGHT]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[/?code\]}i, "\n```\n")
2016-12-04 21:16:59 -05:00
raw.gsub!(%r{\[/?highlight\]}i, "\n```\n")
# [SAMP]...[/SAMP]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[/?samp\]}i, "`")
# replace all chevrons with HTML entities
# NOTE: must be done
# - AFTER all the "code" processing
# - BEFORE the "quote" processing
2016-12-03 10:31:10 -05:00
raw.gsub!(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
2016-12-04 21:16:59 -05:00
raw.gsub!("<", "&lt;")
raw.gsub!("\u2603", "<")
2014-08-18 07:04:08 -04:00
2016-12-03 10:31:10 -05:00
raw.gsub!(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
2016-12-04 21:16:59 -05:00
raw.gsub!(">", "&gt;")
raw.gsub!("\u2603", ">")
2014-08-18 07:04:08 -04:00
# [URL=...]...[/URL]
2016-07-10 05:19:24 -04:00
raw.gsub!(%r{\[url="?([^"]+?)"?\](.*?)\[/url\]}im) { "[#{$2.strip}](#{$1})" }
raw.gsub!(%r{\[url="?(.+?)"?\](.+)\[/url\]}im) { "[#{$2.strip}](#{$1})" }
2014-08-18 07:04:08 -04:00
# [URL]...[/URL]
# [MP3]...[/MP3]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[/?url\]}i, "")
2016-12-04 21:16:59 -05:00
raw.gsub!(%r{\[/?mp3\]}i, "")
2014-08-18 07:04:08 -04:00
# [MENTION]<username>[/MENTION]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[mention\](.+?)\[/mention\]}i) do
2018-06-12 14:41:21 -04:00
new_username = get_username_for_old_username($1)
"@#{new_username}"
end
2014-08-18 07:04:08 -04:00
# [FONT=blah] and [COLOR=blah]
raw.gsub! %r{\[FONT=.*?\](.*?)\[/FONT\]}im, '\1'
raw.gsub! %r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, '\1'
raw.gsub! %r{\[COLOR=#.*?\](.*?)\[/COLOR\]}im, '\1'
raw.gsub! %r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, '\1'
2018-06-12 14:41:21 -04:00
raw.gsub! %r{\[SUP\](.*?)\[/SUP\]}im, '\1'
raw.gsub! %r{\[h=.*?\](.*?)\[/h\]}im, '\1'
# [CENTER]...[/CENTER]
raw.gsub! %r{\[CENTER\](.*?)\[/CENTER\]}im, '\1'
# [INDENT]...[/INDENT]
raw.gsub! %r{\[INDENT\](.*?)\[/INDENT\]}im, '\1'
2018-06-12 14:41:21 -04:00
# Tables to MD
raw.gsub!(%r{\[TABLE.*?\](.*?)\[/TABLE\]}im) do |t|
rows =
$1.gsub!(%r{\s*\[TR\](.*?)\[/TR\]\s*}im) do |r|
cols = $1.gsub! %r{\s*\[TD.*?\](.*?)\[/TD\]\s*}im, '|\1'
"#{cols}|\n"
end
2018-06-12 14:41:21 -04:00
header, rest = rows.split "\n", 2
c = header.count "|"
sep = "|---" * (c - 1)
"#{header}\n#{sep}|\n#{rest}\n"
end
# [QUOTE]...[/QUOTE]
2016-07-10 05:19:24 -04:00
raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote|
quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" }
quote.gsub!(/\n(.+?)/) { "\n> #{$1}" }
end
# [QUOTE=<username>]...[/QUOTE]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
old_username, quote = $1, $2
2018-06-12 14:41:21 -04:00
new_username = get_username_for_old_username(old_username)
"\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n"
2014-08-18 07:04:08 -04:00
end
# [YOUTUBE]<id>[/YOUTUBE]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\n//youtu.be/#{$1}\n" }
# [VIDEO=youtube;<id>]...[/VIDEO]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) { "\n//youtu.be/#{$1}\n" }
2018-06-12 14:41:21 -04:00
# Fix uppercase B U and I tags
raw.gsub!(%r{(\[/?[BUI]\])}i) { $1.downcase }
2016-07-10 05:19:24 -04:00
# More Additions ....
# [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler]
raw.gsub!(%r{\[spoiler="?(.+?)"?\](.+?)\[/spoiler\]}im) do
"\n#{$1}\n[spoiler]#{$2}[/spoiler]\n"
end
2016-07-10 05:19:24 -04:00
# [IMG][IMG]http://i63.tinypic.com/akga3r.jpg[/IMG][/IMG]
raw.gsub!(%r{\[IMG\]\[IMG\](.+?)\[/IMG\]\[/IMG\]}i) { "[IMG]#{$1}[/IMG]" }
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
# (https://meta.discourse.org/t/phpbb-3-importer-old/17397)
raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\](.*?)\[/list\]}im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\](.*?)\[/list:o\]}im, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
raw.gsub!(/\[\*\]\n/, "")
raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]')
raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
raw.gsub!(/\[\*=1\]/, "")
2016-07-10 05:19:24 -04:00
raw
end
def postprocess_post_raw(raw)
# [QUOTE=<username>;<post_id>]...[/QUOTE]
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[quote=([^;]+);(\d+)\](.+?)\[/quote\]}im) do
old_username, post_id, quote = $1, $2, $3
2018-06-12 14:41:21 -04:00
new_username = get_username_for_old_username(old_username)
# There is a bug here when the first post in a topic is quoted.
# The first post in a topic does not have an post_custom_field referring to the post number,
# but it refers to thread-XXX instead, so this lookup fails miserably then.
# Fixing this would imply rewriting that logic completely.
if topic_lookup = topic_lookup_from_imported_post_id(post_id)
post_number = topic_lookup[:post_number]
topic_id = topic_lookup[:topic_id]
2018-06-12 14:41:21 -04:00
"\n[quote=\"#{new_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n"
else
2018-06-12 14:41:21 -04:00
"\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n"
end
end
# remove attachments
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[attach[^\]]*\]\d+\[/attach\]}i, "")
# [THREAD]<thread_id>[/THREAD]
# ==> http://my.discourse.org/t/slug/<topic_id>
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[thread\](\d+)\[/thread\]}i) do
thread_id = $1
if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
topic_lookup[:url]
else
$&
end
end
# [THREAD=<thread_id>]...[/THREAD]
# ==> [...](http://my.discourse.org/t/slug/<topic_id>)
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[thread=(\d+)\](.+?)\[/thread\]}i) do
thread_id, link = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
url = topic_lookup[:url]
"[#{link}](#{url})"
else
$&
end
end
# [POST]<post_id>[/POST]
# ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[post\](\d+)\[/post\]}i) do
post_id = $1
if topic_lookup = topic_lookup_from_imported_post_id(post_id)
topic_lookup[:url]
else
$&
end
end
# [POST=<post_id>]...[/POST]
# ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>)
2016-12-03 10:31:10 -05:00
raw.gsub!(%r{\[post=(\d+)\](.+?)\[/post\]}i) do
post_id, link = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id(post_id)
url = topic_lookup[:url]
"[#{link}](#{url})"
else
$&
end
end
raw
end
def create_permalink_file
puts "", "Creating Permalink File...", ""
2016-07-10 05:19:24 -04:00
id_mapping = []
Topic.listable_topics.find_each do |topic|
pcf = topic.first_post.custom_fields
if pcf && pcf["import_id"]
id = pcf["import_id"].split("-").last
id_mapping.push("XXX#{id} YYY#{topic.id}")
end
end
# Category.find_each do |cat|
# ccf = cat.custom_fields
# if ccf && ccf["import_id"]
# id = ccf["import_id"].to_i
# id_mapping.push("/forumdisplay.php?#{id} http://forum.quartertothree.com#{cat.url}")
# end
# end
CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
id_mapping.each { |value| csv << [value] }
end
end
def suspend_users
puts "", "updating banned users"
banned = 0
failed = 0
total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}userban").first["count"]
system_user = Discourse.system_user
mysql_query("SELECT userid, bandate FROM #{TABLE_PREFIX}userban").each do |b|
user = User.find_by_id(user_id_from_imported_user_id(b["userid"]))
2016-07-10 05:19:24 -04:00
if user
user.suspended_at = parse_timestamp(user["bandate"])
user.suspended_till = 200.years.from_now
if user.save
StaffActionLogger.new(system_user).log_user_suspend(user, "banned during initial import")
banned += 1
else
puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
failed += 1
end
else
puts "Not found: #{b["userid"]}"
failed += 1
end
print_status banned + failed, total
end
end
def parse_timestamp(timestamp)
Time.zone.at(@tz.utc_to_local(timestamp))
end
def mysql_query(sql)
2015-10-21 13:07:31 -04:00
@client.query(sql, cache_rows: true)
end
2014-08-18 07:04:08 -04:00
end
ImportScripts::VBulletin.new.perform