2018-01-30 11:44:02 -05:00
|
|
|
# coding: utf-8
|
2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2018-01-30 11:44:02 -05:00
|
|
|
require "mysql2"
|
|
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
require "htmlentities"
|
|
|
|
# reverse_markdown is loaded defensively: when the gem is missing, print how to
# install it and stop with a non-zero status so calling shells see the failure.
begin
  require "reverse_markdown" # https://github.com/xijo/reverse_markdown
rescue LoadError
  puts
  puts "reverse_markdown not found."
  puts "Add to Gemfile, like this: "
  puts
  puts "echo gem \\'reverse_markdown\\' >> Gemfile"
  puts "bundle install"
  exit 1
end
|
|
|
|
|
|
|
|
# Before running this script, paste these lines into your shell,
|
|
|
|
# then use arrow keys to edit the values
|
|
|
|
=begin
|
|
|
|
export DB_HOST="localhost"
|
|
|
|
export DB_NAME="ipboard"
|
|
|
|
export DB_PW="ipboard"
|
|
|
|
export DB_USER="ipboard"
|
|
|
|
export TABLE_PREFIX="ipb_"
|
|
|
|
export IMPORT_AFTER="1970-01-01"
|
|
|
|
export UPLOADS="http://example.com/uploads"
|
|
|
|
export URL="http://example.com/"
|
|
|
|
export AVATARS_DIR="/imports/avatars/"
|
|
|
|
export USERDIR="user"
|
|
|
|
=end
|
|
|
|
|
|
|
|
class ImportScripts::IpboardSQL < ImportScripts::Base
|
|
|
|
# Connection and import settings. Each one keeps an already-defined value,
# otherwise takes the environment variable of the same name, otherwise the
# default given here.
DB_HOST ||= ENV.fetch("DB_HOST", "localhost")
DB_NAME ||= ENV.fetch("DB_NAME", "ipboard")
DB_PW ||= ENV.fetch("DB_PW", "ipboard")
DB_USER ||= ENV.fetch("DB_USER", "ipboard")
TABLE_PREFIX ||= ENV.fetch("TABLE_PREFIX", "ipb_")
IMPORT_AFTER ||= ENV.fetch("IMPORT_AFTER", "1970-01-01")
UPLOADS ||= ENV.fetch("UPLOADS", "http://UPLOADS+LOCATION+IS+NOT+SET/uploads")
USERDIR ||= ENV.fetch("USERDIR", "user")
URL ||= ENV.fetch("URL", "https://forum.example.com")
AVATARS_DIR ||= ENV.fetch("AVATARS_DIR", "/home/pfaffman/data/example.com/avatars/")

# Fixed switches for this import run.
BATCH_SIZE = 1000
ID_FIRST = true
QUIET = true
DEBUG = false
GALLERY_CAT_ID = 1_234_567
GALLERY_CAT_NAME = "galeria"
EMO_DIR ||= ENV.fetch("EMO_DIR", "default")
OLD_FORMAT = false

# Table names for the two supported schema layouts.
# NOTE(review): GROUPS_TABLE, PROFILE_TABLE and ATTACHMENT_TABLE are only
# defined for the non-OLD_FORMAT layout; import_users references the first
# two, so OLD_FORMAT = true looks unusable as written -- confirm.
if OLD_FORMAT
  MEMBERS_TABLE = TABLE_PREFIX + "core_members"
  FORUMS_TABLE = TABLE_PREFIX + "forums_forums"
  POSTS_TABLE = TABLE_PREFIX + "forums_posts"
  TOPICS_TABLE = TABLE_PREFIX + "forums_topics"
else
  MEMBERS_TABLE = TABLE_PREFIX + "members"
  FORUMS_TABLE = TABLE_PREFIX + "forums"
  POSTS_TABLE = TABLE_PREFIX + "posts"
  TOPICS_TABLE = TABLE_PREFIX + "topics"
  GROUPS_TABLE = TABLE_PREFIX + "groups"
  PROFILE_TABLE = TABLE_PREFIX + "profile_portal"
  ATTACHMENT_TABLE = TABLE_PREFIX + "attachments"
end

# TODO: replace ipb_ with TABLE_PREFIX

#################
# Site settings #
#################

# Do not send any emails.
SiteSetting.disable_emails = "non-staff"
# Do not send digests (so email can be enabled without users noticing).
SiteSetting.disable_digest_emails = true
# Keep the site and its users private.
SiteSetting.login_required = true
SiteSetting.hide_user_profiles_from_public = true
# If the site is made available, do not let it get indexed.
SiteSetting.allow_index_in_robots_txt = false
# Do not notify users when images in their posts get downloaded.
SiteSetting.disable_system_edit_notifications = true
# SiteSetting.force_hostname='forum.dev1dev.com'
SiteSetting.title = "IPB Import"

if ID_FIRST
  # TODO figure this out
  puts "WARNING: permalink_normalizations not set!!!"
  sleep 1
  #raw = "[ORIGINAL POST](#{URL}/topic/#{id}-#{slug})\n\n" + raw
  #SiteSetting.permalink_normalizations='/topic/(.*t)\?.*/\1'
else
  # Remove everything after a "?" and also handle URLs that end in .html.
  SiteSetting.permalink_normalizations = '/(.*t)[?.].*/\1'
  #raw = "[ORIGINAL POST](#{URL}/#{slug}-#{id}t)\n\n" + raw
end
|
|
|
|
|
|
|
|
# Sets up the importer and opens the MySQL connection.
#
# Prints a warning when IMPORT_AFTER is later than 1970-01-01, calls the
# parent initializer, creates the HTML entity decoder and connects with the
# DB_* settings. If connecting fails, the error, the settings in use and the
# environment variables to export are printed and the process exits with
# status 1.
def initialize
  print_warning("Importing data after #{IMPORT_AFTER}") if IMPORT_AFTER > "1970-01-01"

  super
  @htmlentities = HTMLEntities.new
  begin
    @client =
      Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
  rescue StandardError => e
    # StandardError (not Exception) so Ctrl-C and SystemExit are not swallowed.
    puts "=" * 50
    puts e.message
    puts <<~TEXT
      Cannot log in to database.

      Hostname: #{DB_HOST}
      Username: #{DB_USER}
      Password: #{DB_PW}
      database: #{DB_NAME}

      You should set these variables:

      export DB_HOST="localhost"
      export DB_NAME="ipboard"
      export DB_PW="ipboard"
      export DB_USER="ipboard"
      export TABLE_PREFIX="ipb_"
      export IMPORT_AFTER="1970-01-01"
      export URL="http://example.com"
      export UPLOADS=
      export USERDIR="user"

      Exiting.
    TEXT
    # Non-zero status: the import did not run.
    exit 1
  end
end
|
|
|
|
|
|
|
|
# Runs every enabled import step, one after another, and returns the value of
# the last step.
def execute
  # Not supported, therefore left out: import_image_categories,
  # import_gallery_topics.
  steps = %i[
    import_users
    import_categories
    import_topics
    import_posts
    import_private_messages
    update_tl0
    create_permalinks
  ]
  steps.map { |step| send(step) }.last
end
|
|
|
|
|
|
|
|
# Imports members in batches of BATCH_SIZE.
#
# Only members whose last_activity is later than IMPORT_AFTER are read. Rows
# with member id 0, without an email or username, or that already map to a
# Discourse user are skipped. After a user is created, its avatar is attached
# from AVATARS_DIR and members flagged as banned are suspended.
def import_users
  puts "", "creating users"

  total_count =
    mysql_query(
      "SELECT count(*) count FROM #{MEMBERS_TABLE}
       WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'));",
    ).first["count"]

  batches(BATCH_SIZE) do |offset|
    # Notes: no location, no url.
    # Other avatar candidates that were tried: pp_main_photo, avatar_location.
    # TODO consider joining ibf_profile_portal.avatar_location and avatar_type
    #
    # The query used to alias two columns (FROM_UNIXTIME(last_activity) and
    # last_visit) as last_seen_at. A row hash keyed by column name holds only
    # one of them, so a single alias is kept.
    # NOTE(review): last_visit (the later column) is kept -- confirm that this
    # is the intended source for last_seen_at.
    # bday_* are still selected for the currently disabled birthday import.
    results = mysql_query(<<~SQL)
      SELECT member_id id,
             name username,
             member_group_id usergroup,
             email,
             pp_thumb_photo avatar_url,
             FROM_UNIXTIME(joined) created_at,
             ip_address registration_ip_address,
             member_banned banned,
             bday_year, bday_month, bday_day,
             g_title member_type,
             last_visit last_seen_at
      FROM #{MEMBERS_TABLE}, #{PROFILE_TABLE}, #{GROUPS_TABLE}
      WHERE last_activity > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      AND member_id=pp_member_id
      AND member_group_id = g_id
      order by member_id ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    SQL

    break if results.size < 1

    next if all_records_exist?(:users, results.map { |u| u["id"].to_i })

    create_users(results, total: total_count, offset: offset) do |user|
      next if user["id"] == 0
      next if user["email"].blank?
      next if user["username"].blank?
      next if @lookup.user_id_from_imported_user_id(user["id"])

      # TODO: what about timezones?
      {
        id: user["id"],
        email: user["email"],
        username: user["username"],
        avatar_url: user["avatar_url"],
        title: user["member_type"],
        created_at: user["created_at"].nil? ? 0 : Time.zone.at(user["created_at"]),
        # bio_raw: user['bio_raw'],
        registration_ip_address: user["registration_ip_address"],
        # birthday: (not imported)
        last_seen_at: user["last_seen_at"].nil? ? 0 : Time.zone.at(user["last_seen_at"]),
        admin: /^Admin/.match?(user["member_type"]),
        moderator: /^MOD/.match?(user["member_type"]),
        post_create_action:
          proc do |newuser|
            attach_imported_avatar(newuser, user["avatar_url"])
            suspend_user(newuser) if user["banned"] != 0
          end,
      }
    end
  end
end

# Uploads the file AVATARS_DIR + avatar_url and makes it the avatar of newuser.
# Does nothing when avatar_url is nil or empty; prints a message when the file
# is missing, the upload does not persist, or a system call fails.
def attach_imported_avatar(newuser, avatar_url)
  return if avatar_url.nil? || avatar_url.empty?

  photo_path = AVATARS_DIR + avatar_url
  unless File.exist?(photo_path)
    puts "avatar file not found at #{photo_path}"
    return
  end

  upload = create_upload(newuser.id, photo_path, File.basename(photo_path))
  if upload&.persisted?
    # import_mode is switched off only while the avatar record is created.
    newuser.import_mode = false
    newuser.create_user_avatar
    newuser.import_mode = true
    newuser.user_avatar.update(custom_upload_id: upload.id)
    newuser.update(uploaded_avatar_id: upload.id)
  else
    puts "Error: Upload did not persist for #{photo_path}!"
  end
rescue SystemCallError => err
  puts "Could not import avatar #{photo_path}: #{err.message}"
end
|
|
|
|
|
|
|
|
# Suspends +user+ for 200 years, switches off all of the user's email
# options, and records the suspension in the staff action log on behalf of the
# system user. Prints the validation errors when the user cannot be saved.
def suspend_user(user)
  user.suspended_at = Time.now
  user.suspended_till = 200.years.from_now
  ban_reason = "Account deactivated by administrator"

  options = user.user_option
  options.email_digests = false
  never = UserOption.email_level_types[:never]
  options.email_level = never
  options.email_messages_level = never
  options.save!

  unless user.save
    puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
    return
  end

  StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
end
|
|
|
|
|
|
|
|
# Joins JSON_FILES_DIR with +relpath+ after dropping everything from the first
# "?" onwards.
# NOTE(review): JSON_FILES_DIR is not defined in the visible part of this
# file -- confirm it exists before relying on this helper.
def file_full_path(relpath)
  path_without_query, = relpath.split("?")
  File.join(JSON_FILES_DIR, path_without_query)
end
|
|
|
|
|
|
|
|
# Creates the gallery root category (GALLERY_CAT_ID / GALLERY_CAT_NAME) and
# one child category per row of the gallery_categories table.
#
# Display titles come from the core_sys_lang_words rows whose word_key looks
# like "gallery_category_<id>". A title that is already used gets the id
# appended. A category without a title row falls back to its
# category_name_seo value instead of raising.
def import_image_categories
  puts "", "importing image categories..."

  categories = mysql_query(<<~SQL).to_a
    SELECT category_id id,
           category_name_seo name,
           category_parent_id as parent_id
    FROM #{TABLE_PREFIX}gallery_categories
    ORDER BY id ASC
  SQL

  category_names = mysql_query(<<~SQL).to_a
    SELECT DISTINCT word_key, word_default title
    FROM #{TABLE_PREFIX}core_sys_lang_words where word_app='gallery'
    AND word_key REGEXP 'gallery_category_[0-9]+$'
    ORDER BY word_key ASC
  SQL

  # Maps the gallery category id (as a string) to its display title.
  cat_map = {}
  puts "Creating gallery_cat_map"
  category_names.each do |name|
    title = name["title"]
    word_key = name["word_key"]
    puts "Processing #{word_key}: #{title}"
    id = word_key.gsub("gallery_category_", "")
    next if cat_map[id]
    cat_map[id] = cat_map.value?(title) ? "#{title} #{id}" : title
    puts "#{id} => #{cat_map[id]}"
  end

  params = { id: GALLERY_CAT_ID, name: GALLERY_CAT_NAME }
  create_category(params, params[:id])

  create_categories(categories) do |category|
    id = category["id"].to_s
    # Not every category is guaranteed to have a language-table entry.
    title = cat_map[id] || category["name"]
    {
      id: id + "gal",
      name: CGI.unescapeHTML(title.to_s),
      parent_category_id: @lookup.category_id_from_imported_category_id(GALLERY_CAT_ID),
      color: random_category_color,
    }
  end
end
|
|
|
|
|
|
|
|
# Imports forums as categories: first the forums whose parent_id is -1 (as
# top-level categories), then all remaining forums as children of the category
# created for their parent_id.
def import_categories
  puts "", "importing categories..."

  categories = mysql_query(<<~SQL).to_a
    SELECT id,
           name name,
           parent_id as parent_id
    FROM #{FORUMS_TABLE}
    ORDER BY parent_id ASC
  SQL

  # The result column is "parent_id". Reading "parent.id" always produced nil,
  # so no forum was ever treated as top-level.
  top_level_categories, children_categories =
    categories.partition { |c| c["parent_id"].to_i == -1 }

  create_categories(top_level_categories) do |category|
    { id: category["id"].to_s, name: category["name"] }
  end

  create_categories(children_categories) do |category|
    {
      id: category["id"].to_s,
      name: category["name"],
      parent_category_id: @lookup.category_id_from_imported_category_id(category["parent_id"]),
      color: random_category_color,
    }
  end
end
|
|
|
|
|
|
|
|
# Imports the first post of every topic (rows with new_topic = 1) posted after
# IMPORT_AFTER, in batches of BATCH_SIZE. Each post is registered under the
# import id "discussion#<tid>".
def import_topics
  puts "", "importing topics..."

  total_count =
    mysql_query(
      "SELECT count(*) count FROM #{POSTS_TABLE}
       WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
       AND new_topic=1;",
    ).first["count"]

  batches(BATCH_SIZE) do |offset|
    # pid is a tie-breaker: post_date alone is not unique, and LIMIT/OFFSET
    # paging over a non-unique sort may repeat or skip rows between batches.
    discussions = mysql_query(<<~SQL)
      SELECT #{TOPICS_TABLE}.tid tid,
             #{TOPICS_TABLE}.forum_id category,
             #{POSTS_TABLE}.pid pid,
             #{TOPICS_TABLE}.title title,
             #{TOPICS_TABLE}.pinned pinned,
             #{POSTS_TABLE}.post raw,
             #{TOPICS_TABLE}.title_seo as slug,
             FROM_UNIXTIME(#{POSTS_TABLE}.post_date) created_at,
             #{POSTS_TABLE}.author_id user_id
      FROM #{POSTS_TABLE}, #{TOPICS_TABLE}
      WHERE #{POSTS_TABLE}.topic_id = #{TOPICS_TABLE}.tid
      AND #{POSTS_TABLE}.new_topic = 1
      AND #{POSTS_TABLE}.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY #{POSTS_TABLE}.post_date ASC, #{POSTS_TABLE}.pid ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if discussions.size < 1
    next if all_records_exist?(:posts, discussions.map { |t| "discussion#" + t["tid"].to_s })

    create_posts(discussions, total: total_count, offset: offset) do |discussion|
      created_at = Time.zone.at(discussion["created_at"])
      {
        id: "discussion#" + discussion["tid"].to_s,
        user_id:
          user_id_from_imported_user_id(discussion["user_id"]) || Discourse::SYSTEM_USER_ID,
        title: CGI.unescapeHTML(discussion["title"]),
        category: category_id_from_imported_category_id(discussion["category"].to_s),
        raw: clean_up(discussion["raw"]),
        # Pinned topics are pinned as of their creation time.
        pinned_at: discussion["pinned"].to_i == 1 ? created_at : nil,
        created_at: created_at,
      }
    end
  end
end
|
|
|
|
|
|
|
|
# Extracts the integer values from a PHP-serialized integer array such as
# "a:3:{i:0;i:22629;i:1;i:21837;i:2;i:22234;}" (=> [22629, 21837, 22234]).
#
# @param invited_members [String] serialized array; the default is a sample value
# @return [Array<Integer>] the values, in order of appearance
# @raise [ArgumentError] when the input is not a string containing an
#   "a:<count>:" header. Callers already rescue StandardError for such rows;
#   previously this failed with an accidental NoMethodError.
def array_from_members_string(invited_members = "a:3:{i:0;i:22629;i:1;i:21837;i:2;i:22234;}")
  header = /a:\d+:/
  unless invited_members.is_a?(String) && invited_members.match?(header)
    raise ArgumentError, "not a serialized member array: #{invited_members.inspect}"
  end

  # Each entry is "i:<index>;i:<value>;" -- only the value is of interest.
  invited_members.sub(header, "").scan(/i:\d+;i:(\d+);/).flatten.map(&:to_i)
end
|
|
|
|
|
|
|
|
# Imports private messages posted after IMPORT_AFTER in batches of BATCH_SIZE,
# ordered by conversation (msg_topic_id) and then message id.
#
# The first imported message of a conversation becomes a private-message
# topic addressed to the conversation's recipients ("system" when none can be
# resolved). Every following message of the same conversation is added as a
# reply to that topic. Messages are registered under the import id
# "pm-<msg_id>".
def import_private_messages
  puts "", "importing private messages..."

  topic_count =
    mysql_query("SELECT COUNT(msg_id) count FROM #{TABLE_PREFIX}message_posts").first["count"]

  last_private_message_topic_id = -1
  # Import id of the first post of the conversation being processed. Replies
  # use it to find their topic. (The lookup used to be built from a local that
  # was always nil at that point, so every reply was skipped.)
  first_post_import_id = nil

  batches(BATCH_SIZE) do |offset|
    private_messages = mysql_query(<<~SQL)
      SELECT msg_id pmtextid,
             msg_topic_id topic_id,
             msg_author_id fromuserid,
             mt_title title,
             msg_post message,
             mt_invited_members touserarray,
             mt_to_member_id to_user_id,
             msg_is_first_post first_post,
             msg_date dateline
      FROM #{TABLE_PREFIX}message_topics, #{TABLE_PREFIX}message_posts
      WHERE msg_topic_id = mt_id
      AND msg_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY msg_topic_id, msg_id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    puts "Processing #{private_messages.count} messages"
    break if private_messages.count < 1
    puts "Processing . . . "
    private_messages =
      private_messages.reject { |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") }

    create_posts(private_messages, total: topic_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = "pm-#{m["pmtextid"]}"
      mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) ||
        Discourse::SYSTEM_USER_ID
      mapped[:raw] = begin
        clean_up(m["message"])
      rescue StandardError
        nil
      end
      mapped[:created_at] = Time.zone.at(m["dateline"])
      title = @htmlentities.decode(m["title"]).strip[0...255]

      next if mapped[:raw].blank?

      # Users who are part of this private message.
      to_user_array = []
      begin
        to_user_array = [m["to_user_id"]] + array_from_members_string(m["touserarray"])
      rescue StandardError
        puts "#{m["pmtextid"]} -- #{m["touserarray"]}"
        skip = true
      end

      target_usernames = []
      begin
        to_user_array.each do |to_user|
          user_id = user_id_from_imported_user_id(to_user)
          username = User.find_by(id: user_id).try(:username)
          target_usernames << username if username
          if user_id
            puts "Found user: #{to_user} -- #{user_id} -- #{username}"
          else
            puts "Can't find user: #{to_user}"
          end
        end
      rescue StandardError
        puts "skipping pm-#{m["pmtextid"]} `to_user_array` is broken -- #{to_user_array.inspect}"
        skip = true
      end

      if last_private_message_topic_id != m["topic_id"]
        # First message seen for this conversation: create the topic.
        last_private_message_topic_id = m["topic_id"]
        first_post_import_id = mapped[:id]
        unless QUIET
          puts "New message: #{m["topic_id"]}: #{title} from #{m["fromuserid"]} (#{mapped[:user_id]})"
        end
        mapped[:title] = title
        mapped[:archetype] = Archetype.private_message
        mapped[:target_usernames] = target_usernames.join(",")
        if mapped[:target_usernames].empty? # pm with yourself?
          mapped[:target_usernames] = "system"
          puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})"
        end
      else
        # Reply: attach to the topic created for the conversation's first post.
        parent = topic_lookup_from_imported_post_id(first_post_import_id)
        if parent
          mapped[:topic_id] = parent[:topic_id]
        else
          skip = true
        end
        unless QUIET
          puts "Reply message #{mapped[:topic_id]}: #{m["topic_id"]}: from #{m["fromuserid"]} (#{mapped[:user_id]})"
        end
      end

      skip ? nil : mapped
    end
  end
end
|
|
|
|
|
|
|
|
# Imports gallery albums as topics: the images of one album are collected into
# a single post body (caption heading plus image URL under UPLOADS for each
# image), and the post for an album is emitted when the first row of the next
# album is reached.
#
# pfaffman: I'm not clear whether this is an IPBoard thing or from some other system
#
# NOTE(review): the last album of every batch is never emitted and an album
# that spans two batches is split -- this method is not called from execute;
# fix before enabling it.
# NOTE: for imports with huge numbers of galleries, this needs to use limits
def import_gallery_topics
  puts "", "importing gallery albums..."

  gallery_count = 0
  total_count =
    mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}gallery_images;").first["count"]

  batches(BATCH_SIZE) do |offset|
    images = mysql_query(<<~SQL)
      SELECT #{TABLE_PREFIX}gallery_albums.album_id tid,
             #{TABLE_PREFIX}gallery_albums.album_category_id category,
             #{TABLE_PREFIX}gallery_albums.album_owner_id user_id,
             #{TABLE_PREFIX}gallery_albums.album_name title,
             #{TABLE_PREFIX}gallery_albums.album_description raw,
             #{TABLE_PREFIX}gallery_albums.album_type,
             #{TABLE_PREFIX}gallery_images.image_caption caption,
             #{TABLE_PREFIX}gallery_images.image_description description,
             #{TABLE_PREFIX}gallery_images.image_masked_file_name masked,
             #{TABLE_PREFIX}gallery_images.image_id image_id,
             #{TABLE_PREFIX}gallery_images.image_medium_file_name medium,
             #{TABLE_PREFIX}gallery_images.image_original_file_name orig,
             FROM_UNIXTIME(#{TABLE_PREFIX}gallery_albums.album_last_img_date) created_at,
             #{TABLE_PREFIX}gallery_images.image_file_name filename
      FROM #{TABLE_PREFIX}gallery_albums, #{TABLE_PREFIX}gallery_images
      WHERE #{TABLE_PREFIX}gallery_images.image_album_id=#{TABLE_PREFIX}gallery_albums.album_id
      ORDER BY #{TABLE_PREFIX}gallery_albums.album_id, image_date DESC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    SQL

    break if images.size < 1
    if all_records_exist?(
         :posts,
         images.map { |t| "gallery#" + t["tid"].to_s + t["image_id"].to_s },
       )
      next
    end

    first_image = images.first
    last_id = first_image["tid"].to_i
    raw = "Gallery ID: #{last_id}\n" + clean_up(first_image["raw"])
    raw += "#{clean_up(first_image["description"])}\n"
    last_gallery = first_image.dup

    create_posts(images, total: total_count, offset: offset) do |gallery|
      id = gallery["tid"].to_i

      if id == last_id
        # Same album: keep collecting images, nothing to create yet.
        raw += "### #{gallery["caption"]}\n"
        raw += "#{UPLOADS}/#{gallery["orig"]}\n"
        last_gallery = gallery.dup
        next
      end

      # Album changed: remember what belongs to the finished album ...
      finished_gallery = last_gallery
      finished_raw = raw.dup
      finished_category =
        category_id_from_imported_category_id(finished_gallery["category"].to_s + "gal")

      # ... and start the body of the new album from scratch. The reset used
      # to happen only when DEBUG was set, so bodies piled up across albums,
      # and the description was read from the first row of the batch.
      last_id = id
      last_gallery = gallery.dup
      raw = "Gallery ID: #{last_id}\n" + clean_up(gallery["raw"])
      raw += "Cat: #{finished_gallery["category"]} - #{finished_category}" if DEBUG
      raw += "#{clean_up(gallery["description"])}\n"
      raw += "### #{gallery["caption"]}\n"
      raw += "User #{gallery["user_id"]}, image_id: #{gallery["image_id"]}\n" if DEBUG
      raw += "#{UPLOADS}/#{gallery["orig"]}\n"

      gallery_count += 1
      unless QUIET
        puts "#{gallery_count}--Cat: #{finished_gallery["category"]} ==> #{finished_category}"
      end

      {
        id: "gallery#" + finished_gallery["tid"].to_s + finished_gallery["image_id"].to_s,
        user_id:
          user_id_from_imported_user_id(finished_gallery["user_id"]) || Discourse::SYSTEM_USER_ID,
        title: CGI.unescapeHTML(finished_gallery["title"]),
        category: finished_category,
        raw: finished_raw,
      }
    end
  end
end
|
|
|
|
|
|
|
|
# TODO: use this to figure out to pin posts
|
|
|
|
# Adds the topic-level attributes of a first post to +mapped+ and returns it:
# category, title (HTML-unescaped, stripped, at most 255 characters), pinning
# information derived from row[:topic_type], and a post-create hook that
# creates a permalink for the new topic.
def map_first_post(row, mapped)
  forum_id = row[:forum_id]
  mapped[:category] = @lookup.category_id_from_imported_category_id(forum_id)

  unescaped_title = CGI.unescapeHTML(row[:topic_title])
  mapped[:title] = unescaped_title.strip[0...255]

  # Anything that is not a normal post is pinned as of its creation time.
  mapped[:pinned_at] = mapped[:created_at] if row[:topic_type] != Constants::POST_NORMAL
  mapped[:pinned_globally] = (row[:topic_type] == Constants::POST_GLOBAL)

  mapped[:post_create_action] = proc do |post|
    @permalink_importer.create_for_topic(post.topic, row[:topic_id])
  end

  mapped
end
|
|
|
|
|
|
|
|
# Imports gallery comments as replies, in batches of BATCH_SIZE. A comment is
# skipped when its body is blank or when no topic was imported under
# "discussion#<tid>".
#
# NOTE(review): the query selects from gallery_comments, which is not part of
# its FROM clause, and the import ids ("comment#<pid>") use the same format as
# import_posts -- this method looks unfinished; verify before calling it.
def import_comments
  puts "", "importing gallery comments..."

  count_sql = "SELECT count(*) count FROM #{TABLE_PREFIX}gallery_comments;"
  total_count = mysql_query(count_sql).first["count"]

  batches(BATCH_SIZE) do |offset|
    batch_sql = <<-SQL
      SELECT #{TABLE_PREFIX}gallery_comments.tid tid,
             #{TABLE_PREFIX}gallery_topics.forum_id category,
             #{TABLE_PREFIX}gallery_posts.pid pid,
             #{TABLE_PREFIX}gallery_topics.title title,
             #{TABLE_PREFIX}gallery_posts.post raw,
             FROM_UNIXTIME(#{TABLE_PREFIX}gallery_posts.post_date) created_at,
             #{TABLE_PREFIX}gallery_posts.author_id user_id
      FROM #{TABLE_PREFIX}gallery_posts, #{TABLE_PREFIX}gallery_topics
      WHERE #{TABLE_PREFIX}gallery_posts.topic_id = #{TABLE_PREFIX}gallery_topics.tid
      AND #{TABLE_PREFIX}gallery_posts.new_topic = 0
      AND #{TABLE_PREFIX}gallery_posts.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY #{TABLE_PREFIX}gallery_posts.post_date ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL
    rows = mysql_query(batch_sql)

    break if rows.size < 1

    import_ids = rows.map { |row| "comment#" + row["pid"].to_s }
    next if all_records_exist?(:posts, import_ids)

    create_posts(rows, total: total_count, offset: offset) do |row|
      parent = topic_lookup_from_imported_post_id("discussion#" + row["tid"].to_s)
      next unless parent
      next if row["raw"].blank?

      {
        id: "comment#" + row["pid"].to_s,
        user_id: user_id_from_imported_user_id(row["user_id"]) || Discourse::SYSTEM_USER_ID,
        topic_id: parent[:topic_id],
        raw: clean_up(row["raw"]),
        created_at: Time.zone.at(row["created_at"]),
      }
    end
  end
end
|
|
|
|
|
|
|
|
# Imports replies (rows with new_topic = 0) posted after IMPORT_AFTER, in
# batches of BATCH_SIZE. A reply is skipped when its body is blank or when no
# topic was imported under "discussion#<tid>". Replies are registered under
# the import id "comment#<pid>".
def import_posts
  puts "", "importing posts..."

  total_count =
    mysql_query(
      "SELECT count(*) count FROM #{POSTS_TABLE}
       WHERE post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
       AND new_topic=0;",
    ).first["count"]

  batches(BATCH_SIZE) do |offset|
    # pid is a tie-breaker: post_date alone is not unique, and LIMIT/OFFSET
    # paging over a non-unique sort may repeat or skip rows between batches.
    comments = mysql_query(<<~SQL)
      SELECT #{TOPICS_TABLE}.tid tid,
             #{TOPICS_TABLE}.forum_id category,
             #{POSTS_TABLE}.pid pid,
             #{TOPICS_TABLE}.title title,
             #{POSTS_TABLE}.post raw,
             FROM_UNIXTIME(#{POSTS_TABLE}.post_date) created_at,
             #{POSTS_TABLE}.author_id user_id
      FROM #{POSTS_TABLE}, #{TOPICS_TABLE}
      WHERE #{POSTS_TABLE}.topic_id = #{TOPICS_TABLE}.tid
      AND #{POSTS_TABLE}.new_topic = 0
      AND #{POSTS_TABLE}.post_date > UNIX_TIMESTAMP(STR_TO_DATE('#{IMPORT_AFTER}', '%Y-%m-%d'))
      ORDER BY #{POSTS_TABLE}.post_date ASC, #{POSTS_TABLE}.pid ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if comments.size < 1
    next if all_records_exist?(:posts, comments.map { |comment| "comment#" + comment["pid"].to_s })

    create_posts(comments, total: total_count, offset: offset) do |comment|
      topic = topic_lookup_from_imported_post_id("discussion#" + comment["tid"].to_s)
      next unless topic
      next if comment["raw"].blank?

      {
        id: "comment#" + comment["pid"].to_s,
        user_id: user_id_from_imported_user_id(comment["user_id"]) || Discourse::SYSTEM_USER_ID,
        topic_id: topic[:topic_id],
        raw: clean_up(comment["raw"]),
        created_at: Time.zone.at(comment["created_at"]),
      }
    end
  end
end
|
|
|
|
|
|
|
|
# Rewrites <blockquote class="ipsQuote"> elements in +raw+ into [quote] BBCode
# (with user/post/topic references when the quoted post was imported) and
# returns the resulting markup. Each blockquote becomes a <div> whose text is
# wrapped in [quote]...[/quote].
#
# I'm not clear if it is for just some bizarre imported data, or it might ever
# be useful. It should be integrated into the Nokogiri section of clean_up,
# though.
#
# TODO: consider: <blockquote class="ipsStyle_spoiler" data-ipsspoiler="">
#       consider: <pre class="ipsCode prettyprint">
# TODO make sure it's the imported username
# TODO: do _s still get \-escaped?
def nokogiri_fix_blockquotes(raw)
  @doc = Nokogiri.XML("<html>" + raw + "</html>")

  # handle <blockquote>s with links to original post
  @doc
    .css("blockquote[class=ipsQuote]")
    .each do |b|
      imported_post_id = b["data-ipsquote-contentcommentid"].to_s
      content_type = b["data-ipsquote-contenttype"].to_s
      content_class = b["data-ipsquote-contentclass"].to_s
      # Fall back to data-cid when data-ipsquote-contentid is absent. (Calling
      # to_s before || made the fallback unreachable.)
      content_id = (b["data-ipsquote-contentid"] || b["data-cid"]).to_s

      topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id)
      post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id)
      post = (topic_lookup && topic_lookup[:post_number]) ||
        (post_lookup && post_lookup[:post_number])
      topic = (topic_lookup && topic_lookup[:topic_id]) ||
        (post_lookup && post_lookup[:topic_id])

      username = b["data-ipsquote-username"] || b["data-author"]

      if DEBUG
        if content_class.length > 0 && content_class != "forums_Topic"
          puts "UNEXPECTED CONTENT CLASS! #{content_class}"
        end
        if content_type.length > 0 && content_type != "forums"
          puts "UNEXPECTED CONTENT TYPE! #{content_type}"
        end
      end

      quote =
        if post && topic && username
          "\n[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n"
        elsif username && username.length > 1
          "\n[quote=\"#{username}\"]\n\n"
        else
          "\n[quote]\n"
        end

      # Debug output only; the unconditional one-second sleep per quote that
      # used to follow it is gone.
      puts "QUOTE: #{quote}" if DEBUG

      b.content = quote + b.content + "\n[/quote]\n"
      b.name = "div"
    end

  @doc.to_html
end
|
|
|
|
|
|
|
|
# Converts the BBCode-style markup of an imported post body into the markup
# used for the new posts: line breaks, [hr], [url], [media], [php]/[code],
# [list], legacy [quote name=... post=...] tags and [attachment=ID:...] tags.
#
# The string is modified in place and also returned.
#
# @param raw [String, nil] post body to clean up
# @return [String] the cleaned body, or "" when +raw+ is blank
def clean_up(raw)
  return "" if raw.blank?

  raw.gsub!(/<#EMO_DIR#>/, EMO_DIR)
  # TODO what about uploads?
  # raw.gsub!(/<fileStore.core_Attachment>/,UPLOADS)
  raw.gsub!(/<br>/, "\n\n")
  raw.gsub!(%r{<br />}, "\n\n")
  raw.gsub!(%r{<p> </p>}, "\n\n")
  raw.gsub!(/\[hr\]/, "\n***\n")
  # NOTE(review): as written this swaps an apostrophe for an apostrophe (a no-op);
  # presumably the pattern was meant to be an HTML entity -- confirm before changing.
  raw.gsub!(/'/, "'")
  raw.gsub!(%r{\[url="(.+?)"\]http.+?\[/url\]}, "\\1\n")
  raw.gsub!(%r{\[media\](.+?)\[/media\]}, "\n\\1\n\n")

  # Code-like tags become fenced blocks; doubled newlines inside are collapsed
  # and HTML entities decoded.
  raw.gsub!(%r{\[php\](.+?)\[/php\]}m) do
    "\n\n```php\n\n" + @htmlentities.decode(Regexp.last_match(1).gsub(/\n\n/, "\n")) + "\n\n```\n\n"
  end
  raw.gsub!(%r{\[code\](.+?)\[/code\]}m) do
    "\n\n```\n\n" + @htmlentities.decode(Regexp.last_match(1).gsub(/\n\n/, "\n")) + "\n\n```\n\n"
  end
  raw.gsub!(%r{\[list\](.+?)\[/list\]}m) do
    "\n" + Regexp.last_match(1).gsub(/\[\*\]/, "\n- ") + "\n\n"
  end

  raw.gsub!(/\[quote\]/, "\n[quote]\n")
  raw.gsub!(%r{\[/quote\]}, "\n[/quote]\n")
  raw.gsub!(/date=\'(.+?)\'/, "")
  raw.gsub!(/timestamp=\'(.+?)\' /, "")

  quote_regex = /\[quote name=\'(.+?)\'\s+post=\'(\d+?)\'\s*\]/
  while (quote = quote_regex.match(raw))
    quoted_name = quote[1]
    imported_post_id = quote[2].to_s

    # get IPB post number and find Discourse post and topic number
    unless QUIET
      puts "----------------------------------------\nName: #{quoted_name}, post: #{imported_post_id}"
    end
    topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id)
    post_lookup = topic_lookup_from_imported_post_id("discussion#" + imported_post_id)
    puts "topic_lookup: #{topic_lookup}, post: #{post_lookup}" unless QUIET

    # Prefer the "comment#" lookup and fall back to the "discussion#" one.
    post_num = topic_lookup ? topic_lookup[:post_number] : nil
    topic_num = topic_lookup ? topic_lookup[:topic_id] : nil
    post_num ||= post_lookup ? post_lookup[:post_number] : nil
    topic_num ||= post_lookup ? post_lookup[:topic_id] : nil

    # Fix or leave bogus username?
    # NOTE(review): the lookup is keyed by the quoted name, and its result is
    # interpolated directly -- confirm it yields a username string.
    username = find_user_by_import_id(quoted_name) || quoted_name
    puts "username: #{username}, post_id: #{post_num}, topic_id: #{topic_num}" unless QUIET
    puts "Before fixing a quote: #{raw}\n**************************************** " unless QUIET

    post_string = post_num ? ", post:#{post_num}" : ""
    topic_string = topic_num ? ", topic:#{topic_num}" : ""
    replacement = "\n[quote=\"#{username}#{post_string}#{topic_string}\"]\n\n"

    # Replace only the quote that was just resolved; a global replace would
    # stamp this attribution onto every other quote in the post. The block
    # form keeps backslashes in the username literal.
    raw.sub!(quote_regex) { replacement }
    puts "AFTER!!!!!!!!!!!!1: #{raw}" unless QUIET
  end

  # The tag body stops at the first "]" so that one tag cannot swallow other
  # bracketed markup (or further attachment tags) on the same line.
  attach_regex = /\[attachment=(\d+?):[^\]]+\]/
  while (attach = attach_regex.match(raw))
    attach_id = attach[1] # digits only, per the pattern above
    attachments = mysql_query(<<~SQL)
      SELECT attach_location as loc,
             attach_file as filename
        FROM #{ATTACHMENT_TABLE}
       WHERE attach_id=#{attach_id}
    SQL

    if attachments.count < 1
      puts "Attachment #{attach_id} not found."
      attach_string = "Attachment #{attach_id} not found."
    else
      attachment = attachments.first
      filename = attachment["filename"]
      attach_url = "#{UPLOADS}/#{attachment["loc"].gsub(" ", "%20")}"
      attach_string =
        if filename.match?(/(png|jpg|jpeg|gif)$/)
          # images are rendered as a link that contains the image
          "#{attach_id}\n\n[![#{filename}](#{attach_url})](#{attach_url})\n"
        else
          # other attachments are simple download links
          "#{attach_id}\n\n[#{filename}](#{attach_url})\n"
        end
    end

    # Block form: backslashes in file names must not be read as group references.
    raw.sub!(attach_regex) { attach_string }
  end

  raw
end
|
|
|
|
|
|
|
|
# Picks one entry at random from the pipe-separated SiteSetting.category_colors list.
def random_category_color
  palette = SiteSetting.category_colors.split("|")
  pick = rand(palette.count)
  palette[pick]
end
|
|
|
|
|
|
|
|
# Older, HTML-based clean-up: substitutes the URL/upload placeholders, parses
# the body with Nokogiri, rewrites ipsQuote blockquotes and embedded-content
# iframes, then hands the result to ReverseMarkdown.
#
# This was for a forum that appeared to have lots of customizations.
# It did a good job of handling quotes and whatnot, but I don't know
# what version of IPBoard it was for.
#
# @param raw [String, nil] post body; placeholder substitutions modify it in place
# @return [String] converted text, or "" when +raw+ is blank
def old_clean_up(raw)
  return "" if raw.blank?

  raw.gsub!(/<___base_url___>/, URL)
  raw.gsub!(/<fileStore.core_Emoticons>/, UPLOADS)
  raw.gsub!(/<fileStore.core_Attachment>/, UPLOADS)
  raw.gsub!(/<br>/, "\n")

  @doc = Nokogiri.XML("<html>" + raw + "</html>")

  # handle <blockquote>s with links to original post
  @doc
    .css("blockquote[class=ipsQuote]")
    .each do |b|
      imported_post_id = b["data-ipsquote-contentcommentid"].to_s
      content_type = b["data-ipsquote-contenttype"].to_s
      content_class = b["data-ipsquote-contentclass"].to_s
      # Choose the attribute before calling to_s: a String is never nil, so
      # converting first would make the data-cid fallback unreachable.
      content_id = (b["data-ipsquote-contentid"] || b["data-cid"]).to_s

      topic_lookup = topic_lookup_from_imported_post_id("comment#" + imported_post_id)
      post_lookup = topic_lookup_from_imported_post_id("discussion#" + content_id)
      post = topic_lookup ? topic_lookup[:post_number] : nil
      topic = topic_lookup ? topic_lookup[:topic_id] : nil
      post ||= post_lookup ? post_lookup[:post_number] : nil
      topic ||= post_lookup ? post_lookup[:topic_id] : nil

      # TODO: consider: <blockquote class="ipsStyle_spoiler" data-ipsspoiler="">
      # consider: <pre class="ipsCode prettyprint">
      username = b["data-ipsquote-username"] || b["data-author"]

      if DEBUG
        # These diagnostics used to be collected into a string nobody read;
        # report them so the DEBUG switch has a visible effect.
        if content_class.length > 0 && content_class != "forums_Topic"
          print_warning("UNEXPECTED CONTENT CLASS! #{content_class}")
        end
        if content_type.length > 0 && content_type != "forums"
          print_warning("UNEXPECTED CONTENT TYPE! #{content_type}")
        end
      end

      quote =
        if post && topic && username
          "[quote=\"#{username}, post:#{post}, topic: #{topic}\"]\n\n"
        elsif username && username.length > 1
          "[quote=\"#{username}\"]\n\n"
        else
          "[quote]\n"
        end

      b.content = quote + b.content + "\n[/quote]\n"
      b.name = "div"
    end

  @doc.css("object param embed").each { |embed| embed.replace("\n#{embed["src"]}\n") }

  # handle <iframe data-embedcontent>s with links to original post
  # no examples in recent import
  @doc
    .css("iframe[data-embedcontent]")
    .each do |d|
      embed_html = d.to_s
      imported_post_id = embed_html[/\-([0-9]+)t/, 1]
      if imported_post_id
        puts "Searching for #{imported_post_id}" unless QUIET
        topic_lookup = topic_lookup_from_imported_post_id("discussion#" + imported_post_id)
        topic = topic_lookup ? topic_lookup[:topic_id] : nil
        if topic
          content_id = embed_html[/comment=([0-9]+)&/, 1] || "-1"
          # Take the post number from the comment lookup (the discussion
          # lookup was consulted here before); default to the first post.
          post_lookup = topic_lookup_from_imported_post_id("comment#" + content_id)
          post = (post_lookup && post_lookup[:post_number]) || 1
          d.content = URL + "/t/#{topic}" + "/#{post}"
        end
      end
      d.name = "div"
    end

  @doc.css("div[class=ipsQuote_citation]").each { |d| d.remove }

  raw = @doc.to_html

  # let ReverseMarkdown handle the rest
  raw = ReverseMarkdown.convert raw

  # remove tabs at start of line to avoid everything being a <pre>
  raw = raw.gsub(/^\t+/, "")

  # un \-escape _s in usernames in [quote]s
  raw.gsub!(/^\[quote=.+?_.*$/) { |quote_line| quote_line.gsub('\_', "_") }
  raw
end
|
|
|
|
|
|
|
|
# Memoized Guardian built for Discourse.system_user.
def staff_guardian
  return @_staff_guardian if @_staff_guardian

  @_staff_guardian = Guardian.new(Discourse.system_user)
end
|
|
|
|
|
|
|
|
# Runs +sql+ through @client and returns whatever the client returns.
def mysql_query(sql)
  # cache_rows: false is deliberately not passed: it was noted to cause a segmentation fault.
  result_set = @client.query(sql)
  result_set
end
|
|
|
|
|
|
|
|
# Creates Permalink records that map old forum URLs onto imported users,
# topics (first posts only) and categories. Each creation is best-effort:
# a failure is reported (unless QUIET) and the run continues.
def create_permalinks
  puts "", "Creating redirects...", ""

  # TODO: permalink normalizations: /(.*t)\?.*/\1

  create_permalinks_for_users
  create_permalinks_for_topics
  create_permalinks_for_categories
end

# Redirects "<USERDIR>/<import_id>-<import_username>" to the user's /users/ page.
def create_permalinks_for_users
  puts "", "Users...", ""
  User.find_each do |u|
    ucf = u.custom_fields
    next unless ucf && ucf["import_id"] && ucf["import_username"]

    username = UrlHelper.encode_component(ucf["import_username"])
    create_permalink_safely(
      url: "#{USERDIR}/#{ucf["import_id"]}-#{username}",
      external_url: "/users/#{u.username}",
    )
    print "."
  end
end

# Redirects old topic URLs for every imported first post; the URL shape
# depends on ID_FIRST.
def create_permalinks_for_topics
  puts "", "Posts...", ""
  Post.find_each do |post|
    pcf = post.custom_fields
    next unless pcf && pcf["import_id"]

    # Only the first post of a topic gets a redirect; don't think we can do posts.
    if post.post_number == 1
      topic = post.topic
      id = pcf["import_id"].split("#").last
      slug = topic.slug
      if ID_FIRST
        create_permalink_safely(url: "topic/#{id}-#{slug}", topic_id: topic.id)
        unless QUIET
          print_warning("#{URL}topic/#{id}-#{slug} --> http://localhost:3000/topic/#{id}-#{slug}")
        end
      else
        create_permalink_safely(url: "#{slug}-#{id}t", topic_id: topic.id)
        print_warning("#{URL}/#{slug}-#{id}t --> http://localhost:3000/t/#{topic.id}") unless QUIET
      end
    end
    print "."
  end
end

# Redirects "/forum/<import_id>-<slug>" to the imported category.
def create_permalinks_for_categories
  puts "", "Categories...", ""
  Category.find_each do |cat|
    id = cat.custom_fields["import_id"]
    next unless id

    slug = cat["slug"]
    # The message shows the same path that is registered just below.
    print_warning("/forum/#{id}-#{slug} --> /c/#{slug}") unless QUIET
    create_permalink_safely(url: "/forum/#{id}-#{slug}", category_id: cat.id)
    print "."
  end
end

# Calls Permalink.create with +attributes+; any StandardError is rescued so
# one bad row cannot abort the run. Returns nil when creation raised.
def create_permalink_safely(attributes)
  Permalink.create(attributes)
rescue StandardError => e
  print_warning("Could not create permalink #{attributes[:url]}: #{e.message}") unless QUIET
  nil
end
|
|
|
|
|
|
|
|
# Writes +message+ (converted to a string) as one line on standard error.
def print_warning(message)
  text = message.to_s
  $stderr.puts(text)
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Script entry point: build the importer and call perform as soon as this file is loaded.
ImportScripts::IpboardSQL.new.perform
|