Merge pull request #4990 from quangbuule/add-charset-when-bulk-import-vbulletin
Add charset preference when bulk importing vBulletin
This commit is contained in:
commit
933711a771
|
@ -5,6 +5,44 @@ require "htmlentities"
|
||||||
class BulkImport::VBulletin < BulkImport::Base
|
class BulkImport::VBulletin < BulkImport::Base
|
||||||
|
|
||||||
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
|
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
|
||||||
|
# Maps a database charset name (the value of DB_CHARSET, also handed to the
# MySQL client as its connection encoding) to the Ruby Encoding used when
# normalizing imported text. A nil value marks a charset name that has no
# Ruby encoding assigned here. Frozen so the shared table cannot be mutated
# at runtime.
CHARSET_MAP = {
  "armscii8" => nil,
  "ascii" => Encoding::US_ASCII,
  "big5" => Encoding::Big5,
  "binary" => Encoding::ASCII_8BIT,
  "cp1250" => Encoding::Windows_1250,
  "cp1251" => Encoding::Windows_1251,
  "cp1256" => Encoding::Windows_1256,
  "cp1257" => Encoding::Windows_1257,
  "cp850" => Encoding::CP850,
  "cp852" => Encoding::CP852,
  "cp866" => Encoding::IBM866,
  "cp932" => Encoding::Windows_31J,
  "dec8" => nil,
  "eucjpms" => Encoding::EucJP_ms,
  "euckr" => Encoding::EUC_KR,
  "gb2312" => Encoding::EUC_CN,
  "gbk" => Encoding::GBK,
  "geostd8" => nil,
  "greek" => Encoding::ISO_8859_7,
  "hebrew" => Encoding::ISO_8859_8,
  "hp8" => nil,
  "keybcs2" => nil,
  "koi8r" => Encoding::KOI8_R,
  "koi8u" => Encoding::KOI8_U,
  "latin1" => Encoding::ISO_8859_1,
  "latin2" => Encoding::ISO_8859_2,
  "latin5" => Encoding::ISO_8859_9,
  "latin7" => Encoding::ISO_8859_13,
  "macce" => Encoding::MacCentEuro,
  "macroman" => Encoding::MacRoman,
  "sjis" => Encoding::SHIFT_JIS,
  "swe7" => nil,
  "tis620" => Encoding::TIS_620,
  "ucs2" => Encoding::UTF_16BE,
  "ujis" => Encoding::EucJP_ms,
  "utf8" => Encoding::UTF_8,
}.freeze
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super
|
super
|
||||||
|
@ -13,10 +51,19 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
username = ENV["DB_USERNAME"] || "root"
|
username = ENV["DB_USERNAME"] || "root"
|
||||||
password = ENV["DB_PASSWORD"]
|
password = ENV["DB_PASSWORD"]
|
||||||
database = ENV["DB_NAME"] || "vbulletin"
|
database = ENV["DB_NAME"] || "vbulletin"
|
||||||
|
charset = ENV["DB_CHARSET"] || "utf8"
|
||||||
|
|
||||||
@html_entities = HTMLEntities.new
|
@html_entities = HTMLEntities.new
|
||||||
|
@encoding = CHARSET_MAP[charset]
|
||||||
|
|
||||||
|
@client = Mysql2::Client.new(
|
||||||
|
host: host,
|
||||||
|
username: username,
|
||||||
|
password: password,
|
||||||
|
database: database,
|
||||||
|
encoding: charset
|
||||||
|
)
|
||||||
|
|
||||||
@client = Mysql2::Client.new(host: host, username: username, password: password, database: database)
|
|
||||||
@client.query_options.merge!(as: :array, cache_rows: false)
|
@client.query_options.merge!(as: :array, cache_rows: false)
|
||||||
|
|
||||||
@has_post_thanks = mysql_query(<<-SQL
|
@has_post_thanks = mysql_query(<<-SQL
|
||||||
|
@ -63,9 +110,9 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
create_groups(groups) do |row|
|
create_groups(groups) do |row|
|
||||||
{
|
{
|
||||||
imported_id: row[0],
|
imported_id: row[0],
|
||||||
name: html_decode(row[1]),
|
name: normalize_text(row[1]),
|
||||||
bio_raw: html_decode(row[2]),
|
bio_raw: normalize_text(row[2]),
|
||||||
title: html_decode(row[3]),
|
title: normalize_text(row[3]),
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -84,7 +131,7 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
create_users(users) do |row|
|
create_users(users) do |row|
|
||||||
u = {
|
u = {
|
||||||
imported_id: row[0],
|
imported_id: row[0],
|
||||||
username: row[1],
|
username: normalize_text(row[1]),
|
||||||
created_at: Time.zone.at(row[3]),
|
created_at: Time.zone.at(row[3]),
|
||||||
date_of_birth: parse_birthday(row[4]),
|
date_of_birth: parse_birthday(row[4]),
|
||||||
primary_group_id: group_id_from_imported_id(row[6]),
|
primary_group_id: group_id_from_imported_id(row[6]),
|
||||||
|
@ -253,8 +300,8 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
create_categories(parent_categories) do |row|
|
create_categories(parent_categories) do |row|
|
||||||
{
|
{
|
||||||
imported_id: row[0],
|
imported_id: row[0],
|
||||||
name: html_decode(row[2]),
|
name: normalize_text(row[2]),
|
||||||
description: html_decode(row[3]),
|
description: normalize_text(row[3]),
|
||||||
position: row[4],
|
position: row[4],
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
@ -263,8 +310,8 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
create_categories(children_categories) do |row|
|
create_categories(children_categories) do |row|
|
||||||
{
|
{
|
||||||
imported_id: row[0],
|
imported_id: row[0],
|
||||||
name: html_decode(row[2]),
|
name: normalize_text(row[2]),
|
||||||
description: html_decode(row[3]),
|
description: normalize_text(row[3]),
|
||||||
position: row[4],
|
position: row[4],
|
||||||
parent_category_id: category_id_from_imported_id(row[1]),
|
parent_category_id: category_id_from_imported_id(row[1]),
|
||||||
}
|
}
|
||||||
|
@ -287,7 +334,7 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
|
|
||||||
t = {
|
t = {
|
||||||
imported_id: row[0],
|
imported_id: row[0],
|
||||||
title: html_decode(row[1]),
|
title: normalize_text(row[1]),
|
||||||
category_id: category_id_from_imported_id(row[2]),
|
category_id: category_id_from_imported_id(row[2]),
|
||||||
user_id: user_id_from_imported_id(row[3]),
|
user_id: user_id_from_imported_id(row[3]),
|
||||||
closed: row[4] == 0,
|
closed: row[4] == 0,
|
||||||
|
@ -325,7 +372,7 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
user_id: user_id_from_imported_id(row[3]),
|
user_id: user_id_from_imported_id(row[3]),
|
||||||
created_at: Time.zone.at(row[4]),
|
created_at: Time.zone.at(row[4]),
|
||||||
hidden: row[5] == 0,
|
hidden: row[5] == 0,
|
||||||
raw: html_decode(row[6]),
|
raw: normalize_text(row[6]),
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -353,7 +400,7 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
{
|
{
|
||||||
archetype: Archetype.private_message,
|
archetype: Archetype.private_message,
|
||||||
imported_id: row[0] + PRIVATE_OFFSET,
|
imported_id: row[0] + PRIVATE_OFFSET,
|
||||||
title: title,
|
title: normalize_text(title),
|
||||||
user_id: user_id_from_imported_id(row[2]),
|
user_id: user_id_from_imported_id(row[2]),
|
||||||
created_at: Time.zone.at(row[4]),
|
created_at: Time.zone.at(row[4]),
|
||||||
}
|
}
|
||||||
|
@ -409,17 +456,22 @@ class BulkImport::VBulletin < BulkImport::Base
|
||||||
topic_id: topic_id,
|
topic_id: topic_id,
|
||||||
user_id: user_id_from_imported_id(row[2]),
|
user_id: user_id_from_imported_id(row[2]),
|
||||||
created_at: Time.zone.at(row[4]),
|
created_at: Time.zone.at(row[4]),
|
||||||
raw: html_decode(row[5]),
|
raw: normalize_text(row[5]),
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Builds the comparable title of a private message: the title is run
# through normalize_text, scrubbed, and every "Re:" marker found at the
# start of a line (case-insensitive, optional spaces around the colon)
# is removed.
def extract_pm_title(title)
  cleaned = normalize_text(title).scrub
  cleaned.gsub(/^Re\s*:\s*/i, "")
end
|
||||||
|
|
||||||
# Prepares imported text for storage: falls back to an empty string when
# +text.presence+ yields nothing, passes the value through
# normalize_charset, scrubs invalid byte sequences, and finally decodes
# HTML entities with @html_entities.
def normalize_text(text)
  source = text.presence || ""
  scrubbed = normalize_charset(source).scrub
  @html_entities.decode(scrubbed)
end
|
||||||
|
|
||||||
|
# Transcodes +text+ to the configured source encoding (@encoding) and then
# re-tags the resulting bytes as UTF-8.
#
# +text+ is returned untouched when it is nil/false, when @encoding is
# already UTF-8, or when @encoding is nil (the charset had no Ruby encoding
# assigned). Without the nil guard String#encode(nil) raises TypeError for
# every imported row.
#
# NOTE(review): presumably this recovers UTF-8 bytes stored in columns that
# are declared with a legacy charset -- confirm against real data.
def normalize_charset(text)
  return text unless text
  return text if @encoding.nil? || @encoding == Encoding::UTF_8

  # encode returns a new string, so the caller's object is never mutated.
  text.encode(@encoding).force_encoding(Encoding::UTF_8)
end
|
||||||
|
|
||||||
def parse_birthday(birthday)
|
def parse_birthday(birthday)
|
||||||
|
|
Loading…
Reference in New Issue