2014-12-22 07:22:16 -05:00
require 'mysql2'
2015-03-18 15:30:42 -04:00
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
2015-01-19 09:00:55 -05:00
require 'htmlentities'
2016-07-10 05:19:24 -04:00
require 'php_serialize' # https://github.com/jqr/php-serialize
2014-08-18 07:04:08 -04:00
class ImportScripts :: VBulletin < ImportScripts :: Base
2015-01-19 09:00:55 -05:00
# Number of rows fetched from the vBulletin database per batch.
BATCH_SIZE = 1000

# CHANGE THESE BEFORE RUNNING THE IMPORTER
# Name of the MySQL database holding the vBulletin data.
DATABASE = "q23"
# Prefix prepended to every vBulletin table name in the queries below.
TABLE_PREFIX = "vb_"
# Timezone identifier handed to TZInfo when converting timestamps.
TIMEZONE = "America/Los_Angeles"
# Root directory under which attachment files are looked up.
ATTACHMENT_DIR = '/path/to/your/attachment/folder'
2014-08-18 07:04:08 -04:00
2014-12-22 07:22:16 -05:00
# Prepares importer state and opens the MySQL connection to the source
# database named by DATABASE.
def initialize
  super

  # original vBulletin username => username of the user created in Discourse
  @old_username_to_new_usernames = {}

  @tz = TZInfo::Timezone.get(TIMEZONE)
  @htmlentities = HTMLEntities.new

  @client = Mysql2::Client.new(
    host: "localhost",
    username: "root",
    database: DATABASE
  )
end
# Runs every import step in order. Users are imported after groups (users
# reference their primary group), replies after topics, and the raw-text
# post-processing happens once all posts exist.
def execute
  import_groups
  import_users
  create_groups_membership
  import_categories
  import_topics
  import_posts
  import_private_messages
  import_attachments

  close_topics
  post_process_posts

  create_permalink_file
  suspend_users
end
2014-12-22 07:22:16 -05:00
# Imports every row of the vBulletin `usergroup` table as a group, using the
# HTML-decoded, stripped title as the group name.
def import_groups
  puts "", "importing groups..."

  groups = mysql_query <<-SQL
      SELECT usergroupid, title
        FROM #{TABLE_PREFIX}usergroup
    ORDER BY usergroupid
  SQL

  create_groups(groups) do |group|
    {
      id: group["usergroupid"],
      name: @htmlentities.decode(group["title"]).strip
    }
  end
end
2014-08-18 07:04:08 -04:00
2014-12-22 07:22:16 -05:00
# Imports vBulletin users in batches of BATCH_SIZE.
#
# For each created user the old => new username mapping is recorded (used
# later when rewriting mentions and quotes) and the profile picture and
# profile background are imported.
def import_users
  puts "", "importing users"

  user_count = mysql_query("SELECT COUNT(userid) count FROM #{TABLE_PREFIX}user").first["count"]

  batches(BATCH_SIZE) do |offset|
    # `lastvisit` must be selected: it is read below for `last_seen_at`.
    users = mysql_query <<-SQL
        SELECT userid, username, homepage, usertitle, usergroupid, joindate, email, lastvisit
          FROM #{TABLE_PREFIX}user
      ORDER BY userid
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if users.size < 1

    # skip batches that were fully imported by a previous run
    next if all_records_exist? :users, users.map { |u| u["userid"].to_i }

    create_users(users, total: user_count, offset: offset) do |user|
      username = @htmlentities.decode(user["username"]).strip

      {
        id: user["userid"],
        name: username,
        username: username,
        email: user["email"].presence || fake_email,
        website: user["homepage"].strip,
        title: @htmlentities.decode(user["usertitle"]).strip,
        primary_group_id: group_id_from_imported_group_id(user["usergroupid"].to_i),
        created_at: parse_timestamp(user["joindate"]),
        last_seen_at: parse_timestamp(user["lastvisit"]),
        post_create_action: proc do |u|
          @old_username_to_new_usernames[user["username"]] = u.username
          import_profile_picture(user, u)
          import_profile_background(user, u)
        end
      }
    end
  end
end
2014-08-18 07:04:08 -04:00
2016-12-05 07:11:59 -05:00
# Adds every user to the group referenced by their `primary_group_id`.
#
# Automatic groups, groups that already have members and groups nobody uses
# as primary group are skipped. A failure on one group is printed and does
# not stop the remaining groups.
def create_groups_membership
  puts "", "Creating groups membership..."

  Group.find_each do |group|
    begin
      next if group.automatic

      puts "\t#{group.name}"

      next if GroupUser.where(group_id: group.id).count > 0

      user_ids_in_group = User.where(primary_group_id: group.id).pluck(:id).to_a
      next if user_ids_in_group.size == 0

      # one "(group_id, user_id, now, now)" tuple per member, inserted in bulk
      values = user_ids_in_group.map { |user_id| "(#{group.id}, #{user_id}, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)" }.join(",")

      User.exec_sql <<-SQL
        BEGIN;
        INSERT INTO group_users (group_id, user_id, created_at, updated_at) VALUES #{values};
        COMMIT;
      SQL

      Group.reset_counters(group.id, :group_users)
    rescue => e
      # StandardError only: interrupts and exit requests must still propagate
      puts e.message
      puts e.backtrace.join("\n")
    end
  end
end
2015-01-19 09:00:55 -05:00
# Imports the most recent custom avatar of a vBulletin user and sets it as
# the avatar of the imported user.
#
# old_user      - row from the vBulletin `user` table (reads "userid")
# imported_user - the user record created for that row
#
# Does nothing when the user has no `customavatar` row or the upload could
# not be persisted. The temporary file is always closed and removed.
def import_profile_picture(old_user, imported_user)
  query = mysql_query <<-SQL
      SELECT filedata, filename
        FROM #{TABLE_PREFIX}customavatar
       WHERE userid = #{old_user["userid"]}
    ORDER BY dateline DESC
       LIMIT 1
  SQL

  picture = query.first

  return if picture.nil?

  file = Tempfile.new("profile-picture")
  file.write(picture["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
  file.rewind

  upload = Upload.create_for(imported_user.id, file, picture["filename"], file.size)

  return if !upload.persisted?

  imported_user.create_user_avatar
  imported_user.user_avatar.update(custom_upload_id: upload.id)
  imported_user.update(uploaded_avatar_id: upload.id)
ensure
  # `file` is nil when we returned (or raised) before creating the tempfile
  if file
    file.close rescue nil
    file.unlink rescue nil
  end
end
# Imports the most recent custom profile picture of a vBulletin user and
# stores its URL as the profile background of the imported user.
#
# old_user      - row from the vBulletin `user` table (reads "userid")
# imported_user - the user record created for that row
#
# Does nothing when the user has no `customprofilepic` row or the upload
# could not be persisted. The temporary file is always closed and removed.
def import_profile_background(old_user, imported_user)
  query = mysql_query <<-SQL
      SELECT filedata, filename
        FROM #{TABLE_PREFIX}customprofilepic
       WHERE userid = #{old_user["userid"]}
    ORDER BY dateline DESC
       LIMIT 1
  SQL

  background = query.first

  return if background.nil?

  file = Tempfile.new("profile-background")
  file.write(background["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
  file.rewind

  upload = Upload.create_for(imported_user.id, file, background["filename"], file.size)

  return if !upload.persisted?

  imported_user.user_profile.update(profile_background: upload.url)
ensure
  # `file` is nil when we returned (or raised) before creating the tempfile
  if file
    file.close rescue nil
    file.unlink rescue nil
  end
end
2014-12-22 07:22:16 -05:00
# Imports vBulletin forums as categories.
#
# Forums whose `parentid` is -1 become top-level categories. Every other
# forum becomes a child category; deeper nesting is flattened by re-parenting
# each forum to its top-level ancestor, so only two levels remain.
def import_categories
  puts "", "importing top level categories..."

  categories = mysql_query("SELECT forumid, title, description, displayorder, parentid FROM #{TABLE_PREFIX}forum ORDER BY forumid").to_a

  top_level_categories = categories.select { |c| c["parentid"] == -1 }

  create_categories(top_level_categories) do |category|
    {
      id: category["forumid"],
      name: @htmlentities.decode(category["title"]).strip,
      position: category["displayorder"],
      description: @htmlentities.decode(category["description"]).strip
    }
  end

  puts "", "importing children categories..."

  children_categories = categories.select { |c| c["parentid"] != -1 }
  top_level_category_ids = Set.new(top_level_categories.map { |c| c["forumid"] })

  # cut down the tree to only 2 levels of categories
  children_categories.each do |cc|
    until top_level_category_ids.include?(cc["parentid"])
      parent = categories.detect { |c| c["forumid"] == cc["parentid"] }
      # parent row missing (orphaned forum): stop climbing instead of crashing
      break if parent.nil?
      cc["parentid"] = parent["parentid"]
    end
  end

  create_categories(children_categories) do |category|
    {
      id: category["forumid"],
      name: @htmlentities.decode(category["title"]).strip,
      position: category["displayorder"],
      description: @htmlentities.decode(category["description"]).strip,
      parent_category_id: category_id_from_imported_category_id(category["parentid"])
    }
  end
end
2014-08-18 07:04:08 -04:00
2014-12-22 07:22:16 -05:00
# Imports vBulletin threads (together with the text of their first post) as
# topics, in batches of BATCH_SIZE.
#
# Import ids of topics are "thread-<threadid>". Ids of threads whose `open`
# column is 0 are collected in @closed_topic_ids for `close_topics`.
def import_topics
  puts "", "importing topics..."

  # keep track of closed topics
  @closed_topic_ids = []

  topic_count = mysql_query("SELECT COUNT(threadid) count FROM #{TABLE_PREFIX}thread").first["count"]

  batches(BATCH_SIZE) do |offset|
    topics = mysql_query <<-SQL
        SELECT t.threadid threadid, t.title title, forumid, open, postuserid, t.dateline dateline, views, t.visible visible, sticky,
               p.pagetext raw
          FROM #{TABLE_PREFIX}thread t
          JOIN #{TABLE_PREFIX}post p ON p.postid = t.firstpostid
      ORDER BY t.threadid
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if topics.size < 1

    # skip batches that were fully imported by a previous run
    next if all_records_exist? :posts, topics.map { |t| "thread-#{t["threadid"]}" }

    create_posts(topics, total: topic_count, offset: offset) do |topic|
      # a post whose text cannot be converted is skipped rather than aborting the import
      raw = preprocess_post_raw(topic["raw"]) rescue nil
      next if raw.blank?

      topic_id = "thread-#{topic["threadid"]}"
      @closed_topic_ids << topic_id if topic["open"] == 0

      t = {
        id: topic_id,
        user_id: user_id_from_imported_user_id(topic["postuserid"]) || Discourse::SYSTEM_USER_ID,
        title: @htmlentities.decode(topic["title"]).strip[0...255],
        category: category_id_from_imported_category_id(topic["forumid"]),
        raw: raw,
        created_at: parse_timestamp(topic["dateline"]),
        visible: topic["visible"].to_i == 1,
        views: topic["views"],
      }
      t[:pinned_at] = t[:created_at] if topic["sticky"].to_i == 1
      t
    end

    # uncomment below lines to create permalink
    # topics.each do |thread|
    #   topic_id = "thread-#{thread["threadid"]}"
    #   topic = topic_lookup_from_imported_post_id(topic_id)
    #   if topic.present?
    #     title_slugified = thread["title"].gsub(" ","-").gsub(".","-") if thread["title"].present?
    #     url_slug = "threads/#{thread["threadid"]}-#{title_slugified}" if thread["title"].present?
    #     Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present?
    #   end
    # end
  end
end
2014-08-18 07:04:08 -04:00
2014-12-22 07:22:16 -05:00
# Imports every vBulletin post that is not the first post of a thread as a
# reply in the matching imported topic, in batches of BATCH_SIZE.
#
# Posts whose thread was not imported, or whose text is blank after
# conversion, are skipped.
def import_posts
  puts "", "importing posts..."

  # make sure `firstpostid` is indexed
  begin
    mysql_query("CREATE INDEX firstpostid_index ON #{TABLE_PREFIX}thread (firstpostid)")
  rescue Mysql2::Error
    puts 'Index already exists'
  end

  post_count = mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}post WHERE postid NOT IN (SELECT firstpostid FROM #{TABLE_PREFIX}thread)").first["count"]

  batches(BATCH_SIZE) do |offset|
    posts = mysql_query <<-SQL
        SELECT postid, userid, threadid, pagetext raw, dateline, visible, parentid
          FROM #{TABLE_PREFIX}post
         WHERE postid NOT IN (SELECT firstpostid FROM #{TABLE_PREFIX}thread)
      ORDER BY postid
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if posts.size < 1

    # skip batches that were fully imported by a previous run
    next if all_records_exist? :posts, posts.map { |p| p["postid"] }

    create_posts(posts, total: post_count, offset: offset) do |post|
      # a post whose text cannot be converted is skipped rather than aborting the import
      raw = preprocess_post_raw(post["raw"]) rescue nil
      next if raw.blank?
      next unless topic = topic_lookup_from_imported_post_id("thread-#{post["threadid"]}")

      p = {
        id: post["postid"],
        user_id: user_id_from_imported_user_id(post["userid"]) || Discourse::SYSTEM_USER_ID,
        topic_id: topic[:topic_id],
        raw: raw,
        created_at: parse_timestamp(post["dateline"]),
        hidden: post["visible"].to_i == 0,
      }

      # link the reply to its parent when the parent post was imported
      if parent = topic_lookup_from_imported_post_id(post["parentid"])
        p[:reply_to_post_number] = parent[:post_number]
      end

      p
    end
  end
end
2014-08-18 07:04:08 -04:00
2015-05-15 07:26:53 -04:00
# Finds the attachment record in the vBulletin database and creates an upload
# from the matching file under ATTACHMENT_DIR.
#
# post          - the post that references the attachment
# attachment_id - id of the row in the vBulletin `attachment` table
#
# The file is expected at
# ATTACHMENT_DIR/<digits of the user id, one directory per digit>/<filedataid>.attach
#
# Returns [upload, real_filename], or nil when the record or file is missing,
# the upload is invalid, or the query fails.
def find_upload(post, attachment_id)
  sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
                a.caption caption
           FROM #{TABLE_PREFIX}attachment a
          WHERE a.attachmentid = #{attachment_id}"
  results = mysql_query(sql)

  unless (row = results.first)
    puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
    return nil
  end

  filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach")
  unless File.exist?(filename)
    puts "Attachment file doesn't exist: #{filename}"
    return nil
  end

  real_filename = row['filename']
  # a name starting with "." gets a random prefix prepended
  real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
  upload = create_upload(post.user.id, filename, real_filename)

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  [upload, real_filename]
rescue Mysql2::Error => e
  puts "SQL Error"
  puts e.message
  puts sql
  nil
end
2016-07-10 05:19:24 -04:00
# Imports vBulletin private messages (`pmtext` rows), in batches of
# BATCH_SIZE, with import ids of the form "pm-<pmtextid>".
#
# A message whose title starts with "Re:" is attached as a reply to an
# earlier message of the same batch that has the same title (minus the
# prefix) and the same participants. Every other message, and every "Re:"
# without a match, starts a new private-message topic.
#
# Messages with blank text or an unreadable `touserarray` are skipped.
def import_private_messages
  puts "", "importing private messages..."

  topic_count = mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]

  batches(BATCH_SIZE) do |offset|
    private_messages = mysql_query <<-SQL
        SELECT pmtextid, fromuserid, title, message, touserarray, dateline
          FROM #{TABLE_PREFIX}pmtext
      ORDER BY pmtextid
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if private_messages.size < 1

    # skip batches that were fully imported by a previous run
    next if all_records_exist? :posts, private_messages.map { |pm| "pm-#{pm['pmtextid']}" }

    # [title, sorted participant ids] => pmtextid of the first message seen
    title_username_of_pm_first_post = {}

    create_posts(private_messages, total: topic_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = "pm-#{m['pmtextid']}"
      mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID
      mapped[:raw] = preprocess_post_raw(m['message']) rescue nil
      mapped[:created_at] = Time.zone.at(m['dateline'])
      title = @htmlentities.decode(m['title']).strip[0...255]
      topic_id = nil

      next if mapped[:raw].blank?

      # users who are part of this private message.
      target_usernames = []
      target_userids = []

      # Records one recipient. The parentheses matter: `<<` binds tighter
      # than `||`, so without them the system-user fallback is never applied
      # and nil ids end up in the participant list.
      add_target = lambda do |imported_user_id|
        user_id = user_id_from_imported_user_id(imported_user_id)
        username = User.find_by(id: user_id).try(:username)
        target_userids << (user_id || Discourse::SYSTEM_USER_ID)
        target_usernames << username if username
      end

      begin
        to_user_array = PHP.unserialize(m['touserarray'])
      rescue
        puts "#{m['pmtextid']} -- #{m['touserarray']}"
        skip = true
      end

      begin
        to_user_array.each do |to_user|
          if to_user[0] == "cc" || to_user[0] == "bcc" # not sure if we should include bcc users
            to_user[1].each { |to_user_cc| add_target.call(to_user_cc[0]) }
          else
            add_target.call(to_user[0])
          end
        end
      rescue
        puts "skipping pm-#{m['pmtextid']} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
        skip = true
      end

      participants = target_userids
      participants << mapped[:user_id]
      begin
        participants.sort!
      rescue
        puts "one of the participant's id is nil -- #{participants.inspect}"
      end

      if title =~ /^Re:/
        # the original title starts somewhere after "Re:"; try offsets 3..8
        parent_id = nil
        (3..8).each do |start|
          parent_id = title_username_of_pm_first_post[[title[start..-1], participants]]
          break if parent_id
        end

        if parent_id
          if t = topic_lookup_from_imported_post_id("pm-#{parent_id}")
            topic_id = t[:topic_id]
          end
        end
      else
        title_username_of_pm_first_post[[title, participants]] ||= m['pmtextid']
      end

      unless topic_id
        mapped[:title] = title
        mapped[:archetype] = Archetype.private_message
        mapped[:target_usernames] = target_usernames.join(',')

        if mapped[:target_usernames].empty? # pm with yourself?
          # skip = true
          mapped[:target_usernames] = "system"
          puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})"
        end
      else
        mapped[:topic_id] = topic_id
      end

      skip ? nil : mapped
    end
  end
end
2015-05-15 07:26:53 -04:00
# Replaces every [ATTACH...]<id>[/ATTACH] tag in imported posts with the HTML
# for the matching upload and saves changed posts as a revision.
#
# A tag whose attachment cannot be resolved is replaced by an empty string.
def import_attachments
  puts '', 'importing attachments...'

  current_count = 0
  # the loop below walks every Discourse post, so that is the progress total
  total_count = Post.count
  success_count = 0
  fail_count = 0

  attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i

  Post.find_each do |post|
    current_count += 1
    print_status current_count, total_count

    new_raw = post.raw.dup
    new_raw.gsub!(attachment_regex) do |s|
      matches = attachment_regex.match(s)
      attachment_id = matches[1]

      upload, filename = find_upload(post, attachment_id)
      unless upload
        fail_count += 1
        # `next` yields nil, which gsub! turns into an empty replacement
        next
      end

      html_for_upload(upload, filename)
    end

    if new_raw != post.raw
      PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: 'Import attachments from vBulletin' })
    end

    success_count += 1
  end
end
2014-12-22 07:22:16 -05:00
# Marks as closed every topic containing a post whose `import_id` custom
# field is one of the ids collected in @closed_topic_ids.
def close_topics
  puts "", "Closing topics..."

  # nothing to close (or `import_topics` did not run): skip the query
  return if @closed_topic_ids.blank?

  sql = <<-SQL
    WITH closed_topic_ids AS (
      SELECT t.id AS topic_id
        FROM post_custom_fields pcf
        JOIN posts p ON p.id = pcf.post_id
        JOIN topics t ON t.id = p.topic_id
       WHERE pcf.name = 'import_id'
         AND pcf.value IN (?)
    )
    UPDATE topics
       SET closed = true
     WHERE id IN (SELECT topic_id FROM closed_topic_ids)
  SQL

  Topic.exec_sql(sql, @closed_topic_ids)
end
2014-08-25 04:48:29 -04:00
2015-01-19 09:00:55 -05:00
# Runs `postprocess_post_raw` over every post and saves the posts whose raw
# text changed. Posts that raise PrettyText::JavaScriptError are skipped.
def post_process_posts
  puts "", "Postprocessing posts..."

  current = 0
  max = Post.count

  Post.find_each do |post|
    begin
      # Work on a copy: the helper rewrites its argument with gsub!, so passing
      # `post.raw` itself makes the comparison below always equal and no post
      # is ever saved.
      old_raw = post.raw.dup
      new_raw = postprocess_post_raw(post.raw.dup)
      if new_raw != old_raw
        post.raw = new_raw
        post.save
      end
    rescue PrettyText::JavaScriptError
      nil
    ensure
      print_status(current += 1, max)
    end
  end
end
2014-12-22 07:22:16 -05:00
# Converts the raw vBulletin BBCode of a post into the markup stored in
# Discourse, before the post is created.
#
# raw - the post text from the vBulletin database (may be nil or blank)
#
# Returns the converted text; "" when the input is blank.
#
# The order of the steps matters: code-like tags are turned into backtick
# blocks first, chevrons are escaped next (but not inside backticks), and
# only then are quotes produced.
def preprocess_post_raw(raw)
  return "" if raw.blank?

  # decode HTML entities
  raw = @htmlentities.decode(raw)

  # fix whitespaces (literal backslash sequences stored in the text)
  raw.gsub!(/(\\r)?\\n/, "\n")
  raw.gsub!("\\t", "\t")

  # [HTML]...[/HTML]
  raw.gsub!(/\[html\]/i, "\n```html\n")
  raw.gsub!(/\[\/html\]/i, "\n```\n")

  # [PHP]...[/PHP]
  raw.gsub!(/\[php\]/i, "\n```php\n")
  raw.gsub!(/\[\/php\]/i, "\n```\n")

  # [HIGHLIGHT="..."]
  raw.gsub!(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }

  # [CODE]...[/CODE]
  # [HIGHLIGHT]...[/HIGHLIGHT]
  raw.gsub!(/\[\/?code\]/i, "\n```\n")
  raw.gsub!(/\[\/?highlight\]/i, "\n```\n")

  # [SAMP]...[/SAMP]
  raw.gsub!(/\[\/?samp\]/i, "`")

  # replace all chevrons with HTML entities
  # NOTE: must be done
  #  - AFTER all the "code" processing
  #  - BEFORE the "quote" processing
  # Chevrons inside backticks are parked as U+2603 so they survive the escape.
  raw.gsub!(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
  raw.gsub!("<", "&lt;")
  raw.gsub!("\u2603", "<")

  raw.gsub!(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
  raw.gsub!(">", "&gt;")
  raw.gsub!("\u2603", ">")

  # [URL=...]...[/URL]
  raw.gsub!(/\[url="?([^"]+?)"?\](.*?)\[\/url\]/im) { "[#{$2.strip}](#{$1})" }
  raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/im) { "[#{$2.strip}](#{$1})" }

  # [URL]...[/URL]
  # [MP3]...[/MP3]
  raw.gsub!(/\[\/?url\]/i, "")
  raw.gsub!(/\[\/?mp3\]/i, "")

  # [MENTION]<username>[/MENTION]
  raw.gsub!(/\[mention\](.+?)\[\/mention\]/i) do
    old_username = $1
    if @old_username_to_new_usernames.has_key?(old_username)
      old_username = @old_username_to_new_usernames[old_username]
    end
    "@#{old_username}"
  end

  # [FONT=blah] and [COLOR=blah]
  raw.gsub!(/\[FONT=.*?\](.*?)\[\/FONT\]/im, '\1')
  raw.gsub!(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, '\1')
  raw.gsub!(/\[COLOR=#.*?\](.*?)\[\/COLOR\]/im, '\1')

  raw.gsub!(/\[SIZE=.*?\](.*?)\[\/SIZE\]/im, '\1')
  raw.gsub!(/\[h=.*?\](.*?)\[\/h\]/im, '\1')

  # [CENTER]...[/CENTER]
  raw.gsub!(/\[CENTER\](.*?)\[\/CENTER\]/im, '\1')

  # [INDENT]...[/INDENT]
  raw.gsub!(/\[INDENT\](.*?)\[\/INDENT\]/im, '\1')
  raw.gsub!(/\[TABLE\](.*?)\[\/TABLE\]/im, '\1')
  raw.gsub!(/\[TR\](.*?)\[\/TR\]/im, '\1')
  raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, '\1')
  raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, '\1')

  # [QUOTE]...[/QUOTE]
  # Non-bang gsub on purpose: gsub! returns nil when nothing matched, which
  # would make the outer substitution erase the whole quote.
  raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) do |quote|
    unwrapped = quote.gsub(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" }
    unwrapped.gsub(/\n(.+?)/) { "\n> #{$1}" }
  end

  # [QUOTE=<username>]...[/QUOTE]
  raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
    old_username, quote = $1, $2
    if @old_username_to_new_usernames.has_key?(old_username)
      old_username = @old_username_to_new_usernames[old_username]
    end
    "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
  end

  # [YOUTUBE]<id>[/YOUTUBE]
  raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" }

  # [VIDEO=youtube;<id>]...[/VIDEO]
  raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }

  # More Additions ....

  # [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler]
  raw.gsub!(/\[spoiler="?(.+?)"?\](.+?)\[\/spoiler\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" }

  # [IMG][IMG]http://i63.tinypic.com/akga3r.jpg[/IMG][/IMG]
  raw.gsub!(/\[IMG\]\[IMG\](.+?)\[\/IMG\]\[\/IMG\]/i) { "[IMG]#{$1}[/IMG]" }

  # convert list tags to ul and list=1 tags to ol
  # (basically, we're only missing list=a here...)
  # (https://meta.discourse.org/t/phpbb-3-importer-old/17397)
  raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]')
  raw.gsub!(/\[list=1\](.*?)\[\/list\]/im, '[ol]\1[/ol]')
  raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]')
  raw.gsub!(/\[list=1\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]')
  # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
  raw.gsub!(/\[\*\]\n/, '')
  raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')
  raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
  raw.gsub!(/\[\*=1\]/, '')

  raw
end
2015-01-19 09:00:55 -05:00
# Rewrites references to other vBulletin posts and threads once all posts
# have been imported and can be looked up.
#
# raw - the raw text of an imported post
#
# Returns a new string; the argument is left untouched. References whose
# target was not imported are left as they were.
def postprocess_post_raw(raw)
  # never rewrite the caller's string in place
  raw = raw.dup

  # [QUOTE=<username>;<post_id>]...[/QUOTE]
  raw.gsub!(/\[quote=([^;]+);(\d+)\](.+?)\[\/quote\]/im) do
    old_username, post_id, quote = $1, $2, $3

    if @old_username_to_new_usernames.has_key?(old_username)
      old_username = @old_username_to_new_usernames[old_username]
    end

    if topic_lookup = topic_lookup_from_imported_post_id(post_id)
      post_number = topic_lookup[:post_number]
      topic_id = topic_lookup[:topic_id]
      "\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n"
    else
      "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
    end
  end

  # remove attachments
  raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")

  # [THREAD]<thread_id>[/THREAD]
  # ==> http://my.discourse.org/t/slug/<topic_id>
  raw.gsub!(/\[thread\](\d+)\[\/thread\]/i) do |original|
    thread_id = $1
    if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
      topic_lookup[:url]
    else
      original
    end
  end

  # [THREAD=<thread_id>]...[/THREAD]
  # ==> [...](http://my.discourse.org/t/slug/<topic_id>)
  raw.gsub!(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do |original|
    thread_id, link = $1, $2
    if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
      url = topic_lookup[:url]
      "[#{link}](#{url})"
    else
      original
    end
  end

  # [POST]<post_id>[/POST]
  # ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
  raw.gsub!(/\[post\](\d+)\[\/post\]/i) do |original|
    post_id = $1
    if topic_lookup = topic_lookup_from_imported_post_id(post_id)
      topic_lookup[:url]
    else
      original
    end
  end

  # [POST=<post_id>]...[/POST]
  # ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>)
  raw.gsub!(/\[post=(\d+)\](.+?)\[\/post\]/i) do |original|
    post_id, link = $1, $2
    if topic_lookup = topic_lookup_from_imported_post_id(post_id)
      url = topic_lookup[:url]
      "[#{link}](#{url})"
    else
      original
    end
  end

  raw
end
2016-07-10 05:19:24 -04:00
2016-12-05 07:11:59 -05:00
# Writes `vb_map.csv` next to this script, one row per listable topic whose
# first post carries an `import_id` custom field. Each row is the single
# string "XXX<old id> YYY<topic id>", where <old id> is the part of the
# import id after the last "-".
def create_permalink_file
  puts '', 'Creating Permalink File...', ''

  id_mapping = []

  Topic.listable_topics.find_each do |topic|
    pcf = topic.first_post.custom_fields
    if pcf && pcf["import_id"]
      id = pcf["import_id"].split('-').last
      id_mapping.push("XXX#{id} YYY#{topic.id}")
    end
  end

  # Category.find_each do |cat|
  #   ccf = cat.custom_fields
  #   if ccf && ccf["import_id"]
  #     id = ccf["import_id"].to_i
  #     id_mapping.push("/forumdisplay.php?#{id} http://forum.quartertothree.com#{cat.url}")
  #   end
  # end

  CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
    id_mapping.each do |value|
      csv << [value]
    end
  end
end
# Suspends every imported user that has a row in the vBulletin `userban`
# table, for 200 years starting at the ban date, and logs the suspension as
# a staff action by the system user.
def suspend_users
  puts '', "updating banned users"

  banned = 0
  failed = 0
  total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}userban").first['count']

  system_user = Discourse.system_user

  mysql_query("SELECT userid, bandate FROM #{TABLE_PREFIX}userban").each do |b|
    user = User.find_by_id(user_id_from_imported_user_id(b['userid']))
    if user
      # the ban date lives on the `userban` row, not on the Discourse user
      user.suspended_at = parse_timestamp(b["bandate"])
      user.suspended_till = 200.years.from_now

      if user.save
        StaffActionLogger.new(system_user).log_user_suspend(user, "banned during initial import")
        banned += 1
      else
        puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
        failed += 1
      end
    else
      puts "Not found: #{b['userid']}"
      failed += 1
    end

    print_status banned + failed, total
  end
end
2015-01-19 09:00:55 -05:00
# Converts a vBulletin timestamp: shifts it through the configured timezone
# (@tz) and builds the time via `Time.zone.at`.
def parse_timestamp(timestamp)
  local_time = @tz.utc_to_local(timestamp)
  Time.zone.at(local_time)
end
2014-12-22 07:22:16 -05:00
# Returns a random placeholder address (32 hex characters @domain.com) for
# users that have no email in the source database.
def fake_email
  SecureRandom.hex << "@domain.com"
end
# Runs a SQL statement against the vBulletin database and returns the
# Mysql2 result, with row caching enabled.
def mysql_query(sql)
  @client.query(sql, cache_rows: true)
end
2014-08-18 07:04:08 -04:00
end
2014-12-22 07:22:16 -05:00
# Entry point: build the importer and run it.
importer = ImportScripts::VBulletin.new
importer.perform