diff --git a/script/import_scripts/phpbb3.rb b/script/import_scripts/phpbb3.rb
index 9e4758246cb..112d6f4a87a 100644
--- a/script/import_scripts/phpbb3.rb
+++ b/script/import_scripts/phpbb3.rb
@@ -7,6 +7,9 @@ class ImportScripts::PhpBB3 < ImportScripts::Base
PHPBB_DB = "phpbb"
BATCH_SIZE = 1000
+ ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s)://
+ NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https://
+
def initialize
super
@@ -94,6 +97,7 @@ class ImportScripts::PhpBB3 < ImportScripts::Base
FROM phpbb_posts p,
phpbb_topics t
WHERE p.topic_id = t.topic_id
+ ORDER BY id
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
@@ -106,7 +110,7 @@ class ImportScripts::PhpBB3 < ImportScripts::Base
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
- mapped[:raw] = decode_phpbb_post(m['raw'])
+ mapped[:raw] = process_phpbb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['post_time'])
if m['id'] == m['first_post_id']
@@ -154,7 +158,7 @@ class ImportScripts::PhpBB3 < ImportScripts::Base
mapped[:id] = "pm:#{m['id']}"
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
- mapped[:raw] = decode_phpbb_post(m['message_text'])
+ mapped[:raw] = process_phpbb_post(m['message_text'], m['id'])
mapped[:created_at] = Time.zone.at(m['message_time'])
if m['root_level'] == 0
@@ -226,16 +230,17 @@ class ImportScripts::PhpBB3 < ImportScripts::Base
end
end
- def mysql_query(sql)
- @client.query(sql, cache_rows: false)
- end
-
- def decode_phpbb_post(raw)
+ def process_phpbb_post(raw, import_id)
s = raw.dup
# :) is encoded as
s.gsub!(/(?:.*)/, '\1')
+ # Internal forum links of this form: viewtopic.php?f=26&t=3412
+ s.gsub!(/viewtopic(?:.*)t=(\d+)<\/a>/) do |phpbb_link|
+ replace_internal_link(phpbb_link, $1, import_id)
+ end
+
# Some links look like this: http://www.onegameamonth.com
s.gsub!(/(.+)<\/a>/, '[\2](\1)')
@@ -244,7 +249,46 @@ class ImportScripts::PhpBB3 < ImportScripts::Base
# [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
- CGI.unescapeHTML(s)
+ s = CGI.unescapeHTML(s)
+
+ # phpBB shortens link text like this, which breaks our markdown processing:
+ # [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
+ #
+ # Work around it for now:
+ s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
+
+ # Replace internal forum links that aren't in the format
+ s.gsub!(internal_url_regexp) do |phpbb_link|
+ replace_internal_link(phpbb_link, $1, import_id)
+ end
+
+ s
+ end
+
+ def replace_internal_link(phpbb_link, import_topic_id, from_import_post_id)
+ results = mysql_query("select topic_first_post_id from phpbb_topics where topic_id = #{import_topic_id}")
+
+ return phpbb_link unless results.size > 0
+
+ linked_topic_id = results.first['topic_first_post_id']
+ lookup = topic_lookup_from_imported_post_id(linked_topic_id)
+
+ return phpbb_link unless lookup
+
+ t = Topic.find_by_id(lookup[:topic_id])
+ if t
+ "#{NEW_SITE_PREFIX}/t/#{t.slug}/#{t.id}"
+ else
+ phpbb_link
+ end
+ end
+
+ def internal_url_regexp
+ @internal_url_regexp ||= Regexp.new("http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}/viewtopic\\.php?(?:\\S*)t=(\\d+)")
+ end
+
+ def mysql_query(sql)
+ @client.query(sql, cache_rows: false)
end
end