some improvements for importers (#5295)

* decode html entities within code blocks

* Only import users who actually participated in the bbPress part of WordPress; import password hashes

* create permalinks for topics

* Better handling of [code] blocks
This commit is contained in:
discoursehosting 2017-11-07 17:50:43 +01:00 committed by Régis Hanol
parent faf8bba9a6
commit 4f0bdec370
2 changed files with 66 additions and 8 deletions

View File

@ -22,6 +22,8 @@ class ImportScripts::Bbpress < ImportScripts::Base
def initialize
super
@he = HTMLEntities.new
@client = Mysql2::Client.new(
host: BB_PRESS_HOST,
username: BB_PRESS_USER,
@ -36,21 +38,32 @@ class ImportScripts::Bbpress < ImportScripts::Base
import_categories
import_topics_and_posts
import_private_messages
create_permalinks
end
def import_users
puts "", "importing users..."
last_user_id = -1
total_users = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}users WHERE user_email LIKE '%@%'").first["count"]
total_users = bbpress_query(<<-SQL
SELECT COUNT(DISTINCT(u.id)) AS cnt
FROM #{BB_PRESS_PREFIX}users u
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
WHERE p.post_type IN ('forum', 'reply', 'topic')
AND user_email LIKE '%@%'
SQL
).first["cnt"]
batches(BATCH_SIZE) do |offset|
users = bbpress_query(<<-SQL
SELECT id, user_nicename, display_name, user_email, user_registered, user_url
FROM #{BB_PRESS_PREFIX}users
SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass
FROM #{BB_PRESS_PREFIX}users u
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
WHERE user_email LIKE '%@%'
AND id > #{last_user_id}
ORDER BY id
AND p.post_type IN ('forum', 'reply', 'topic')
AND u.id > #{last_user_id}
GROUP BY u.id
ORDER BY u.id
LIMIT #{BATCH_SIZE}
SQL
).to_a
@ -86,6 +99,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
{
id: u["id"].to_i,
username: u["user_nicename"],
password: u["user_pass"],
email: u["user_email"].downcase,
name: u["display_name"].presence || u['user_nicename'],
created_at: u["user_registered"],
@ -242,8 +256,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
}
if post[:raw].present?
post[:raw].gsub!("<pre><code>", "```\n")
post[:raw].gsub!("</code></pre>", "\n```")
post[:raw].gsub!(/\<pre\>\<code(=[a-z]*)?\>(.*?)\<\/code\>\<\/pre\>/im) { "```\n#{@he.decode($2)}\n```" }
end
if p["post_type"] == "topic"
@ -264,6 +277,40 @@ class ImportScripts::Bbpress < ImportScripts::Base
end
end
# Creates a Permalink record for every non-spam bbPress topic, mapping the
# path component of the topic's old URL (its `guid` column, with any trailing
# slash removed) to the topic that was imported for it.
#
# Topics are read in batches of BATCH_SIZE rows, paging by id: each query
# selects the rows whose id is greater than the last id of the previous batch,
# and the loop stops at the first empty batch.
#
# This is best effort: a row that cannot be turned into a permalink is skipped
# (and the failure is printed) so that the remaining rows are still processed.
def create_permalinks
  puts "", "creating permalinks..."

  last_topic_id = -1

  batches(BATCH_SIZE) do
    topics = bbpress_query(<<-SQL
      SELECT id,
             guid
        FROM #{BB_PRESS_PREFIX}posts
       WHERE post_status <> 'spam'
         AND post_type IN ('topic')
         AND id > #{last_topic_id}
    ORDER BY id
       LIMIT #{BATCH_SIZE}
    SQL
    ).to_a

    break if topics.empty?

    topics.each do |t|
      topic = topic_lookup_from_imported_post_id(t['id'])
      # Without a lookup result there is nothing to point a permalink at.
      next unless topic

      begin
        path = URI.parse(t['guid']).path.to_s.chomp('/')
        # A guid without a usable path cannot serve as a permalink URL.
        next if path.empty?

        Permalink.create(url: path, topic_id: topic[:topic_id])
      rescue StandardError => e
        # Report the failure instead of hiding it, then carry on.
        puts "Could not create permalink for topic #{t['id']}: #{e.message}"
      end
    end

    last_topic_id = topics.last["id"].to_i
  end
end
def import_private_messages
puts "", "importing private messages..."

View File

@ -8,6 +8,7 @@ module ImportScripts::PhpBB3
@lookup = lookup
@database = database
@smiley_processor = smiley_processor
@he = HTMLEntities.new
@settings = settings
@new_site_prefix = settings.new_site_prefix
@ -25,7 +26,7 @@ module ImportScripts::PhpBB3
process_smilies(text)
process_links(text)
process_lists(text)
process_code(text)
text
end
@ -48,6 +49,9 @@ module ImportScripts::PhpBB3
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
text.gsub!(/:(?:\w{8})\]/, ']')
# remove color tags
text.gsub!(/\[\/?color(=#[a-z0-9]*)?\]/i, "")
end
def bbcode_to_md(text)
@ -142,5 +146,12 @@ module ImportScripts::PhpBB3
@long_internal_link_regexp = Regexp.new(%Q|<!-- l --><a(?:.+)href="#{link_regex}"(?:.*)</a><!-- l -->|, Regexp::IGNORECASE)
@short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE)
end
# Cleans up code markup in +text+, modifying the string in place.
#
# Three passes are applied in order:
# 1. `<span class="syntax...">` wrappers are removed, keeping their contents.
# 2. Every `[code]` / `[code=lang]` block is rewritten as a plain `[code]`
#    block whose contents have been run through `@he.decode`.
# 3. Every `<br />` in the whole text is replaced with a newline.
#
# @param text [String] the text to clean up; mutated in place
# @return [String] the same string object that was passed in
def process_code(text)
  text.gsub!(/<span class="syntax.*?>(.*?)<\/span>/) { Regexp.last_match(1) }

  # The `m` flag lets `.` match newlines; without it a code block spanning
  # several lines is never matched and its entities stay encoded.
  text.gsub!(/\[code(=[a-z]*)?\](.*?)\[\/code\]/im) do
    "[code]#{@he.decode(Regexp.last_match(2))}[/code]"
  end

  text.gsub!("<br />", "\n")
  text
end
end
end