some improvements for importers (#5295)
* Decode HTML entities within code blocks * Only import users that actually participated in the bbPress part of WordPress; import password hashes * Create permalinks for topics * Better handling of [code] blocks
This commit is contained in:
parent
faf8bba9a6
commit
4f0bdec370
|
@ -22,6 +22,8 @@ class ImportScripts::Bbpress < ImportScripts::Base
|
|||
def initialize
|
||||
super
|
||||
|
||||
@he = HTMLEntities.new
|
||||
|
||||
@client = Mysql2::Client.new(
|
||||
host: BB_PRESS_HOST,
|
||||
username: BB_PRESS_USER,
|
||||
|
@ -36,21 +38,32 @@ class ImportScripts::Bbpress < ImportScripts::Base
|
|||
import_categories
|
||||
import_topics_and_posts
|
||||
import_private_messages
|
||||
create_permalinks
|
||||
end
|
||||
|
||||
def import_users
|
||||
puts "", "importing users..."
|
||||
|
||||
last_user_id = -1
|
||||
total_users = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}users WHERE user_email LIKE '%@%'").first["count"]
|
||||
total_users = bbpress_query(<<-SQL
|
||||
SELECT COUNT(DISTINCT(u.id)) AS cnt
|
||||
FROM #{BB_PRESS_PREFIX}users u
|
||||
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
|
||||
WHERE p.post_type IN ('forum', 'reply', 'topic')
|
||||
AND user_email LIKE '%@%'
|
||||
SQL
|
||||
).first["cnt"]
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
users = bbpress_query(<<-SQL
|
||||
SELECT id, user_nicename, display_name, user_email, user_registered, user_url
|
||||
FROM #{BB_PRESS_PREFIX}users
|
||||
SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass
|
||||
FROM #{BB_PRESS_PREFIX}users u
|
||||
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
|
||||
WHERE user_email LIKE '%@%'
|
||||
AND id > #{last_user_id}
|
||||
ORDER BY id
|
||||
AND p.post_type IN ('forum', 'reply', 'topic')
|
||||
AND u.id > #{last_user_id}
|
||||
GROUP BY u.id
|
||||
ORDER BY u.id
|
||||
LIMIT #{BATCH_SIZE}
|
||||
SQL
|
||||
).to_a
|
||||
|
@ -86,6 +99,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
|
|||
{
|
||||
id: u["id"].to_i,
|
||||
username: u["user_nicename"],
|
||||
password: u["user_pass"],
|
||||
email: u["user_email"].downcase,
|
||||
name: u["display_name"].presence || u['user_nicename'],
|
||||
created_at: u["user_registered"],
|
||||
|
@ -242,8 +256,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
|
|||
}
|
||||
|
||||
if post[:raw].present?
|
||||
post[:raw].gsub!("<pre><code>", "```\n")
|
||||
post[:raw].gsub!("</code></pre>", "\n```")
|
||||
post[:raw].gsub!(/\<pre\>\<code(=[a-z]*)?\>(.*?)\<\/code\>\<\/pre\>/im) { "```\n#{@he.decode($2)}\n```" }
|
||||
end
|
||||
|
||||
if p["post_type"] == "topic"
|
||||
|
@ -264,6 +277,40 @@ class ImportScripts::Bbpress < ImportScripts::Base
|
|||
end
|
||||
end
|
||||
|
||||
# Creates a Permalink for each non-spam bbPress topic so that the path of the
# topic's original URL (taken from the `guid` column) points at the imported
# topic.
#
# Rows are fetched in pages of BATCH_SIZE ordered by id, using the last id
# seen as the lower bound for the next page; the offset yielded by `batches`
# is therefore not needed. Creation is best-effort: a row that cannot be
# turned into a permalink is skipped and never aborts the import.
def create_permalinks
  puts "", "creating permalinks..."

  last_topic_id = -1

  batches(BATCH_SIZE) do |_offset|
    topics = bbpress_query(<<-SQL).to_a
      SELECT id,
             guid
        FROM #{BB_PRESS_PREFIX}posts
       WHERE post_status <> 'spam'
         AND post_type IN ('topic')
         AND id > #{last_topic_id}
    ORDER BY id
       LIMIT #{BATCH_SIZE}
    SQL

    break if topics.empty?

    topics.each do |t|
      topic = topic_lookup_from_imported_post_id(t['id'])
      # No lookup result means there is nothing to link to (presumably the
      # topic was never imported -- confirm against the lookup helper).
      next unless topic

      begin
        # Permalink URLs are stored as the path without a trailing slash.
        Permalink.create(url: URI.parse(t['guid']).path.chomp('/'), topic_id: topic[:topic_id])
      rescue StandardError
        # Deliberately best-effort: an unparsable guid or a failed create
        # must not stop the remaining topics from being processed.
        nil
      end
    end

    last_topic_id = topics[-1]["id"].to_i
  end
end
|
||||
|
||||
def import_private_messages
|
||||
puts "", "importing private messages..."
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ module ImportScripts::PhpBB3
|
|||
@lookup = lookup
|
||||
@database = database
|
||||
@smiley_processor = smiley_processor
|
||||
@he = HTMLEntities.new
|
||||
|
||||
@settings = settings
|
||||
@new_site_prefix = settings.new_site_prefix
|
||||
|
@ -25,7 +26,7 @@ module ImportScripts::PhpBB3
|
|||
process_smilies(text)
|
||||
process_links(text)
|
||||
process_lists(text)
|
||||
|
||||
process_code(text)
|
||||
text
|
||||
end
|
||||
|
||||
|
@ -48,6 +49,9 @@ module ImportScripts::PhpBB3
|
|||
# [url=https://google.com:1qh1i7ky]click here[/url:1qh1i7ky]
|
||||
# [quote="cybereality":b0wtlzex]Some text.[/quote:b0wtlzex]
|
||||
text.gsub!(/:(?:\w{8})\]/, ']')
|
||||
|
||||
# remove color tags
|
||||
text.gsub!(/\[\/?color(=#[a-z0-9]*)?\]/i, "")
|
||||
end
|
||||
|
||||
def bbcode_to_md(text)
|
||||
|
@ -142,5 +146,12 @@ module ImportScripts::PhpBB3
|
|||
@long_internal_link_regexp = Regexp.new(%Q|<!-- l --><a(?:.+)href="#{link_regex}"(?:.*)</a><!-- l -->|, Regexp::IGNORECASE)
|
||||
@short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE)
|
||||
end
|
||||
|
||||
# Normalises phpBB code blocks in +text+ (mutated in place).
#
# 1. Removes the `<span class="syntax...">` wrappers around highlighted
#    code, keeping their contents.
# 2. Converts `<br />` markup into newlines.
# 3. Rewrites every `[code]` / `[code=lang]` block as a plain `[code]` block
#    with the HTML entities of its content decoded via `@he`.
#
# The `<br />` conversion runs before decoding so that an escaped, literal
# `&lt;br /&gt;` inside a code sample is not mistaken for a line break, and
# the code regexp uses multiline mode so blocks spanning several lines are
# decoded as well.
#
# @param text [String] post text; modified in place
# @return [String] the same +text+ object
def process_code(text)
  text.gsub!(/<span class="syntax.*?>(.*?)<\/span>/) { Regexp.last_match(1) }
  text.gsub!("<br />", "\n")
  text.gsub!(/\[code(?:=[a-z]*)?\](.*?)\[\/code\]/im) do
    "[code]#{@he.decode(Regexp.last_match(1))}[/code]"
  end
  text
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue