update bbPress importer

This commit is contained in:
Régis Hanol 2016-03-07 18:21:09 +01:00
parent 5741717322
commit b716886240
2 changed files with 157 additions and 88 deletions

View File

@ -197,20 +197,20 @@ class ImportScripts::Base
def all_records_exist?(type, import_ids) def all_records_exist?(type, import_ids)
return false if import_ids.empty? return false if import_ids.empty?
Post.exec_sql('create temp table import_ids(val varchar(200) primary key)') Post.exec_sql('CREATE TEMP TABLE import_ids(val varchar(200) PRIMARY KEY)')
import_id_clause = import_ids.map{|id| "('#{PG::Connection.escape_string(id.to_s)}')"}.join(",") import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",")
Post.exec_sql("insert into import_ids values #{import_id_clause}") Post.exec_sql("INSERT INTO import_ids VALUES #{import_id_clause}")
existing = "#{type.to_s.classify}CustomField".constantize.where(name: 'import_id') existing = "#{type.to_s.classify}CustomField".constantize.where(name: 'import_id')
existing = existing.joins('JOIN import_ids ON val=value') existing = existing.joins('JOIN import_ids ON val = value')
if existing.count == import_ids.length if existing.count == import_ids.length
puts "Skipping #{import_ids.length} already imported #{type}" puts "Skipping #{import_ids.length} already imported #{type}"
return true return true
end end
ensure ensure
Post.exec_sql('drop table import_ids') Post.exec_sql('DROP TABLE import_ids')
end end
# Iterate through a list of user records to be imported. # Iterate through a list of user records to be imported.

View File

@ -1,74 +1,121 @@
# `dropdb bbpress`
# `createdb bbpress`
# `bundle exec rake db:migrate`
require 'mysql2' require 'mysql2'
require File.expand_path(File.dirname(__FILE__) + "/base.rb") require File.expand_path(File.dirname(__FILE__) + "/base.rb")
BB_PRESS_DB = ENV['BBPRESS_DB'] || "bbpress"
DB_TABLE_PREFIX = "wp_"
class ImportScripts::Bbpress < ImportScripts::Base class ImportScripts::Bbpress < ImportScripts::Base
BB_PRESS_DB ||= ENV['BBPRESS_DB'] || "bbpress"
BATCH_SIZE ||= 1000
def initialize def initialize
super super
@client = Mysql2::Client.new( @client = Mysql2::Client.new(
host: "localhost", host: "localhost",
username: "root", username: "root",
#password: "password", database: BB_PRESS_DB,
database: BB_PRESS_DB
) )
end end
def table_name(name)
DB_TABLE_PREFIX + name
end
def execute def execute
users_results = @client.query(" import_users
SELECT id, import_categories
user_login username, import_topics_and_posts
display_name name,
user_url website,
user_email email,
user_registered created_at
FROM #{table_name 'users'}", cache_rows: false)
puts '', "creating users"
create_users(users_results) do |u|
ActiveSupport::HashWithIndifferentAccess.new(u)
end end
def import_users
puts "", "importing users..."
puts '', '', "creating categories" last_user_id = -1
total_users = bbpress_query("SELECT COUNT(*) count FROM wp_users WHERE user_email LIKE '%@%'").first["count"]
create_categories(@client.query("SELECT id, post_name, post_parent from #{table_name 'posts'} WHERE post_type = 'forum' AND post_name != '' ORDER BY post_parent")) do |c| batches(BATCH_SIZE) do |offset|
result = {id: c['id'], name: c['post_name']} users = bbpress_query(<<-SQL
parent_id = c['post_parent'].to_i SELECT id, user_nicename, display_name, user_email, user_registered, user_url
if parent_id > 0 FROM wp_users
result[:parent_category_id] = category_id_from_imported_category_id(parent_id) WHERE user_email LIKE '%@%'
AND id > #{last_user_id}
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
last_user_id = users[-1]["id"]
user_ids = users.map { |u| u["id"].to_i }
next if all_records_exist?(:users, user_ids)
user_ids_sql = user_ids.join(",")
users_description = {}
bbpress_query(<<-SQL
SELECT user_id, meta_value description
FROM wp_usermeta
WHERE user_id IN (#{user_ids_sql})
AND meta_key = 'description'
SQL
).each { |um| users_description[um["user_id"]] = um["description"] }
users_last_activity = {}
bbpress_query(<<-SQL
SELECT user_id, meta_value last_activity
FROM wp_usermeta
WHERE user_id IN (#{user_ids_sql})
AND meta_key = 'last_activity'
SQL
).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] }
create_users(users, total: total_users, offset: offset) do |u|
{
id: u["id"].to_i,
username: u["user_nicename"],
email: u["user_email"].downcase,
name: u["display_name"],
created_at: u["user_registered"],
website: u["user_url"],
bio_raw: users_description[u["id"]],
last_seen_at: users_last_activity[u["id"]],
}
end
end end
result
end end
import_posts def import_categories
puts "", "importing categories..."
categories = bbpress_query(<<-SQL
SELECT id, post_name, post_parent
FROM wp_posts
WHERE post_type = 'forum'
AND LENGTH(COALESCE(post_name, '')) > 0
ORDER BY post_parent, id
SQL
)
create_categories(categories) do |c|
category = { id: c['id'], name: c['post_name'] }
if (parent_id = c['post_parent'].to_i) > 0
category[:parent_category_id] = category_id_from_imported_category_id(parent_id)
end
category
end
end end
def import_posts def import_topics_and_posts
puts '', "creating topics and posts" puts "", "importing topics and posts..."
total_count = @client.query(" last_post_id = -1
SELECT count(*) count total_posts = bbpress_query(<<-SQL
FROM #{table_name 'posts'} SELECT COUNT(*) count
FROM wp_posts
WHERE post_status <> 'spam' WHERE post_status <> 'spam'
AND post_type IN ('topic', 'reply')").first['count'] AND post_type IN ('topic', 'reply')
SQL
).first["count"]
batch_size = 1000 batches(BATCH_SIZE) do |offset|
posts = bbpress_query(<<-SQL
batches(batch_size) do |offset|
results = @client.query("
SELECT id, SELECT id,
post_author, post_author,
post_date, post_date,
@ -76,49 +123,71 @@ class ImportScripts::Bbpress < ImportScripts::Base
post_title, post_title,
post_type, post_type,
post_parent post_parent
FROM #{table_name 'posts'} FROM wp_posts
WHERE post_status <> 'spam' WHERE post_status <> 'spam'
AND post_type IN ('topic', 'reply') AND post_type IN ('topic', 'reply')
AND id > #{last_post_id}
ORDER BY id ORDER BY id
LIMIT #{batch_size} LIMIT #{BATCH_SIZE}
OFFSET #{offset}", cache_rows: false) SQL
).to_a
break if results.size < 1 break if posts.empty?
next if all_records_exist? :posts, results.map {|p| p["id"].to_i} last_post_id = posts[-1]["id"].to_i
post_ids = posts.map { |p| p["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |post| next if all_records_exist?(:posts, post_ids)
post_ids_sql = post_ids.join(",")
posts_likes = {}
bbpress_query(<<-SQL
SELECT post_id, meta_value likes
FROM wp_postmeta
WHERE post_id IN (#{post_ids_sql})
AND meta_key = 'Likes'
SQL
).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i }
create_posts(posts, total: total_posts, offset: offset) do |p|
skip = false skip = false
mapped = {}
mapped[:id] = post["id"] post = {
mapped[:user_id] = user_id_from_imported_user_id(post["post_author"]) || find_user_by_import_id(post["post_author"]).try(:id) || -1 id: p["id"],
mapped[:raw] = post["post_content"] user_id: user_id_from_imported_user_id(p["post_author"]) || find_user_by_import_id(p["post_author"]).try(:id) || -1,
if mapped[:raw] raw: p["post_content"],
mapped[:raw] = mapped[:raw].gsub("<pre><code>", "```\n").gsub("</code></pre>", "\n```") created_at: p["post_date"],
like_count: posts_likes[p["id"]],
}
if post[:raw].present?
post[:raw].gsub!("<pre><code>", "```\n")
post[:raw].gsub!("</code></pre>", "\n```")
end end
mapped[:created_at] = post["post_date"]
mapped[:custom_fields] = {import_id: post["id"]}
if post["post_type"] == "topic" if p["post_type"] == "topic"
mapped[:category] = category_id_from_imported_category_id(post["post_parent"]) post[:category] = category_id_from_imported_category_id(p["post_parent"])
mapped[:title] = CGI.unescapeHTML post["post_title"] post[:title] = CGI.unescapeHTML(p["post_title"])
else else
parent = topic_lookup_from_imported_post_id(post["post_parent"]) if parent = topic_lookup_from_imported_post_id(p["post_parent"])
if parent post[:topic_id] = parent[:topic_id]
mapped[:topic_id] = parent[:topic_id] post[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
else else
puts "Skipping #{post["id"]}: #{post["post_content"][0..40]}" puts "Skipping #{p["id"]}: #{p["post_content"][0..40]}"
skip = true skip = true
end end
end end
skip ? nil : mapped skip ? nil : post
end end
end end
end end
def bbpress_query(sql)
@client.query(sql, cache_rows: false)
end
end end
ImportScripts::Bbpress.new.perform ImportScripts::Bbpress.new.perform