From b716886240d044b1770e0657e5e5894254bbfeb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Mon, 7 Mar 2016 18:21:09 +0100 Subject: [PATCH] update bbPress importer --- script/import_scripts/base.rb | 10 +- script/import_scripts/bbpress.rb | 235 ++++++++++++++++++++----------- 2 files changed, 157 insertions(+), 88 deletions(-) diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb index 63c55de5904..5c4cd5bc800 100644 --- a/script/import_scripts/base.rb +++ b/script/import_scripts/base.rb @@ -197,20 +197,20 @@ class ImportScripts::Base def all_records_exist?(type, import_ids) return false if import_ids.empty? - Post.exec_sql('create temp table import_ids(val varchar(200) primary key)') + Post.exec_sql('CREATE TEMP TABLE import_ids(val varchar(200) PRIMARY KEY)') - import_id_clause = import_ids.map{|id| "('#{PG::Connection.escape_string(id.to_s)}')"}.join(",") - Post.exec_sql("insert into import_ids values #{import_id_clause}") + import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",") + Post.exec_sql("INSERT INTO import_ids VALUES #{import_id_clause}") existing = "#{type.to_s.classify}CustomField".constantize.where(name: 'import_id') - existing = existing.joins('JOIN import_ids ON val=value') + existing = existing.joins('JOIN import_ids ON val = value') if existing.count == import_ids.length puts "Skipping #{import_ids.length} already imported #{type}" return true end ensure - Post.exec_sql('drop table import_ids') + Post.exec_sql('DROP TABLE import_ids') end # Iterate through a list of user records to be imported. diff --git a/script/import_scripts/bbpress.rb b/script/import_scripts/bbpress.rb index aba552d11e0..87ee57bdd14 100644 --- a/script/import_scripts/bbpress.rb +++ b/script/import_scripts/bbpress.rb @@ -1,124 +1,193 @@ -# `dropdb bbpress` -# `createdb bbpress` -# `bundle exec rake db:migrate` - require 'mysql2' require File.expand_path(File.dirname(__FILE__) + "/base.rb") -BB_PRESS_DB = ENV['BBPRESS_DB'] || "bbpress" -DB_TABLE_PREFIX = "wp_" - class ImportScripts::Bbpress < ImportScripts::Base + BB_PRESS_DB ||= ENV['BBPRESS_DB'] || "bbpress" + BATCH_SIZE ||= 1000 + def initialize super @client = Mysql2::Client.new( host: "localhost", username: "root", - #password: "password", - database: BB_PRESS_DB + database: BB_PRESS_DB, ) end - def table_name(name) - DB_TABLE_PREFIX + name - end - def execute - users_results = @client.query(" - SELECT id, - user_login username, - display_name name, - user_url website, - user_email email, - user_registered created_at - FROM #{table_name 'users'}", cache_rows: false) - - puts '', "creating users" - - create_users(users_results) do |u| - ActiveSupport::HashWithIndifferentAccess.new(u) - end - - - puts '', '', "creating categories" - - create_categories(@client.query("SELECT id, post_name, post_parent from #{table_name 'posts'} WHERE post_type = 'forum' AND post_name != '' ORDER BY post_parent")) do |c| - result = {id: c['id'], name: c['post_name']} - parent_id = c['post_parent'].to_i - if parent_id > 0 - result[:parent_category_id] = category_id_from_imported_category_id(parent_id) - end - result - end - - import_posts + import_users + import_categories + import_topics_and_posts end - def import_posts - puts '', "creating topics and posts" + def import_users + puts "", "importing users..." - total_count = @client.query(" - SELECT count(*) count - FROM #{table_name 'posts'} + last_user_id = -1 + total_users = bbpress_query("SELECT COUNT(*) count FROM wp_users WHERE user_email LIKE '%@%'").first["count"] + + batches(BATCH_SIZE) do |offset| + users = bbpress_query(<<-SQL + SELECT id, user_nicename, display_name, user_email, user_registered, user_url + FROM wp_users + WHERE user_email LIKE '%@%' + AND id > #{last_user_id} + ORDER BY id + LIMIT #{BATCH_SIZE} + SQL + ).to_a + + break if users.empty? + + last_user_id = users[-1]["id"] + user_ids = users.map { |u| u["id"].to_i } + + next if all_records_exist?(:users, user_ids) + + user_ids_sql = user_ids.join(",") + + users_description = {} + bbpress_query(<<-SQL + SELECT user_id, meta_value description + FROM wp_usermeta + WHERE user_id IN (#{user_ids_sql}) + AND meta_key = 'description' + SQL + ).each { |um| users_description[um["user_id"]] = um["description"] } + + users_last_activity = {} + bbpress_query(<<-SQL + SELECT user_id, meta_value last_activity + FROM wp_usermeta + WHERE user_id IN (#{user_ids_sql}) + AND meta_key = 'last_activity' + SQL + ).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] } + + create_users(users, total: total_users, offset: offset) do |u| + { + id: u["id"].to_i, + username: u["user_nicename"], + email: u["user_email"].downcase, + name: u["display_name"], + created_at: u["user_registered"], + website: u["user_url"], + bio_raw: users_description[u["id"]], + last_seen_at: users_last_activity[u["id"]], + } + end + end + end + + def import_categories + puts "", "importing categories..." + + categories = bbpress_query(<<-SQL + SELECT id, post_name, post_parent + FROM wp_posts + WHERE post_type = 'forum' + AND LENGTH(COALESCE(post_name, '')) > 0 + ORDER BY post_parent, id + SQL + ) + + create_categories(categories) do |c| + category = { id: c['id'], name: c['post_name'] } + if (parent_id = c['post_parent'].to_i) > 0 + category[:parent_category_id] = category_id_from_imported_category_id(parent_id) + end + category + end + end + + def import_topics_and_posts + puts "", "importing topics and posts..." + + last_post_id = -1 + total_posts = bbpress_query(<<-SQL + SELECT COUNT(*) count + FROM wp_posts WHERE post_status <> 'spam' - AND post_type IN ('topic', 'reply')").first['count'] + AND post_type IN ('topic', 'reply') + SQL + ).first["count"] - batch_size = 1000 + batches(BATCH_SIZE) do |offset| + posts = bbpress_query(<<-SQL + SELECT id, + post_author, + post_date, + post_content, + post_title, + post_type, + post_parent + FROM wp_posts + WHERE post_status <> 'spam' + AND post_type IN ('topic', 'reply') + AND id > #{last_post_id} + ORDER BY id + LIMIT #{BATCH_SIZE} + SQL + ).to_a - batches(batch_size) do |offset| - results = @client.query(" - SELECT id, - post_author, - post_date, - post_content, - post_title, - post_type, - post_parent - FROM #{table_name 'posts'} - WHERE post_status <> 'spam' - AND post_type IN ('topic', 'reply') - ORDER BY id - LIMIT #{batch_size} - OFFSET #{offset}", cache_rows: false) + break if posts.empty? - break if results.size < 1 + last_post_id = posts[-1]["id"].to_i + post_ids = posts.map { |p| p["id"].to_i } - next if all_records_exist? :posts, results.map {|p| p["id"].to_i} + next if all_records_exist?(:posts, post_ids) - create_posts(results, total: total_count, offset: offset) do |post| + post_ids_sql = post_ids.join(",") + + posts_likes = {} + bbpress_query(<<-SQL + SELECT post_id, meta_value likes + FROM wp_postmeta + WHERE post_id IN (#{post_ids_sql}) + AND meta_key = 'Likes' + SQL + ).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i } + + create_posts(posts, total: total_posts, offset: offset) do |p| skip = false - mapped = {} - mapped[:id] = post["id"] - mapped[:user_id] = user_id_from_imported_user_id(post["post_author"]) || find_user_by_import_id(post["post_author"]).try(:id) || -1 - mapped[:raw] = post["post_content"] - if mapped[:raw] - mapped[:raw] = mapped[:raw].gsub("
", "```\n").gsub("
", "\n```") + post = { + id: p["id"], + user_id: user_id_from_imported_user_id(p["post_author"]) || find_user_by_import_id(p["post_author"]).try(:id) || -1, + raw: p["post_content"], + created_at: p["post_date"], + like_count: posts_likes[p["id"]], + } + + if post[:raw].present? + post[:raw].gsub!("
", "```\n")
+          post[:raw].gsub!("
", "\n```") end - mapped[:created_at] = post["post_date"] - mapped[:custom_fields] = {import_id: post["id"]} - if post["post_type"] == "topic" - mapped[:category] = category_id_from_imported_category_id(post["post_parent"]) - mapped[:title] = CGI.unescapeHTML post["post_title"] + if p["post_type"] == "topic" + post[:category] = category_id_from_imported_category_id(p["post_parent"]) + post[:title] = CGI.unescapeHTML(p["post_title"]) else - parent = topic_lookup_from_imported_post_id(post["post_parent"]) - if parent - mapped[:topic_id] = parent[:topic_id] - mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1 + if parent = topic_lookup_from_imported_post_id(p["post_parent"]) + post[:topic_id] = parent[:topic_id] + post[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1 else - puts "Skipping #{post["id"]}: #{post["post_content"][0..40]}" + puts "Skipping #{p["id"]}: #{p["post_content"][0..40]}" skip = true end end - skip ? nil : mapped + skip ? nil : post end end end + def bbpress_query(sql) + @client.query(sql, cache_rows: false) + end + end ImportScripts::Bbpress.new.perform