discourse/script/import_scripts/drupal.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

532 lines
14 KiB
Ruby
Raw Permalink Normal View History

# frozen_string_literal: true
require "mysql2"
require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Drupal < ImportScripts::Base
DRUPAL_DB = ENV["DRUPAL_DB"] || "drupal"
VID = ENV["DRUPAL_VID"] || 1
BATCH_SIZE = 1000
ATTACHMENT_DIR = "/root/files/upload"
# Builds the importer: runs the base-class initializer, then creates the HTML
# entity decoder and the MySQL client used to read the Drupal database.
def initialize
  super

  @htmlentities = HTMLEntities.new

  # Connection settings for the source database; uncomment and fill in the
  # password line if the MySQL account needs one.
  connection_settings = {
    host: "localhost",
    username: "root",
    #password: "password",
    database: DRUPAL_DB,
  }
  @client = Mysql2::Client.new(connection_settings)
end
# Runs every import phase in a fixed order. Blog topics are only imported when
# the DRUPAL_IMPORT_BLOG environment variable is set.
def execute
  import_users
  import_categories

  # "Nodes" in Drupal are divided into types. Here we import two types,
  # and will later import all the comments/replies for each node.
  # You will need to figure out what the type names are on your install and edit the queries to match.
  import_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]

  remaining_phases = %i[
    import_forum_topics
    import_replies
    import_likes
    mark_topics_as_solved
    import_sso_records
    import_attachments
    postprocess_posts
    create_permalinks
    import_gravatars
  ]

  # The value of the final phase is returned, as a plain sequence of calls would.
  remaining_phases.map { |phase| send(phase) }.last
end
# Imports Drupal users batch by batch.
#
# Rows are paged by uid (each query starts after the last uid seen) rather
# than by OFFSET; the offset yielded by `batches` is only forwarded to
# `create_users` together with the total count.
def import_users
  puts "", "importing users"

  user_count = mysql_query("SELECT count(uid) count FROM users").first["count"]
  last_user_id = -1

  batches(BATCH_SIZE) do |offset|
    rows = mysql_query(<<~SQL).to_a
      SELECT uid,
      name username,
      mail email,
      created
      FROM users
      WHERE uid > #{last_user_id}
      ORDER BY uid
      LIMIT #{BATCH_SIZE}
    SQL

    break if rows.empty?

    # Remember where this page ended before dropping already-imported rows.
    last_user_id = rows.last["uid"]
    rows.reject! { |row| @lookup.user_already_imported?(row["uid"]) }

    create_users(rows, total: user_count, offset: offset) do |row|
      # Fall back to a generated address when the mail is missing or invalid.
      address = row["email"].presence || fake_email
      address = fake_email unless EmailAddressValidator.valid_value?(address)

      display_name = @htmlentities.decode(row["username"]).strip

      {
        id: row["uid"],
        name: display_name,
        email: address,
        created_at: Time.zone.at(row["created"]),
      }
    end
  end
end
# Creates one category per taxonomy term of the configured vocabulary (VID),
# decoding HTML entities in the term name and description.
def import_categories
  # You'll need to edit the following query for your Drupal install:
  #
  # * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
  # * Table name may be term_data.
  # * May need to select a vid other than 1
  puts "", "importing categories"

  terms = mysql_query(<<~SQL).to_a
    SELECT tid,
    name,
    description
    FROM taxonomy_term_data
    WHERE vid = #{VID}
  SQL

  create_categories(terms) do |term|
    decoded_name = @htmlentities.decode(term["name"]).strip
    decoded_description = @htmlentities.decode(term["description"]).strip

    { id: term["tid"], name: decoded_name, description: decoded_description }
  end
end
# Imports published 'article' nodes as topics in a "Blog" category, creating
# that category first when no category with that name exists.
def import_blog_topics
  puts "", "importing blog topics"

  existing_blog_category = Category.find_by_name("Blog")
  unless existing_blog_category
    create_category({ name: "Blog", description: "Articles from the blog" }, nil)
  end

  articles = mysql_query(<<~SQL).to_a
    SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
    f.body_value body
    FROM node n,
    field_data_body f
    WHERE n.type = 'article'
    AND n.nid = f.entity_id
    AND n.status = 1
  SQL

  # Looked up again so that a category created just above is picked up.
  blog_category_id = Category.find_by_name("Blog").id

  create_posts(articles) do |article|
    import_id = "nid:#{article["nid"]}"
    author_id = user_id_from_imported_user_id(article["uid"]) || -1
    created_at = Time.zone.at(article["created"])
    sticky = article["sticky"].to_i == 1

    {
      id: import_id,
      user_id: author_id,
      category: blog_category_id,
      raw: article["body"],
      created_at: created_at,
      # Sticky nodes are pinned as of their creation time.
      pinned_at: sticky ? created_at : nil,
      title: article["title"].try(:strip),
      custom_fields: {
        import_id: import_id,
      },
    }
  end
end
# Imports published 'forum' nodes as topics, in batches of BATCH_SIZE.
#
# The paged query is ordered by node id: without an ORDER BY the row order of
# a LIMIT/OFFSET query is unspecified, so consecutive batches could repeat or
# skip topics.
def import_forum_topics
  puts "", "importing forum topics"

  total_count = mysql_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
      FROM forum_index fi, node n
     WHERE n.type = 'forum'
       AND fi.nid = n.nid
       AND n.status = 1
  SQL

  batches(BATCH_SIZE) do |offset|
    # NOTE(review): fl.fid = 7 is presumably the id of the "solved" flag on the
    # source site (its timestamp is read as `solved`) -- confirm per install.
    results = mysql_query(<<~SQL).to_a
      SELECT fi.nid nid,
             fi.title title,
             fi.tid tid,
             n.uid uid,
             fi.created created,
             fi.sticky sticky,
             f.body_value body,
             nc.totalcount views,
             fl.timestamp solved
        FROM forum_index fi
        LEFT JOIN node n ON fi.nid = n.nid
        LEFT JOIN field_data_body f ON f.entity_id = n.nid
        LEFT JOIN flagging fl ON fl.entity_id = n.nid
                             AND fl.fid = 7
        LEFT JOIN node_counter nc ON nc.nid = n.nid
       WHERE n.type = 'forum'
         AND n.status = 1
       ORDER BY fi.nid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    SQL

    break if results.empty?

    # Skip the whole batch when every topic in it was imported on an earlier run.
    next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }

    create_posts(results, total: total_count, offset: offset) do |row|
      created_at = Time.zone.at(row["created"])

      topic = {
        id: "nid:#{row["nid"]}",
        user_id: user_id_from_imported_user_id(row["uid"]) || -1,
        category: category_id_from_imported_category_id(row["tid"]),
        raw: preprocess_raw(row["body"]),
        created_at: created_at,
        # Sticky nodes are pinned as of their creation time.
        pinned_at: row["sticky"].to_i == 1 ? created_at : nil,
        title: row["title"].try(:strip),
        views: row["views"],
      }

      # Flag the topic so mark_topics_as_solved can find it later.
      topic[:custom_fields] = { import_solved: true } if row["solved"].present?
      topic
    end
  end
end
# Imports published comments on imported nodes as replies, in batches of
# BATCH_SIZE ordered by comment id.
#
# The count query and the paged query share one node-type list. Previously the
# count used ('article', 'forum') while the page query used ('blog', 'forum'),
# so comments on 'article' nodes (the type import_blog_topics imports) were
# counted but never fetched.
def import_replies
  puts "", "creating replies in topics"

  # Edit this list if your install uses different node type names.
  node_types = "'article', 'forum'"

  total_count = mysql_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
      FROM comment c,
           node n
     WHERE n.nid = c.nid
       AND c.status = 1
       AND n.type IN (#{node_types})
       AND n.status = 1
  SQL

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(<<~SQL).to_a
      SELECT c.cid, c.pid, c.nid, c.uid, c.created,
             f.comment_body_value body
        FROM comment c,
             field_data_comment_body f,
             node n
       WHERE c.cid = f.entity_id
         AND n.nid = c.nid
         AND c.status = 1
         AND n.type IN (#{node_types})
         AND n.status = 1
       ORDER BY c.cid ASC
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if results.empty?

    # Skip the whole batch when every comment in it was imported on an earlier run.
    next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }

    create_posts(results, total: total_count, offset: offset) do |row|
      topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
      topic_id = topic_mapping && topic_mapping[:topic_id]

      unless topic_id
        puts "No topic found for comment #{row["cid"]}"
        # A nil block result is what the original returned for unmatched comments.
        next
      end

      reply = {
        id: "cid:#{row["cid"]}",
        topic_id: topic_id,
        user_id: user_id_from_imported_user_id(row["uid"]) || -1,
        raw: preprocess_raw(row["body"]),
        created_at: Time.zone.at(row["created"]),
      }

      if row["pid"]
        parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
        # Only link to a parent that is itself a reply (post number above 1).
        reply[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > 1
      end

      reply
    end
  end
end
# Imports Drupal flaggings as post likes, in batches of BATCH_SIZE.
#
# The paged query is ordered by flagging_id: without an ORDER BY the row order
# of a LIMIT/OFFSET query is unspecified, so batches could repeat or skip likes.
def import_likes
  puts "", "importing post likes"

  batches(BATCH_SIZE) do |offset|
    # NOTE(review): flag ids 5 and 6 are site-specific; below, 5 is treated as a
    # like on a node and anything else as a like on a comment -- confirm per install.
    likes = mysql_query(<<~SQL).to_a
      SELECT flagging_id,
             fid,
             entity_id,
             uid
        FROM flagging
       WHERE fid IN (5, 6)
       ORDER BY flagging_id
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if likes.empty?

    likes.each do |like|
      identifier = like["fid"] == 5 ? "nid" : "cid"

      user_id = user_id_from_imported_user_id(like["uid"])
      next unless user_id

      post_id = post_id_from_imported_post_id("#{identifier}:#{like["entity_id"]}")
      next unless post_id

      user = User.find_by(id: user_id)
      next unless user

      post = Post.find_by(id: post_id)
      next unless post

      begin
        PostActionCreator.like(user, post)
      rescue StandardError => e
        # Best effort: one failing like must not abort the import, but say so.
        puts "Failed to like post #{post.id} as user #{user.id}: #{e.message}"
      end
    end
  end
end
# Marks the last post of every topic flagged with the `import_solved` custom
# field as the accepted answer.
#
# Topics are looked up with `find_by`, which returns nil for a missing id, so
# the guard below actually skips them; `Topic.find` would raise instead and
# abort the whole phase.
def mark_topics_as_solved
  puts "", "marking topics as solved"

  solved_topic_ids =
    TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)

  solved_topic_ids.each do |topic_id|
    topic = Topic.find_by(id: topic_id)
    next unless topic

    post = topic.posts.last
    next unless post

    PostCustomField.create!(post_id: post.id, name: "is_accepted_answer", value: true)
    TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post.id)
  end
end
# Creates a SingleSignOnRecord for every user that carries an `import_id`
# custom field, using that value as the external id.
#
# Users are looked up with `find_by`, which returns nil for a missing id, so a
# dangling custom field is skipped; `User.find` would raise outside the rescue
# and abort the whole phase.
def import_sso_records
  puts "", "importing sso records"

  start_time = Time.now
  current_count = 0

  users = UserCustomField.where(name: "import_id")
  total_count = users.count

  return if users.empty?

  users.each do |custom_field|
    user = User.find_by(id: custom_field.user_id)
    next unless user

    begin
      current_count += 1
      print_status(current_count, total_count, start_time)

      SingleSignOnRecord.create!(
        user_id: user.id,
        external_id: custom_field.value,
        external_email: user.email,
        last_payload: "",
      )
    rescue StandardError
      # Deliberately best effort: a record that cannot be created is skipped
      # so the remaining users are still processed.
      next
    end
  end
end
# Appends uploaded copies of Drupal post attachments to the matching imported
# posts, in batches of BATCH_SIZE, and prints a success/failure summary.
#
# Changes from the previous version:
# * the paged query has an ORDER BY, because LIMIT/OFFSET without one has an
#   unspecified row order and batches could repeat or skip attachments;
# * users and posts are checked with `exists?` / `find_by`, so a missing record
#   skips the attachment instead of raising outside the rescue;
# * the success and failure counters are now reported.
def import_attachments
  puts "", "importing attachments"

  current_count = 0
  success_count = 0
  fail_count = 0

  total_count = mysql_query(<<~SQL).first["count"]
    SELECT count(field_post_attachment_fid) count
      FROM field_data_field_post_attachment
  SQL

  batches(BATCH_SIZE) do |offset|
    attachments = mysql_query(<<~SQL).to_a
      SELECT *
        FROM field_data_field_post_attachment fp
        LEFT JOIN file_managed fm
          ON fp.field_post_attachment_fid = fm.fid
       ORDER BY fp.entity_type, fp.entity_id, fp.field_post_attachment_fid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if attachments.empty?

    attachments.each do |attachment|
      current_count += 1
      print_status current_count, total_count

      # Attachments hang off either a comment ("cid:") or a node ("nid:").
      identifier = attachment["entity_type"] == "comment" ? "cid" : "nid"

      user_id = user_id_from_imported_user_id(attachment["uid"])
      next unless user_id

      post_id = post_id_from_imported_post_id("#{identifier}:#{attachment["entity_id"]}")
      next unless post_id

      next unless User.exists?(id: user_id)

      post = Post.find_by(id: post_id)
      next unless post

      begin
        upload, filename = find_upload(post, attachment)

        unless upload
          fail_count += 1
          next
        end

        upload_html = html_for_upload(upload, filename)
        new_raw = post.raw.dup
        # Only append the markup once, so re-running the import is harmless.
        new_raw = "#{new_raw}\n\n#{upload_html}" if new_raw.exclude?(upload_html)

        if new_raw != post.raw
          PostRevisor.new(post).revise!(
            post.user,
            { raw: new_raw },
            bypass_bump: true,
            edit_reason: "Import attachment from Drupal",
          )
        else
          puts "", "Skipped upload: already imported"
        end

        success_count += 1
      rescue => e
        fail_count += 1
        puts e
      end
    end
  end

  puts "", "attachments imported: #{success_count}, failed: #{fail_count}"
end
# Creates a "/node/<nid>" permalink for every listable topic whose `import_id`
# custom field holds a Drupal node id ("nid:<digits>").
#
# Topics without such an id are skipped. Previously an import id that did not
# match produced a nil node id and a bogus "/node/" permalink.
def create_permalinks
  puts "", "creating permalinks..."

  Topic.listable_topics.find_each do |topic|
    begin
      tcf = topic.custom_fields
      node_id = tcf && tcf["import_id"].to_s[/nid:(\d+)/, 1]
      next unless node_id

      Permalink.create(url: "/node/#{node_id}", topic_id: topic.id)
    rescue => e
      puts e.message
      puts "Permalink creation failed for id #{topic.id}"
    end
  end
end
# Locates the file behind a Drupal attachment row under ATTACHMENT_DIR and
# uploads it on behalf of the post's author.
#
# @param post the post the attachment belongs to; its user is the uploader
# @param attachment [Hash] attachment row; reads the "uri" and "filename" keys
# @return [Array, nil] `[upload, real_filename]` on success, nil when the uri
#   is not a `public://upload/...` uri, the file is missing on disk, or the
#   upload is invalid
def find_upload(post, attachment)
  # The row comes from a LEFT JOIN, so "uri" can be nil when no file row matched.
  relative_path = attachment["uri"].to_s[%r{public://upload/(.+)}, 1]
  if relative_path.nil?
    puts "Attachment #{attachment["filename"]} has no usable uri: #{attachment["uri"].inspect}"
    return
  end

  real_filename = CGI.unescapeHTML(relative_path)
  file = File.join(ATTACHMENT_DIR, real_filename)

  unless File.exist?(file)
    puts "Attachment file #{attachment["filename"]} doesn't exist"
    # Keep a running list of missing files for later inspection.
    failed_log = File.join("/tmp/", "attachments_failed.txt")
    File.open(failed_log, "a") { |f| f.puts attachment["filename"] }
    # There is nothing to upload, so stop here instead of handing a
    # non-existent path to create_upload.
    return
  end

  upload = create_upload(post.user&.id || -1, file, real_filename)

  if upload.nil? || upload.errors.any?
    puts "Upload not valid"
    puts upload.errors.inspect if upload
    return
  end

  [upload, real_filename]
end
# Converts Drupal BBCode-style quotes in a post body into Markdown / Discourse
# quote syntax.
#
# The input string is left untouched (frozen strings are accepted) and a new
# string is returned. The previous version edited `raw` in place and used the
# result of `gsub!` as the replacement text; `gsub!` returns nil when nothing
# was substituted, which silently dropped such a quote.
#
# @param raw [String, nil] original post body
# @return [String, nil] the converted, stripped body, or nil when `raw` is blank
def preprocess_raw(raw)
  return if raw.blank?

  # quotes on new lines
  converted = raw.gsub(%r{\[quote\](.+?)\[/quote\]}im) do
    quoted = "\n#{Regexp.last_match(1)}\n"
    # Prefix every non-empty quoted line with "> ".
    quoted.gsub(/\n(.+?)/) { "\n> #{Regexp.last_match(1)}" }
  end

  # [QUOTE=<username>]...[/QUOTE]
  converted = converted.gsub(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
    username = Regexp.last_match(1)
    quote = Regexp.last_match(2)
    "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
  end

  converted.strip
end
# Rewrites links to the old Drupal site inside every post so they point at the
# imported topic or reply.
#
# A link whose target was not imported is kept as it was. Previously the
# replacement block ran `next` with no value for such links, which made
# `gsub!` replace them with an empty string and erased them from the post.
#
# NOTE(review): the site.com / community.site.com hosts look like placeholders
# to be edited per install -- confirm before running.
def postprocess_posts
  puts "", "postprocessing posts"

  current = 0
  max = Post.count

  Post.find_each do |post|
    begin
      raw = post.raw
      new_raw = raw.dup

      # replace old topic to new topic links
      new_raw.gsub!(%r{https://site.com/forum/topic/(\d+)}im) do |old_link|
        post_id = post_id_from_imported_post_id("nid:#{Regexp.last_match(1)}")
        next old_link unless post_id

        topic = Post.find(post_id).topic
        "https://community.site.com/t/-/#{topic.id}"
      end

      # replace old comment to reply links
      new_raw.gsub!(%r{https://site.com/comment/(\d+)#comment-\d+}im) do |old_link|
        post_id = post_id_from_imported_post_id("cid:#{Regexp.last_match(1)}")
        next old_link unless post_id

        post_ref = Post.find(post_id)
        "https://community.site.com/t/-/#{post_ref.topic_id}/#{post_ref.post_number}"
      end

      # Only save posts whose text actually changed.
      if raw != new_raw
        post.raw = new_raw
        post.save
      end
    rescue StandardError
      puts "", "Failed rewrite on post: #{post.id}"
    ensure
      print_status(current += 1, max)
    end
  end
end
# Ensures every user has a user_avatar record and refreshes its gravatar.
# A failure for one user is reported and the loop carries on.
def import_gravatars
  puts "", "importing gravatars"

  current = 0
  max = User.count

  User.find_each do |user|
    begin
      user.create_user_avatar(user_id: user.id) unless user.user_avatar
      user.user_avatar.update_gravatar!
    rescue StandardError
      # Double quotes are required here: the message used to be single-quoted,
      # which printed the literal text "#{user.id}" instead of the id.
      puts "", "Failed avatar update on user #{user.id}"
    ensure
      print_status(current += 1, max)
    end
  end
end
# Parses a Unix epoch timestamp (seconds) into a DateTime.
#
# @param time [String, Integer] seconds since the epoch; integers are accepted
#   as well as strings, because `DateTime.strptime` itself only takes a String
# @return [DateTime]
def parse_datetime(time)
  DateTime.strptime(time.to_s, "%s")
end
# Runs +sql+ on the Drupal database connection with the `cache_rows` option
# enabled and returns whatever the client returns.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, query_options)
end
end
2014-09-10 14:27:18 -04:00
# Start the import only when this file is run directly, not when it is required.
if $PROGRAM_NAME == __FILE__
  ImportScripts::Drupal.new.perform
end