DEV: Improvements to Drupal script (#10016)
Refactors the script to follow the conventions of the other importers and adds several features: importing likes, preprocessing of raw post text, and, if needed, importing SSO records.
This commit is contained in:
parent
3a7ca97c36
commit
be28fc73a0
|
@ -1,16 +1,21 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require "mysql2"
|
||||
require "htmlentities"
|
||||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||
|
||||
class ImportScripts::Drupal < ImportScripts::Base
|
||||
|
||||
DRUPAL_DB = ENV['DRUPAL_DB'] || "newsite3"
|
||||
DRUPAL_DB = ENV['DRUPAL_DB'] || "drupal"
|
||||
VID = ENV['DRUPAL_VID'] || 1
|
||||
BATCH_SIZE = 1000
|
||||
ATTACHMENT_DIR = "/root/files/upload"
|
||||
|
||||
def initialize
|
||||
super
|
||||
|
||||
@htmlentities = HTMLEntities.new
|
||||
|
||||
@client = Mysql2::Client.new(
|
||||
host: "localhost",
|
||||
username: "root",
|
||||
|
@ -19,142 +24,210 @@ class ImportScripts::Drupal < ImportScripts::Base
|
|||
)
|
||||
end
|
||||
|
||||
# Queries the Drupal database for the taxonomy terms (tid, name, description)
# that belong to the vocabulary selected by VID and returns the query result.
def categories_query
  sql = "SELECT tid, name, description FROM taxonomy_term_data WHERE vid = #{VID}"
  @client.query(sql)
end
|
||||
|
||||
def execute
|
||||
create_users(@client.query("SELECT uid id, name, mail email, created FROM users;")) do |row|
|
||||
{ id: row['id'], username: row['name'], email: row['email'], created_at: Time.zone.at(row['created']) }
|
||||
end
|
||||
|
||||
# You'll need to edit the following query for your Drupal install:
|
||||
#
|
||||
# * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
|
||||
# * Table name may be term_data.
|
||||
# * May need to select a vid other than 1.
|
||||
create_categories(categories_query) do |c|
|
||||
{ id: c['tid'], name: c['name'], description: c['description'] }
|
||||
end
|
||||
import_users
|
||||
import_categories
|
||||
|
||||
# "Nodes" in Drupal are divided into types. Here we import two types,
|
||||
# and will later import all the comments/replies for each node.
|
||||
# You will need to figure out what the type names are on your install and edit the queries to match.
|
||||
if ENV['DRUPAL_IMPORT_BLOG']
|
||||
create_blog_topics
|
||||
import_blog_topics
|
||||
end
|
||||
|
||||
create_forum_topics
|
||||
import_forum_topics
|
||||
|
||||
create_replies
|
||||
import_replies
|
||||
import_likes
|
||||
mark_topics_as_solved
|
||||
import_sso_records
|
||||
import_attachments
|
||||
postprocess_posts
|
||||
create_permalinks
|
||||
import_gravatars
|
||||
end
|
||||
|
||||
begin
|
||||
create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
|
||||
rescue => e
|
||||
puts '', "Failed to create admin user"
|
||||
puts e.message
|
||||
# Imports Drupal users in batches of BATCH_SIZE.
#
# Rows are read in ascending uid order, each batch starting after the highest
# uid seen so far. Users that the lookup reports as already imported are
# dropped before being handed to create_users. A missing or invalid email is
# replaced by a generated fake address, and the Drupal name is HTML-entity
# decoded and stripped before being used as the display name.
def import_users
  puts "", "importing users"

  total_users = mysql_query("SELECT count(uid) count FROM users").first["count"]

  highest_uid_seen = -1

  batches(BATCH_SIZE) do |offset|
    rows = mysql_query(<<-SQL
      SELECT uid,
             name username,
             mail email,
             created
        FROM users
       WHERE uid > #{highest_uid_seen}
    ORDER BY uid
       LIMIT #{BATCH_SIZE}
    SQL
    ).to_a

    break if rows.empty?

    # Remember where this batch ended before filtering, so the next query
    # continues after it even when every row here was already imported.
    highest_uid_seen = rows[-1]["uid"]

    rows.reject! { |row| @lookup.user_already_imported?(row["uid"]) }

    create_users(rows, total: total_users, offset: offset) do |row|
      email = row["email"].presence || fake_email
      email = fake_email unless email[EmailValidator.email_regex]

      display_name = @htmlentities.decode(row["username"]).strip

      {
        id: row["uid"],
        name: display_name,
        email: email,
        created_at: Time.zone.at(row["created"])
      }
    end
  end
end
|
||||
|
||||
def create_blog_topics
|
||||
puts '', "creating blog topics"
|
||||
def import_categories
|
||||
# You'll need to edit the following query for your Drupal install:
|
||||
#
|
||||
# * Drupal allows duplicate category names, so you may need to exclude some categories or rename them here.
|
||||
# * Table name may be term_data.
|
||||
# * May need to select a vid other than 1
|
||||
|
||||
create_category({
|
||||
name: 'Blog',
|
||||
user_id: -1,
|
||||
description: "Articles from the blog"
|
||||
}, nil) unless Category.find_by_name('Blog')
|
||||
puts "", "importing categories"
|
||||
|
||||
results = @client.query("
|
||||
SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
|
||||
f.body_value body
|
||||
FROM node n,
|
||||
field_data_body f
|
||||
WHERE n.type = 'blog'
|
||||
AND n.nid = f.entity_id
|
||||
AND n.status = 1
|
||||
", cache_rows: false)
|
||||
categories = mysql_query(<<-SQL
|
||||
SELECT tid,
|
||||
name,
|
||||
description
|
||||
FROM taxonomy_term_data
|
||||
WHERE vid = #{VID}
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
create_posts(results) do |row|
|
||||
create_categories(categories) do |category|
|
||||
{
|
||||
id: "nid:#{row['nid']}",
|
||||
user_id: user_id_from_imported_user_id(row['uid']) || -1,
|
||||
category: 'Blog',
|
||||
raw: row['body'],
|
||||
created_at: Time.zone.at(row['created']),
|
||||
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
|
||||
title: row['title'].try(:strip),
|
||||
custom_fields: { import_id: "nid:#{row['nid']}" }
|
||||
id: category['tid'],
|
||||
name: @htmlentities.decode(category['name']).strip,
|
||||
description: @htmlentities.decode(category['description']).strip
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def create_forum_topics
|
||||
puts '', "creating forum topics"
|
||||
def import_blog_topics
|
||||
puts '', "importing blog topics"
|
||||
|
||||
total_count = @client.query("
|
||||
create_category(
|
||||
{
|
||||
name: 'Blog',
|
||||
description: "Articles from the blog"
|
||||
},
|
||||
nil) unless Category.find_by_name('Blog')
|
||||
|
||||
blogs = mysql_query(<<-SQL
|
||||
SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
|
||||
f.body_value body
|
||||
FROM node n,
|
||||
field_data_body f
|
||||
WHERE n.type = 'article'
|
||||
AND n.nid = f.entity_id
|
||||
AND n.status = 1
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
category_id = Category.find_by_name('Blog').id
|
||||
|
||||
create_posts(blogs) do |topic|
|
||||
{
|
||||
id: "nid:#{topic['nid']}",
|
||||
user_id: user_id_from_imported_user_id(topic['uid']) || -1,
|
||||
category: category_id,
|
||||
raw: topic['body'],
|
||||
created_at: Time.zone.at(topic['created']),
|
||||
pinned_at: topic['sticky'].to_i == 1 ? Time.zone.at(topic['created']) : nil,
|
||||
title: topic['title'].try(:strip),
|
||||
custom_fields: { import_id: "nid:#{topic['nid']}" }
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
def import_forum_topics
|
||||
puts '', "importing forum topics"
|
||||
|
||||
total_count = mysql_query(<<-SQL
|
||||
SELECT COUNT(*) count
|
||||
FROM forum_index fi, node n
|
||||
WHERE n.type = 'forum'
|
||||
AND fi.nid = n.nid
|
||||
AND n.status = 1;").first['count']
|
||||
AND n.status = 1
|
||||
SQL
|
||||
).first['count']
|
||||
|
||||
batch_size = 1000
|
||||
|
||||
batches(batch_size) do |offset|
|
||||
results = @client.query("
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
results = mysql_query(<<-SQL
|
||||
SELECT fi.nid nid,
|
||||
fi.title title,
|
||||
fi.tid tid,
|
||||
n.uid uid,
|
||||
fi.created created,
|
||||
fi.sticky sticky,
|
||||
f.body_value body
|
||||
FROM forum_index fi,
|
||||
node n,
|
||||
field_data_body f
|
||||
f.body_value body,
|
||||
nc.totalcount views,
|
||||
fl.timestamp solved
|
||||
FROM forum_index fi
|
||||
LEFT JOIN node n ON fi.nid = n.nid
|
||||
LEFT JOIN field_data_body f ON f.entity_id = n.nid
|
||||
LEFT JOIN flagging fl ON fl.entity_id = n.nid
|
||||
AND fl.fid = 7
|
||||
LEFT JOIN node_counter nc ON nc.nid = n.nid
|
||||
WHERE n.type = 'forum'
|
||||
AND fi.nid = n.nid
|
||||
AND n.nid = f.entity_id
|
||||
AND n.status = 1
|
||||
LIMIT #{batch_size}
|
||||
LIMIT #{BATCH_SIZE}
|
||||
OFFSET #{offset};
|
||||
", cache_rows: false)
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if results.size < 1
|
||||
|
||||
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
|
||||
|
||||
create_posts(results, total: total_count, offset: offset) do |row|
|
||||
{
|
||||
raw = preprocess_raw(row['body'])
|
||||
topic = {
|
||||
id: "nid:#{row['nid']}",
|
||||
user_id: user_id_from_imported_user_id(row['uid']) || -1,
|
||||
category: category_id_from_imported_category_id(row['tid']),
|
||||
raw: row['body'],
|
||||
raw: raw,
|
||||
created_at: Time.zone.at(row['created']),
|
||||
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
|
||||
title: row['title'].try(:strip)
|
||||
title: row['title'].try(:strip),
|
||||
views: row['views']
|
||||
}
|
||||
topic[:custom_fields] = { import_solved: true } if row['solved'].present?
|
||||
topic
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def create_replies
|
||||
def import_replies
|
||||
puts '', "creating replies in topics"
|
||||
|
||||
total_count = @client.query("
|
||||
total_count = mysql_query(<<-SQL
|
||||
SELECT COUNT(*) count
|
||||
FROM comment c,
|
||||
node n
|
||||
WHERE n.nid = c.nid
|
||||
AND c.status = 1
|
||||
AND n.type IN ('blog', 'forum')
|
||||
AND n.status = 1;").first['count']
|
||||
AND n.type IN ('article', 'forum')
|
||||
AND n.status = 1
|
||||
SQL
|
||||
).first['count']
|
||||
|
||||
batch_size = 1000
|
||||
|
||||
batches(batch_size) do |offset|
|
||||
results = @client.query("
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
results = mysql_query(<<-SQL
|
||||
SELECT c.cid, c.pid, c.nid, c.uid, c.created,
|
||||
f.comment_body_value body
|
||||
FROM comment c,
|
||||
|
@ -165,9 +238,10 @@ class ImportScripts::Drupal < ImportScripts::Base
|
|||
AND c.status = 1
|
||||
AND n.type IN ('blog', 'forum')
|
||||
AND n.status = 1
|
||||
LIMIT #{batch_size}
|
||||
OFFSET #{offset};
|
||||
", cache_rows: false)
|
||||
LIMIT #{BATCH_SIZE}
|
||||
OFFSET #{offset}
|
||||
SQL
|
||||
).to_a
|
||||
|
||||
break if results.size < 1
|
||||
|
||||
|
@ -176,11 +250,12 @@ class ImportScripts::Drupal < ImportScripts::Base
|
|||
create_posts(results, total: total_count, offset: offset) do |row|
|
||||
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
|
||||
if topic_mapping && topic_id = topic_mapping[:topic_id]
|
||||
raw = preprocess_raw(row['body'])
|
||||
h = {
|
||||
id: "cid:#{row['cid']}",
|
||||
topic_id: topic_id,
|
||||
user_id: user_id_from_imported_user_id(row['uid']) || -1,
|
||||
raw: row['body'],
|
||||
raw: raw,
|
||||
created_at: Time.zone.at(row['created']),
|
||||
}
|
||||
if row['pid']
|
||||
|
@ -196,6 +271,265 @@ class ImportScripts::Drupal < ImportScripts::Base
|
|||
end
|
||||
end
|
||||
|
||||
# Imports rows of the Drupal "flagging" table as likes on imported posts.
#
# A row with fid 5 is resolved against the import id "nid:<entity_id>", any
# other selected row (fid 6) against "cid:<entity_id>". Rows whose user or
# post cannot be resolved to an existing record are skipped.
# NOTE(review): the flag ids 5 and 6 are hard-coded — presumably specific to
# the site this script was written for; confirm against your Drupal install.
def import_likes
  puts "", "importing post likes"

  batches(BATCH_SIZE) do |offset|
    # ORDER BY makes LIMIT/OFFSET paging deterministic; without it the server
    # is free to return rows in a different order for each batch.
    likes = mysql_query(<<-SQL
      SELECT flagging_id,
             fid,
             entity_id,
             uid
        FROM flagging
       WHERE fid = 5
          OR fid = 6
    ORDER BY flagging_id
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL
    ).to_a

    break if likes.empty?

    likes.each do |like|
      identifier = like['fid'] == 5 ? 'nid' : 'cid'
      next unless user_id = user_id_from_imported_user_id(like['uid'])
      next unless post_id = post_id_from_imported_post_id("#{identifier}:#{like['entity_id']}")
      next unless user = User.find_by(id: user_id)
      next unless post = Post.find_by(id: post_id)

      begin
        PostActionCreator.like(user, post)
      rescue => e
        # Best effort: a single failed like must not stop the import, but it
        # should not disappear silently either.
        puts "", "Failed to import like #{like['flagging_id']}: #{e.message}"
      end
    end
  end
end
|
||||
|
||||
# Marks every topic that carries the "import_solved" custom field as solved.
#
# For each such topic the last post of the topic is recorded as the accepted
# answer, by creating an "is_accepted_answer" post custom field and an
# "accepted_answer_post_id" topic custom field.
def mark_topics_as_solved
  puts "", "marking topics as solved"

  solved_topic_ids = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)

  solved_topic_ids.each do |topic_id|
    # find_by returns nil for a missing row, whereas find raises; only with
    # find_by can this guard actually skip a topic that no longer exists.
    next unless topic = Topic.find_by(id: topic_id)
    next unless post = topic.posts.last

    post_id = post.id

    PostCustomField.create!(post_id: post_id, name: "is_accepted_answer", value: true)
    TopicCustomField.create!(topic_id: topic_id, name: "accepted_answer_post_id", value: post_id)
  end
end
|
||||
|
||||
# Creates a SingleSignOnRecord for every user that has an "import_id" custom
# field, using the stored import id as the external id and the user's current
# email as the external email.
def import_sso_records
  puts "", "importing sso records"

  start_time = Time.now
  current_count = 0

  import_id_fields = UserCustomField.where(name: "import_id")
  total_count = import_id_fields.count

  return if total_count == 0

  # find_each loads the rows in batches instead of all at once.
  import_id_fields.find_each do |field|
    # Count every row, including skipped ones, so the progress display can
    # reach the total.
    current_count += 1
    print_status(current_count, total_count, start_time)

    # find_by returns nil for a missing user; find would raise and abort.
    next unless user = User.find_by(id: field.user_id)

    begin
      SingleSignOnRecord.create!(user_id: user.id, external_id: field.value, external_email: user.email, last_payload: '')
    rescue
      # Deliberate best effort, as before: a record that cannot be created is
      # skipped and the import continues.
      next
    end
  end
end
|
||||
|
||||
# Imports file attachments from field_data_field_post_attachment (joined with
# file_managed) and appends the upload markup to the raw text of the post each
# attachment belongs to.
#
# Attachments whose entity_type is "comment" are matched to the import id
# "cid:<entity_id>", all others to "nid:<entity_id>". Attachments whose user
# or post was not imported are skipped. A post is only revised when the upload
# markup is not already part of its raw text. A summary of imported and failed
# attachments is printed at the end.
def import_attachments
  puts "", "importing attachments"

  current_count = 0
  success_count = 0
  fail_count = 0

  total_count = mysql_query(<<-SQL
    SELECT count(field_post_attachment_fid) count
      FROM field_data_field_post_attachment
  SQL
  ).first["count"]

  batches(BATCH_SIZE) do |offset|
    # ORDER BY keeps LIMIT/OFFSET paging stable across batches.
    attachments = mysql_query(<<-SQL
      SELECT *
        FROM field_data_field_post_attachment fp
   LEFT JOIN file_managed fm
          ON fp.field_post_attachment_fid = fm.fid
    ORDER BY fp.entity_type, fp.entity_id, fp.field_post_attachment_fid
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL
    ).to_a

    break if attachments.size < 1

    attachments.each do |attachment|
      current_count += 1
      print_status current_count, total_count

      identifier = attachment['entity_type'] == "comment" ? "cid" : "nid"
      next unless user_id = user_id_from_imported_user_id(attachment['uid'])
      next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment['entity_id']}")
      # exists?/find_by return false/nil for missing rows; find would raise
      # here, outside the rescue below, and abort the whole import.
      next unless User.exists?(id: user_id)
      next unless post = Post.find_by(id: post_id)

      begin
        new_raw = post.raw.dup
        upload, filename = find_upload(post, attachment)

        unless upload
          fail_count += 1
          next
        end

        upload_html = html_for_upload(upload, filename)
        new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html)

        if new_raw != post.raw
          PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: "Import attachment from Drupal")
        else
          puts '', 'Skipped upload: already imported'
        end

        success_count += 1
      rescue => e
        fail_count += 1
        puts e
      end
    end
  end

  puts "", "Attachments imported: #{success_count}, failed: #{fail_count}"
end
|
||||
|
||||
# Creates a "/topic/<node id>" permalink for every listable topic whose
# "import_id" custom field contains an "nid:<digits>" reference.
#
# Topics without an import id, or with an import id that carries no node id,
# are skipped. A failure on one topic is reported and does not stop the loop.
def create_permalinks
  puts '', 'creating permalinks...'

  Topic.listable_topics.find_each do |topic|
    begin
      tcf = topic.custom_fields
      next unless tcf && tcf['import_id']

      node_id = tcf['import_id'][/nid:(\d+)/, 1]
      # Without a node id the url would be the meaningless "/topic/".
      next unless node_id

      Permalink.create(url: "/topic/#{node_id}", topic_id: topic.id)
    rescue => e
      puts e.message
      puts "Permalink creation failed for id #{topic.id}"
    end
  end
end
|
||||
|
||||
# Locates the file behind a Drupal attachment row under ATTACHMENT_DIR and
# creates an upload for it.
#
# post       - the post the attachment belongs to; its user (or -1 when the
#              post has no user) becomes the owner of the upload.
# attachment - a result row; 'uri' and 'filename' are read.
#
# Returns [upload, real_filename] on success. Returns nil when the uri is not
# of the form "public://upload/<path>", when the file does not exist (its
# name is then also appended to /tmp/attachments_failed.txt), or when the
# upload could not be created or has errors.
def find_upload(post, attachment)
  # to_s: the uri is nil when the LEFT JOIN found no file_managed row.
  uri = attachment['uri'].to_s[/public:\/\/upload\/(.+)/, 1]

  if uri.nil?
    puts "Attachment #{attachment['filename']} has no public://upload/ uri"
    return
  end

  real_filename = CGI.unescapeHTML(uri)
  file = File.join(ATTACHMENT_DIR, real_filename)

  # File.exist? — the File.exists? alias no longer exists in current Ruby.
  unless File.exist?(file)
    puts "Attachment file #{attachment['filename']} doesn't exist"

    failed_log = File.join('/tmp/', "attachments_failed.txt")
    File.open(failed_log, 'a') { |f| f.puts attachment['filename'] }

    # Nothing to upload; do not hand a missing path to create_upload.
    return
  end

  upload = create_upload(post.user&.id || -1, file, real_filename)

  if upload.nil? || upload.errors.any?
    puts "Upload not valid"
    puts upload.errors.inspect if upload
    return
  end

  [upload, real_filename]
end
|
||||
|
||||
# Converts BBCode-style quotes in imported post text.
#
# * [quote]...[/quote] becomes a block of lines prefixed with "> ".
# * [quote=<username>]...[/quote] becomes a [quote="<username>"] block on its
#   own lines.
#
# raw - the post body; it is not modified, so frozen strings are accepted.
#
# Returns the converted, stripped text, or nil when raw is blank.
def preprocess_raw(raw)
  return if raw.blank?

  # quotes on new lines
  text = raw.gsub(/\[quote\](.+?)\[\/quote\]/im) do
    quoted = "\n#{Regexp.last_match(1)}\n"
    # Non-bang gsub: the bang form returns nil when nothing was replaced,
    # which as a block result would delete the whole quote.
    quoted.gsub(/\n(.+?)/) { "\n> #{Regexp.last_match(1)}" }
  end

  # [QUOTE=<username>]...[/QUOTE]
  text = text.gsub(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
    username, quote = Regexp.last_match(1), Regexp.last_match(2)
    "\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
  end

  text.strip
end
|
||||
|
||||
# Rewrites links to the old site inside every post so that they point at the
# imported content.
#
# * https://site.com/forum/topic/<nid> becomes a link to the topic of the post
#   imported as "nid:<nid>".
# * https://site.com/comment/<cid>#comment-<n> becomes a link to the post
#   imported as "cid:<cid>".
#
# A link whose target was not imported is left exactly as it was. A post is
# saved only when its raw text changed; a failure on one post is reported and
# the loop continues.
def postprocess_posts
  puts '', 'postprocessing posts'

  current = 0
  max = Post.count

  Post.find_each do |post|
    begin
      raw = post.raw
      new_raw = raw.dup

      # replace old topic to new topic links
      new_raw.gsub!(/https:\/\/site\.com\/forum\/topic\/(\d+)/im) do |link|
        post_id = post_id_from_imported_post_id("nid:#{$1}")
        target = post_id && Post.find_by(id: post_id)
        # Returning the match keeps the link; a nil block result would
        # replace it with an empty string.
        next link unless target
        "https://community.site.com/t/-/#{target.topic_id}"
      end

      # replace old comment to reply links
      new_raw.gsub!(/https:\/\/site\.com\/comment\/(\d+)#comment-\d+/im) do |link|
        post_id = post_id_from_imported_post_id("cid:#{$1}")
        target = post_id && Post.find_by(id: post_id)
        next link unless target
        "https://community.site.com/t/-/#{target.topic_id}/#{target.post_number}"
      end

      if raw != new_raw
        post.raw = new_raw
        post.save
      end
    rescue
      puts '', "Failed rewrite on post: #{post.id}"
    ensure
      print_status(current += 1, max)
    end
  end
end
|
||||
|
||||
# Makes sure every user has a user_avatar record and asks it to update its
# gravatar. A failure for one user is reported and the loop continues.
def import_gravatars
  puts '', 'importing gravatars'

  current = 0
  max = User.count

  User.find_each do |user|
    begin
      user.create_user_avatar(user_id: user.id) unless user.user_avatar
      user.user_avatar.update_gravatar!
    rescue
      # Double quotes are required for the user id to be interpolated.
      puts '', "Failed avatar update on user #{user.id}"
    ensure
      print_status(current += 1, max)
    end
  end
end
|
||||
|
||||
# Parses a Unix timestamp (seconds since the epoch) into a DateTime.
#
# time - the timestamp as a String or an Integer.
#
# Returns a DateTime.
def parse_datetime(time)
  # strptime only accepts strings; to_s lets integer timestamps through too.
  DateTime.strptime(time.to_s, '%s')
end
|
||||
|
||||
# Runs the given SQL on the importer's MySQL client with row caching enabled
# and returns the client's result.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, **query_options)
end
|
||||
|
||||
end
|
||||
|
||||
if __FILE__ == $0
|
||||
|
|
Loading…
Reference in New Issue