discourse/script/import_scripts/zendesk.rb

249 lines
6.2 KiB
Ruby
Raw Normal View History

# Zendesk importer
#
# You will need a bunch of CSV files:
#
# - users.csv
# - topics.csv (topics in Zendesk are categories in Discourse)
# - posts.csv (posts in Zendesk are topics in Discourse)
# - comments.csv (comments in Zendesk are posts in Discourse)
require 'csv'
require 'reverse_markdown'
require_relative 'base'
require_relative 'base/generic_database'
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk.rb DIRNAME
class ImportScripts::Zendesk < ImportScripts::Base
OLD_DOMAIN = "https://support.example.com"
BATCH_SIZE = 1000
# Builds an importer for the Zendesk CSV export found in +path+.
#
# @param path [String] directory containing the CSV files; the same path is
#   handed to the GenericDatabase that the import steps read from and write to
def initialize(path)
  super()

  @path = path
  # NOTE(review): `recreate: true` presumably discards intermediate data left
  # over from a previous run -- confirm in ImportScripts::GenericDatabase.
  @db = ImportScripts::GenericDatabase.new(
    @path,
    batch_size: BATCH_SIZE,
    recreate: true
  )
end
# Runs every import step, strictly in this order: the CSV files are loaded
# first, then categories, users, topics and finally posts are created.
# NOTE(review): presumably invoked by ImportScripts::Base#perform -- confirm.
def execute
  %i[
    read_csv_files
    import_categories
    import_users
    import_topics
    import_posts
  ].each do |step|
    send(step)
  end
end
# Copies the four Zendesk CSV exports into the intermediate database (@db),
# then deletes users that authored neither a topic nor a post and asks the
# database to sort posts by creation time.
#
# Row keys such as :htmlurl and :createdat are the CSV headers as produced by
# csv_parse (which uses the :symbol header converter).
def read_csv_files
  puts "", "reading CSV files"

  # Zendesk "topics" are stored as categories.
  csv_parse("topics") do |zd_topic|
    @db.insert_category(
      id: zd_topic[:id],
      name: zd_topic[:name],
      description: zd_topic[:description],
      position: zd_topic[:position],
      url: zd_topic[:htmlurl]
    )
  end

  # Every imported user is flagged as active.
  csv_parse("users") do |zd_user|
    signed_up_at = parse_datetime(zd_user[:createdat])
    last_login_at = parse_datetime(zd_user[:lastloginat])

    @db.insert_user(
      id: zd_user[:id],
      email: zd_user[:email],
      name: zd_user[:name],
      created_at: signed_up_at,
      last_seen_at: last_login_at,
      active: true
    )
  end

  # Zendesk "posts" are stored as topics.
  csv_parse("posts") do |zd_post|
    is_closed = zd_post[:closed] == "TRUE"

    @db.insert_topic(
      id: zd_post[:id],
      title: zd_post[:title],
      raw: zd_post[:details],
      category_id: zd_post[:topicid],
      closed: is_closed,
      user_id: zd_post[:authorid],
      created_at: parse_datetime(zd_post[:createdat]),
      url: zd_post[:htmlurl]
    )
  end

  # Zendesk "comments" are stored as posts (replies).
  csv_parse("comments") do |zd_comment|
    @db.insert_post(
      id: zd_comment[:id],
      raw: zd_comment[:body],
      topic_id: zd_comment[:postid],
      user_id: zd_comment[:authorid],
      created_at: parse_datetime(zd_comment[:createdat]),
      url: zd_comment[:htmlurl]
    )
  end

  # Remove users that are referenced by no topic and no post.
  @db.execute_sql(<<~SQL)
    DELETE FROM user
    WHERE NOT EXISTS(
    SELECT 1
    FROM topic
    WHERE topic.user_id = user.id
    ) AND NOT EXISTS(
    SELECT 1
    FROM post
    WHERE post.user_id = user.id
    )
  SQL

  @db.sort_posts_by_created_at
end
# Converts a timestamp cell from the CSV export into a DateTime.
#
# @param text [String, nil] raw cell value
# @return [DateTime, nil] nil when the cell is blank or holds the literal
#   string "null"; otherwise the result of DateTime.parse
# @raise [ArgumentError] (Date::Error) when DateTime.parse cannot parse +text+
def parse_datetime(text)
  if text.blank? || text == "null"
    nil
  else
    DateTime.parse(text)
  end
end
# Creates one category per row returned by @db.fetch_categories.
#
# Each category gets a post_create_action that adds a Permalink from the old
# Zendesk URL (with the domain removed) to the new category, unless a
# permalink for that URL already exists.
def import_categories
  puts "", "creating categories"

  category_rows = @db.fetch_categories

  create_categories(category_rows) do |category_row|
    add_permalink = proc do |category|
      old_path = remove_domain(category_row['url'])

      unless permalink_exists?(old_path)
        Permalink.create(url: old_path, category_id: category.id)
      end
    end

    {
      id: category_row['id'],
      name: category_row['name'],
      description: category_row['description'],
      position: category_row['position'],
      post_create_action: add_permalink
    }
  end
end
# Runs the superclass's `batches` with this importer's BATCH_SIZE.
# `super` with explicit arguments still forwards the block given by the caller.
def batches
super(BATCH_SIZE)
end
# Creates users batch by batch from the intermediate database.
#
# @db.fetch_users is given the cursor returned by the previous call (starting
# with '') and returns the next rows plus the new cursor. The loop stops at
# the first empty batch; a batch is skipped when all_records_exist? reports
# that every id in it is already known.
def import_users
  puts "", "creating users"

  user_total = @db.count_users
  cursor = ''

  batches do |offset|
    batch, cursor = @db.fetch_users(cursor)
    break if batch.empty?

    batch_ids = batch.map { |user| user['id'] }
    next if all_records_exist?(:users, batch_ids)

    create_users(batch, total: user_total, offset: offset) do |user|
      # `active` is compared against the integer 1 to obtain a boolean.
      is_active = user['active'] == 1

      {
        id: user['id'],
        email: user['email'],
        name: user['name'],
        created_at: user['created_at'],
        last_seen_at: user['last_seen_at'],
        active: is_active
      }
    end
  end
end
# Creates topics (the first post of each thread) batch by batch.
#
# Topic import ids are prefixed via import_topic_id so that they are distinct
# from the ids used for replies in import_posts. Titles are stripped and cut to
# 255 characters; a missing title is replaced by "Topic title missing". When
# the author cannot be resolved, the system user is used. After creation a
# Permalink from the old URL to the new topic is added unless one exists.
def import_topics
  puts "", "creating topics"

  topic_total = @db.count_topics
  cursor = ''

  batches do |offset|
    batch, cursor = @db.fetch_topics(cursor)
    break if batch.empty?

    batch_ids = batch.map { |topic| import_topic_id(topic['id']) }
    next if all_records_exist?(:posts, batch_ids)

    create_posts(batch, total: topic_total, offset: offset) do |topic|
      title =
        if topic['title'].present?
          topic['title'].strip[0...255]
        else
          "Topic title missing"
        end

      add_permalink = proc do |post|
        old_path = remove_domain(topic['url'])

        unless permalink_exists?(old_path)
          Permalink.create(url: old_path, topic_id: post.topic.id)
        end
      end

      {
        id: import_topic_id(topic['id']),
        title: title,
        raw: normalize_raw(topic['raw']),
        category: category_id_from_imported_category_id(topic['category_id']),
        user_id: user_id_from_imported_user_id(topic['user_id']) || Discourse.system_user.id,
        created_at: topic['created_at'],
        closed: topic['closed'] == 1,
        post_create_action: add_permalink
      }
    end
  end
end
# Builds the import id used for a topic: the source id prefixed with "T".
#
# @param topic_id [#to_s] topic id from the intermediate database
# @return [String] e.g. "T42" for 42
def import_topic_id(topic_id)
  format("T%s", topic_id)
end
# Creates replies batch by batch.
#
# @db.fetch_posts is given the cursor from the previous call (starting at 0).
# For each row the parent topic is looked up through its prefixed import id;
# when it cannot be found the row is printed and the block yields nil.
# NOTE(review): presumably create_posts treats a nil result as "skip" -- confirm.
# When the author cannot be resolved, the system user is used. After creation
# a Permalink from the old URL to the new post is added unless one exists.
def import_posts
  puts "", "creating posts"

  post_total = @db.count_posts
  cursor = 0

  batches do |offset|
    batch, cursor = @db.fetch_posts(cursor)
    break if batch.empty?

    batch_ids = batch.map { |reply| reply['id'] }
    next if all_records_exist?(:posts, batch_ids)

    create_posts(batch, total: post_total, offset: offset) do |reply|
      parent = topic_lookup_from_imported_post_id(import_topic_id(reply['topic_id']))

      if parent.nil?
        p "MISSING TOPIC #{reply['topic_id']}"
        p reply
        next
      end

      add_permalink = proc do |post|
        old_path = remove_domain(reply['url'])

        unless permalink_exists?(old_path)
          Permalink.create(url: old_path, post_id: post.id)
        end
      end

      {
        id: reply['id'],
        raw: normalize_raw(reply['raw']),
        user_id: user_id_from_imported_user_id(reply['user_id']) || Discourse.system_user.id,
        topic_id: parent[:topic_id],
        created_at: reply['created_at'],
        post_create_action: add_permalink
      }
    end
  end
end
# Converts the HTML body of a Zendesk post or comment into Markdown.
#
# @param raw [String, nil] HTML body as stored in the intermediate database;
#   nil is treated as an empty string (an empty CSV cell is parsed as nil, and
#   calling gsub on it would otherwise raise NoMethodError in the middle of a
#   batch)
# @return [String] the result of ReverseMarkdown.convert
def normalize_raw(raw)
  # The pattern is single-quoted, so it matches the two literal characters
  # backslash + "n" found in the export, not real newline characters.
  html = raw.to_s.gsub('\n', '')
  ReverseMarkdown.convert(html)
end
# Strips the old Zendesk domain (OLD_DOMAIN) from the start of +url+, leaving
# the path that is stored as a permalink.
#
# Only a leading OLD_DOMAIN is removed; an occurrence elsewhere in the URL
# (for example inside a query string) is left untouched. URLs that do not start
# with OLD_DOMAIN are returned unchanged.
#
# @param url [String] absolute URL from the Zendesk export
# @return [String] +url+ without the leading OLD_DOMAIN
def remove_domain(url)
  url.delete_prefix(OLD_DOMAIN)
end
# Tells whether a Permalink with exactly this url has already been stored.
#
# Uses `exists?` so that only an existence check is issued and no Permalink
# record is loaded; callers in this file use the result purely as a condition.
#
# @param url [String] permalink url, as produced by remove_domain
# @return [Boolean]
def permalink_exists?(url)
  Permalink.exists?(url: url)
end
# Streams "<table_name>.csv" from the import directory (@path) and yields each
# row to the given block.
#
# The first line is used as headers and converted with CSV's :symbol
# converter, blank lines are skipped, and the file is read as UTF-8 with an
# optional byte-order mark.
#
# @param table_name [String] file name without the ".csv" extension
# @yieldparam row [CSV::Row]
def csv_parse(table_name)
  csv_path = File.join(@path, "#{table_name}.csv")
  csv_options = {
    headers: true,
    header_converters: :symbol,
    skip_blanks: true,
    encoding: 'bom|utf-8'
  }

  CSV.foreach(csv_path, **csv_options) do |row|
    yield row
  end
end
end
# Script entry point: the first command-line argument must name an existing
# directory (the CSV export). Otherwise the usage text is printed and the
# process exits with status 1.
import_dir = ARGV[0]

if import_dir.nil? || !Dir.exist?(import_dir)
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/zendesk.rb DIRNAME", ""
  exit 1
end

ImportScripts::Zendesk.new(import_dir).perform