FEATURE: Import script for AnswerBase
Improves the generic database used by some import scripts:
* Adds additional columns for users
* Adds support for attachments
* Allows setting the data type for keys (numeric or string) to ensure correct sorting
This commit is contained in:
parent
ff26b4ed9b
commit
c36c9c2ee5
3
Gemfile
3
Gemfile
|
@ -202,10 +202,11 @@ gem 'rchardet', require: false
|
||||||
if ENV["IMPORT"] == "1"
|
if ENV["IMPORT"] == "1"
|
||||||
gem 'mysql2'
|
gem 'mysql2'
|
||||||
gem 'redcarpet'
|
gem 'redcarpet'
|
||||||
gem 'sqlite3', '~> 1.3.13'
|
gem 'sqlite3', '~> 1.3', '>= 1.3.13'
|
||||||
gem 'ruby-bbcode-to-md', git: 'https://github.com/nlalonde/ruby-bbcode-to-md'
|
gem 'ruby-bbcode-to-md', git: 'https://github.com/nlalonde/ruby-bbcode-to-md'
|
||||||
gem 'reverse_markdown'
|
gem 'reverse_markdown'
|
||||||
gem 'tiny_tds'
|
gem 'tiny_tds'
|
||||||
|
gem 'csv', '~> 3.0'
|
||||||
end
|
end
|
||||||
|
|
||||||
gem 'webpush', require: false
|
gem 'webpush', require: false
|
||||||
|
|
|
@ -0,0 +1,341 @@
|
||||||
|
require 'csv'
|
||||||
|
require 'reverse_markdown'
|
||||||
|
require_relative 'base'
|
||||||
|
require_relative 'base/generic_database'
|
||||||
|
|
||||||
|
# Call it like this:
|
||||||
|
# RAILS_ENV=production bundle exec ruby script/import_scripts/answerbase.rb DIRNAME
|
||||||
|
class ImportScripts::Answerbase < ImportScripts::Base
|
||||||
|
# Base URL of the old AnswerBase site (without trailing slash). Links that
# start with this URL are rewritten to point to the imported topics.
OLD_DOMAIN = "http://answerbase.example.com".freeze
# Base URL of the Discourse site that links get rewritten to.
NEW_DOMAIN = "https://discourse.example.com".freeze

# Sub-directories of the export directory that contain uploaded files.
AVATAR_DIRECTORY = "User Images".freeze
ANSWER_ATTACHMENT_DIRECTORY = "Answer Attachments".freeze
ANSWER_IMAGE_DIRECTORY = "Answer Images".freeze
QUESTION_ATTACHMENT_DIRECTORY = "Question Attachments".freeze
QUESTION_IMAGE_DIRECTORY = "Question Images".freeze

# Matches an <img> wrapped in a link whose href contains a "relativeUrl="
# parameter; the named group "path" captures that parameter's value.
EMBEDDED_IMAGE_REGEX = /<a[^>]*href="[^"]*relativeUrl=(?<path>[^"\&]*)[^"]*"[^>]*>\s*<img[^>]*>\s*<\/a>/i
# Matches a link to a question on OLD_DOMAIN. "id" captures the digits that
# follow "q" or "questionid=" and "text" captures the link text.
QUESTION_LINK_REGEX = /<a[^>]*?href="#{Regexp.escape(OLD_DOMAIN)}\/[^"]*?(?:q|questionid=)(?<id>\d+)[^"]*?"[^>]*>(?<text>.*?)<\/a>/i
# Permalink normalization that reduces an old question URL to "q<digits>",
# which is the URL format of the permalinks created for imported topics.
TOPIC_LINK_NORMALIZATION = '/.*?-(q\d+).*/\1'.freeze
# Number of rows that are processed per batch.
BATCH_SIZE = 1000
|
||||||
|
|
||||||
|
# Creates the importer for the AnswerBase export stored in the directory +path+.
# The intermediate database is always recreated and uses numeric keys.
def initialize(path)
  super()

  @path = path

  db_options = { batch_size: BATCH_SIZE, recreate: true, numeric_keys: true }
  @db = ImportScripts::GenericDatabase.new(@path, **db_options)
end
|
||||||
|
|
||||||
|
# Runs the import: the CSV files are loaded into the intermediate database
# first, afterwards the remaining steps are executed in the listed order.
def execute
  read_csv_files

  %i[
    add_permalink_normalizations
    import_categories
    import_users
    import_topics
    import_posts
  ].each { |step| send(step) }
end
|
||||||
|
|
||||||
|
# Reads the CSV files of the export (categories, users and
# questions-answers-comments) and stores their rows in the intermediate
# database.
def read_csv_files
  puts "", "reading CSV files..."

  read_categories_csv
  read_users_csv
  read_posts_csv
end

# Stores all categories. The position of a category is its 1-based row number.
def read_categories_csv
  category_position = 0

  csv_parse("categories") do |row|
    @db.insert_category(
      id: row[:id],
      name: row[:name],
      position: category_position += 1
    )
  end
end

# Stores all users. Every user is stored as active.
def read_users_csv
  csv_parse("users") do |row|
    @db.insert_user(
      id: row[:id],
      email: row[:email],
      username: row[:username],
      bio: row[:description],
      avatar_path: row[:profile_image],
      created_at: parse_date(row[:createtime]),
      active: true
    )
  end
end

# Stores questions as topics and all other rows as posts of the most recently
# read question. Rows with published == "No" are skipped.
def read_posts_csv
  last_topic_id = nil

  csv_parse("questions-answers-comments") do |row|
    question = row[:type] == "Question"

    if row[:published] == "No"
      # Forget the current topic when a question is skipped. Otherwise the
      # rows that follow it would be attached to the previous question.
      last_topic_id = nil if question
      next
    end

    if !question && last_topic_id.nil?
      STDERR.puts "Skipping #{row[:type]} #{row[:id]} because its question was not imported"
      next
    end

    begin
      user_id = @db.get_user_id(row[:username])
      created_at = parse_datetime(row[:createtime])

      if question
        attachments = parse_filenames(row[:attachments], QUESTION_ATTACHMENT_DIRECTORY) +
          parse_filenames(row[:images], QUESTION_IMAGE_DIRECTORY)

        @db.insert_topic(
          id: row[:id],
          title: row[:title],
          raw: row[:text],
          category_id: row[:categorylist],
          user_id: user_id,
          created_at: created_at,
          attachments: attachments
        )
        last_topic_id = row[:id]
      else
        attachments = parse_filenames(row[:attachments], ANSWER_ATTACHMENT_DIRECTORY) +
          parse_filenames(row[:images], ANSWER_IMAGE_DIRECTORY)

        @db.insert_post(
          id: row[:id],
          raw: row[:text],
          topic_id: last_topic_id,
          user_id: user_id,
          created_at: created_at,
          attachments: attachments
        )
      end
    rescue
      # print the offending row before re-raising to simplify debugging
      p row
      raise
    end
  end
end
|
||||||
|
|
||||||
|
# Converts a semicolon-separated list of filenames into file paths.
#
# @param text [String, nil] filenames separated by ";"
# @param directory [String] sub-directory of the export directory
# @return [Array<String>] paths inside of +directory+; empty if +text+ is blank
def parse_filenames(text, directory)
  return [] if text.blank?

  text
    .split(';')
    .map(&:strip)
    .reject(&:empty?) # "a;;b" or "a; " would otherwise yield the directory itself
    .map { |filename| File.join(@path, directory, filename) }
end
|
||||||
|
|
||||||
|
# Parses a date in the format month/day/year. Returns nil for blank input.
# NOTE(review): "%y" expects a two-digit year - confirm against the export.
def parse_date(text)
  text.blank? ? nil : DateTime.strptime(text, "%m/%d/%y")
end
|
||||||
|
|
||||||
|
# Parses a timestamp and converts it to UTC. Returns nil for blank input.
# NOTE(review): DateTime.parse presumably reads "a/b/yyyy" as day/month/year,
# while the format "%m/%d/%Y %H:%M" was used here before - verify against the data.
def parse_datetime(text)
  unless text.blank?
    DateTime.parse(text).utc.to_datetime
  end
end
|
||||||
|
|
||||||
|
# Creates a category for every row of the category table of the
# intermediate database.
def import_categories
  puts "", "creating categories"

  create_categories(@db.fetch_categories) do |category|
    %w[id name description position].each_with_object({}) do |column, attributes|
      attributes[column.to_sym] = category[column]
    end
  end
end
|
||||||
|
|
||||||
|
# Delegates to the parent class with BATCH_SIZE as batch size.
def batches(&block)
  super(BATCH_SIZE, &block)
end
|
||||||
|
|
||||||
|
# Creates the users that are stored in the intermediate database, one batch
# at a time. Batches in which all users already exist are skipped.
def import_users
  puts "", "creating users"

  user_count = @db.count_users
  cursor = 0

  batches do |offset|
    users, cursor = @db.fetch_users(cursor)
    break if users.empty?

    imported_ids = users.map { |user_row| user_row['id'] }
    next if all_records_exist?(:users, imported_ids)

    create_users(users, total: user_count, offset: offset) do |user_row|
      # the avatar gets uploaded after the user was created
      upload_avatar = proc do |user|
        create_avatar(user, user_row['avatar_path'])
      end

      {
        id: user_row['id'],
        email: user_row['email'],
        username: user_row['username'],
        bio_raw: user_row['bio'],
        created_at: user_row['created_at'],
        active: user_row['active'] == 1,
        post_create_action: upload_avatar
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Uploads the avatar of +user+. +avatar_path+ is relative to the avatar
# directory of the export. Nothing happens when it is blank; a missing file
# is reported on STDERR.
def create_avatar(user, avatar_path)
  return if avatar_path.blank?

  full_path = File.join(@path, AVATAR_DIRECTORY, avatar_path)

  unless File.exist?(full_path)
    STDERR.puts "Could not find avatar: #{full_path}"
    return
  end

  @uploader.create_avatar(user, full_path)
end
|
||||||
|
|
||||||
|
# Creates a topic for every row of the topic table of the intermediate
# database and adds a permalink "q<id>" for each of them.
def import_topics
  puts "", "creating topics"

  topic_count = @db.count_topics
  cursor = 0

  batches do |offset|
    topics, cursor = @db.fetch_topics(cursor)
    break if topics.empty?

    imported_ids = topics.map { |topic_row| topic_row['id'] }
    next if all_records_exist?(:posts, imported_ids)

    create_posts(topics, total: topic_count, offset: offset) do |topic_row|
      files = topic_row['upload_count'] > 0 ? @db.fetch_topic_attachments(topic_row['id']) : nil
      author_id = user_id_from_imported_user_id(topic_row['user_id']) || Discourse.system_user.id
      # the title is used as text when the question has no text
      body = topic_row['raw'].presence || topic_row['title']

      add_permalink = proc do |post|
        url = "q#{topic_row['id']}"
        Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
      end

      {
        id: topic_row['id'],
        title: topic_row['title'],
        raw: raw_with_attachments(body, files, author_id),
        category: category_id_from_imported_category_id(topic_row['category_id']),
        user_id: author_id,
        created_at: topic_row['created_at'],
        closed: topic_row['closed'] == 1,
        post_create_action: add_permalink
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Creates a post for every row of the post table of the intermediate
# database, one batch at a time. Batches in which all posts already exist
# are skipped.
def import_posts
  puts "", "creating posts"
  total_count = @db.count_posts
  last_row_id = 0

  batches do |offset|
    rows, last_row_id = @db.fetch_posts(last_row_id)
    break if rows.empty?

    next if all_records_exist?(:posts, rows.map { |row| row['id'] })

    create_posts(rows, total: total_count, offset: offset) do |row|
      topic = topic_lookup_from_imported_post_id(row['topic_id'])

      # The topic is missing when its creation failed. Return nil instead of
      # raising a NoMethodError on topic[:topic_id].
      # NOTE(review): assumes create_posts skips rows for which the block
      # returns nil - confirm against the base importer.
      if topic.nil?
        STDERR.puts "Could not find topic #{row['topic_id']} for post #{row['id']}"
        next
      end

      attachments = @db.fetch_post_attachments(row['id']) if row['upload_count'] > 0
      user_id = user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id

      {
        id: row['id'],
        raw: raw_with_attachments(row['raw'], attachments, user_id),
        user_id: user_id,
        topic_id: topic[:topic_id],
        created_at: row['created_at']
      }
    end
  end
end
|
||||||
|
|
||||||
|
# Converts the HTML of a question, answer or comment into Markdown and
# appends its attachments.
#
# @param raw [String, nil] HTML of the post
# @param attachments [Array<Hash>, nil] rows with a 'path' entry
# @param user_id [Integer] owner of the created uploads
# @return [String] the converted text followed by one line per attachment
def raw_with_attachments(raw, attachments, user_id)
  # posts without text have no raw; treat them as empty instead of failing
  raw = raw.to_s

  raw, embedded_paths, upload_ids = replace_embedded_attachments(raw, user_id)
  raw = replace_question_links(raw)
  raw = ReverseMarkdown.convert(raw) || ""

  attachments&.each do |attachment|
    path = attachment['path']
    # embedded images were already uploaded and are part of the text
    next if embedded_paths.include?(path)

    if File.exist?(path)
      filename = File.basename(path)
      upload = @uploader.create_upload(user_id, path, filename)

      # don't append a file a second time when it resolves to the same upload as an embedded image
      if upload.present? && upload.persisted? && !upload_ids.include?(upload.id)
        raw << "\n" << @uploader.html_for_upload(upload, filename)
      end
    else
      STDERR.puts "Could not find file: #{path}"
    end
  end

  raw
end
|
||||||
|
|
||||||
|
# Uploads the images that are embedded in +raw+ (see EMBEDDED_IMAGE_REGEX)
# and replaces their HTML with the HTML of the upload. Images that can't be
# found or uploaded are removed from the text.
#
# @param raw [String, nil] HTML of the post
# @param user_id [Integer] owner of the created uploads
# @return [Array] the modified text, the paths of the uploaded files and the
#   IDs of the created uploads
def replace_embedded_attachments(raw, user_id)
  paths = []
  upload_ids = []

  raw = raw.to_s.gsub(EMBEDDED_IMAGE_REGEX) do
    filename = File.basename(Regexp.last_match['path'])
    path = filename.empty? ? nil : find_image_path(filename)
    replacement = ""

    # Only accept the path of an existing file; anything else that the
    # lookup returns counts as "not found".
    if path.is_a?(String) && File.file?(path)
      upload = @uploader.create_upload(user_id, path, filename)

      if upload.present? && upload.persisted?
        paths << path
        upload_ids << upload.id
        replacement = @uploader.html_for_upload(upload, filename)
      end
    else
      STDERR.puts "Could not find file: #{filename}"
    end

    replacement
  end

  [raw, paths, upload_ids]
end
|
||||||
|
|
||||||
|
# Looks for +filename+ in the image directories of questions and answers.
#
# @param filename [String] name of the image file
# @return [String, nil] path of the first existing file or nil if none exists
def find_image_path(filename)
  [QUESTION_IMAGE_DIRECTORY, ANSWER_IMAGE_DIRECTORY]
    .map { |directory| File.join(@path, directory, filename) }
    .find { |path| File.exist?(path) }
end
|
||||||
|
|
||||||
|
# Replaces links to questions on OLD_DOMAIN with links to the imported
# topics on NEW_DOMAIN. Links to questions that were not imported are kept.
#
# @param raw [String] HTML of the post
# @return [String] HTML with rewritten links
def replace_question_links(raw)
  raw.gsub(QUESTION_LINK_REGEX) do
    original_link = Regexp.last_match.to_s
    text = Regexp.last_match("text")
    topic = topic_lookup_from_imported_post_id(Regexp.last_match("id"))

    # `next` keeps the link and continues with the substitution. A `return`
    # would leave the method and replace the whole text with this one link.
    next original_link unless topic

    url = File.join(NEW_DOMAIN, topic[:url])
    # a link text that shows the old URL is replaced by the bare new URL
    text.include?(OLD_DOMAIN) ? url : "<a href='#{url}'>#{text}</a>"
  end
end
|
||||||
|
|
||||||
|
# Adds TOPIC_LINK_NORMALIZATION to the permalink_normalizations site setting,
# which stores its entries separated by "|".
def add_permalink_normalizations
  setting = SiteSetting.permalink_normalizations

  entries =
    if setting.blank?
      []
    else
      setting.split('|')
    end

  add_normalization(entries, TOPIC_LINK_NORMALIZATION)
  SiteSetting.permalink_normalizations = entries.join('|')
end
|
||||||
|
|
||||||
|
# Appends +normalization+ to +normalizations+ unless it is already included.
def add_normalization(normalizations, normalization)
  return if normalizations.include?(normalization)

  normalizations << normalization
end
|
||||||
|
|
||||||
|
# Checks if a permalink with the given URL exists.
#
# @param url [String] URL of the permalink
# @return [Boolean] true if the permalink exists
def permalink_exists?(url)
  # exists? returns a boolean and doesn't need to load the record
  Permalink.exists?(url: url)
end
|
||||||
|
|
||||||
|
# Yields every row of the file "<table_name>.csv" in the export directory.
# The first row is used as header, header names are converted into symbols
# and blank lines are skipped.
def csv_parse(table_name)
  csv_file = File.join(@path, "#{table_name}.csv")
  options = {
    headers: true,
    header_converters: :symbol,
    skip_blanks: true,
    encoding: 'bom|utf-8'
  }

  CSV.foreach(csv_file, **options) { |row| yield row }
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# The only argument is the directory that contains the AnswerBase export.
dirname = ARGV[0]

if dirname.nil? || !Dir.exist?(dirname)
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/answerbase.rb DIRNAME", ""
  exit 1
end

ImportScripts::Answerbase.new(dirname).perform
|
|
@ -2,12 +2,13 @@ require 'sqlite3'
|
||||||
|
|
||||||
module ImportScripts
|
module ImportScripts
|
||||||
class GenericDatabase
|
class GenericDatabase
|
||||||
def initialize(directory, batch_size:, recreate: false)
|
def initialize(directory, batch_size:, recreate: false, numeric_keys: false)
|
||||||
filename = "#{directory}/index.db"
|
filename = "#{directory}/index.db"
|
||||||
File.delete(filename) if recreate && File.exists?(filename)
|
File.delete(filename) if recreate && File.exists?(filename)
|
||||||
|
|
||||||
@db = SQLite3::Database.new(filename, results_as_hash: true)
|
@db = SQLite3::Database.new(filename, results_as_hash: true)
|
||||||
@batch_size = batch_size
|
@batch_size = batch_size
|
||||||
|
@numeric_keys = numeric_keys
|
||||||
|
|
||||||
configure_database
|
configure_database
|
||||||
create_category_table
|
create_category_table
|
||||||
|
@ -25,36 +26,72 @@ module ImportScripts
|
||||||
|
|
||||||
def insert_user(user)
|
def insert_user(user)
|
||||||
@db.execute(<<-SQL, prepare(user))
|
@db.execute(<<-SQL, prepare(user))
|
||||||
INSERT OR REPLACE INTO user (id, email, username, name, created_at, last_seen_at, active)
|
INSERT OR REPLACE
|
||||||
VALUES (:id, :email, :username, :name, :created_at, :last_seen_at, :active)
|
INTO user (id, email, username, name, bio, avatar_path, created_at, last_seen_at, active)
|
||||||
|
VALUES (:id, :email, :username, :name, :bio, :avatar_path, :created_at, :last_seen_at, :active)
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
|
|
||||||
def insert_topic(topic)
|
def insert_topic(topic)
|
||||||
|
attachments = topic.delete(:attachments)
|
||||||
|
topic[:upload_count] = attachments&.size || 0
|
||||||
|
|
||||||
@db.execute(<<-SQL, prepare(topic))
|
@db.execute(<<-SQL, prepare(topic))
|
||||||
INSERT OR REPLACE INTO topic (id, title, raw, category_id, closed, user_id, created_at, url)
|
INSERT OR REPLACE INTO topic (id, title, raw, category_id, closed, user_id, created_at, url, upload_count)
|
||||||
VALUES (:id, :title, :raw, :category_id, :closed, :user_id, :created_at, :url)
|
VALUES (:id, :title, :raw, :category_id, :closed, :user_id, :created_at, :url, :upload_count)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
attachments&.each do |attachment|
|
||||||
|
@db.execute(<<-SQL, topic_id: topic[:id], path: attachment)
|
||||||
|
INSERT OR REPLACE INTO topic_upload (topic_id, path)
|
||||||
|
VALUES (:topic_id, :path)
|
||||||
|
SQL
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def insert_post(post)
|
def insert_post(post)
|
||||||
|
attachments = post.delete(:attachments)
|
||||||
|
post[:upload_count] = attachments&.size || 0
|
||||||
|
|
||||||
@db.execute(<<-SQL, prepare(post))
|
@db.execute(<<-SQL, prepare(post))
|
||||||
INSERT OR REPLACE INTO post (id, raw, topic_id, user_id, created_at, reply_to_post_id, url)
|
INSERT OR REPLACE INTO post (id, raw, topic_id, user_id, created_at, reply_to_post_id, url, upload_count)
|
||||||
VALUES (:id, :raw, :topic_id, :user_id, :created_at, :reply_to_post_id, :url)
|
VALUES (:id, :raw, :topic_id, :user_id, :created_at, :reply_to_post_id, :url, :upload_count)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
attachments&.each do |attachment|
|
||||||
|
@db.execute(<<-SQL, post_id: post[:id], path: attachment)
|
||||||
|
INSERT OR REPLACE INTO post_upload (post_id, path)
|
||||||
|
VALUES (:post_id, :path)
|
||||||
|
SQL
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def sort_posts_by_created_at
|
def sort_posts_by_created_at
|
||||||
@db.execute 'DELETE FROM post_order'
|
@db.execute 'DELETE FROM post_order'
|
||||||
|
|
||||||
@db.execute <<-SQL
|
@db.execute <<-SQL
|
||||||
INSERT INTO post_order (id)
|
INSERT INTO post_order (post_id)
|
||||||
SELECT id
|
SELECT id
|
||||||
FROM post
|
FROM post
|
||||||
ORDER BY created_at, topic_id, id
|
ORDER BY created_at, topic_id, id
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Deletes all users that are referenced neither by a topic nor by a post.
def delete_unused_users
  sql = <<~SQL
    DELETE FROM user
    WHERE NOT EXISTS(
        SELECT 1
        FROM topic
        WHERE topic.user_id = user.id
    ) AND NOT EXISTS(
        SELECT 1
        FROM post
        WHERE post.user_id = user.id
    )
  SQL

  @db.execute(sql)
end
|
||||||
|
|
||||||
def fetch_categories
|
def fetch_categories
|
||||||
@db.execute(<<-SQL)
|
@db.execute(<<-SQL)
|
||||||
SELECT *
|
SELECT *
|
||||||
|
@ -82,6 +119,14 @@ module ImportScripts
|
||||||
add_last_column_value(rows, 'id')
|
add_last_column_value(rows, 'id')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Returns the id of the user with the given username (the first value of the
# query result).
def get_user_id(username)
  sql = <<-SQL
    SELECT id
    FROM user
    WHERE username = :username
  SQL

  @db.get_first_value(sql, username)
end
|
||||||
|
|
||||||
def count_topics
|
def count_topics
|
||||||
@db.get_first_value(<<-SQL)
|
@db.get_first_value(<<-SQL)
|
||||||
SELECT COUNT(*)
|
SELECT COUNT(*)
|
||||||
|
@ -101,6 +146,14 @@ module ImportScripts
|
||||||
add_last_column_value(rows, 'id')
|
add_last_column_value(rows, 'id')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Returns the rows of topic_upload (column "path") that belong to the topic.
def fetch_topic_attachments(topic_id)
  sql = <<-SQL
    SELECT path
    FROM topic_upload
    WHERE topic_id = :topic_id
  SQL

  @db.execute(sql, topic_id)
end
|
||||||
|
|
||||||
def count_posts
|
def count_posts
|
||||||
@db.get_first_value(<<-SQL)
|
@db.get_first_value(<<-SQL)
|
||||||
SELECT COUNT(*)
|
SELECT COUNT(*)
|
||||||
|
@ -110,9 +163,21 @@ module ImportScripts
|
||||||
|
|
||||||
def fetch_posts(last_row_id)
|
def fetch_posts(last_row_id)
|
||||||
rows = @db.execute(<<-SQL, last_row_id)
|
rows = @db.execute(<<-SQL, last_row_id)
|
||||||
SELECT o.ROWID, p.*
|
SELECT ROWID AS rowid, *
|
||||||
|
FROM post
|
||||||
|
WHERE ROWID > :last_row_id
|
||||||
|
ORDER BY ROWID
|
||||||
|
LIMIT #{@batch_size}
|
||||||
|
SQL
|
||||||
|
|
||||||
|
add_last_column_value(rows, 'rowid')
|
||||||
|
end
|
||||||
|
|
||||||
|
def fetch_sorted_posts(last_row_id)
|
||||||
|
rows = @db.execute(<<-SQL, last_row_id)
|
||||||
|
SELECT o.ROWID AS rowid, p.*
|
||||||
FROM post p
|
FROM post p
|
||||||
JOIN post_order o USING (id)
|
JOIN post_order o ON (p.id = o.post_id)
|
||||||
WHERE o.ROWID > :last_row_id
|
WHERE o.ROWID > :last_row_id
|
||||||
ORDER BY o.ROWID
|
ORDER BY o.ROWID
|
||||||
LIMIT #{@batch_size}
|
LIMIT #{@batch_size}
|
||||||
|
@ -121,6 +186,14 @@ module ImportScripts
|
||||||
add_last_column_value(rows, 'rowid')
|
add_last_column_value(rows, 'rowid')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Returns the rows of post_upload (column "path") that belong to the post.
def fetch_post_attachments(post_id)
  sql = <<-SQL
    SELECT path
    FROM post_upload
    WHERE post_id = :post_id
  SQL

  @db.execute(sql, post_id)
end
|
||||||
|
|
||||||
def execute_sql(sql)
|
def execute_sql(sql)
|
||||||
@db.execute(sql)
|
@db.execute(sql)
|
||||||
end
|
end
|
||||||
|
@ -136,10 +209,14 @@ module ImportScripts
|
||||||
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
|
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# SQL data type of the key columns: INTEGER when numeric keys are enabled,
# TEXT otherwise.
def key_data_type
  if @numeric_keys
    'INTEGER'
  else
    'TEXT'
  end
end
|
||||||
|
|
||||||
def create_category_table
|
def create_category_table
|
||||||
@db.execute <<-SQL
|
@db.execute <<-SQL
|
||||||
CREATE TABLE IF NOT EXISTS category (
|
CREATE TABLE IF NOT EXISTS category (
|
||||||
id TEXT NOT NULL PRIMARY KEY,
|
id #{key_data_type} NOT NULL PRIMARY KEY,
|
||||||
name TEXT NOT NULL,
|
name TEXT NOT NULL,
|
||||||
description TEXT,
|
description TEXT,
|
||||||
position INTEGER,
|
position INTEGER,
|
||||||
|
@ -151,44 +228,59 @@ module ImportScripts
|
||||||
def create_user_table
|
def create_user_table
|
||||||
@db.execute <<-SQL
|
@db.execute <<-SQL
|
||||||
CREATE TABLE IF NOT EXISTS user (
|
CREATE TABLE IF NOT EXISTS user (
|
||||||
id TEXT NOT NULL PRIMARY KEY,
|
id #{key_data_type} NOT NULL PRIMARY KEY,
|
||||||
email TEXT,
|
email TEXT,
|
||||||
username TEXT,
|
username TEXT,
|
||||||
name TEXT,
|
name TEXT,
|
||||||
|
bio TEXT,
|
||||||
|
avatar_path TEXT,
|
||||||
created_at DATETIME,
|
created_at DATETIME,
|
||||||
last_seen_at DATETIME,
|
last_seen_at DATETIME,
|
||||||
active BOOLEAN NOT NULL DEFAULT true
|
active BOOLEAN NOT NULL DEFAULT true
|
||||||
)
|
)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
@db.execute 'CREATE INDEX IF NOT EXISTS user_by_username ON user (username)'
|
||||||
end
|
end
|
||||||
|
|
||||||
def create_topic_table
|
def create_topic_table
|
||||||
@db.execute <<-SQL
|
@db.execute <<-SQL
|
||||||
CREATE TABLE IF NOT EXISTS topic (
|
CREATE TABLE IF NOT EXISTS topic (
|
||||||
id TEXT NOT NULL PRIMARY KEY,
|
id #{key_data_type} NOT NULL PRIMARY KEY,
|
||||||
title TEXT,
|
title TEXT,
|
||||||
raw TEXT,
|
raw TEXT,
|
||||||
category_id TEXT NOT NULL,
|
category_id #{key_data_type},
|
||||||
closed BOOLEAN NOT NULL DEFAULT false,
|
closed BOOLEAN NOT NULL DEFAULT false,
|
||||||
user_id TEXT NOT NULL,
|
user_id #{key_data_type} NOT NULL,
|
||||||
created_at DATETIME,
|
created_at DATETIME,
|
||||||
url TEXT
|
url TEXT,
|
||||||
|
upload_count INTEGER DEFAULT 0
|
||||||
)
|
)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
@db.execute 'CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)'
|
@db.execute 'CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)'
|
||||||
|
|
||||||
|
@db.execute <<-SQL
|
||||||
|
CREATE TABLE IF NOT EXISTS topic_upload (
|
||||||
|
topic_id #{key_data_type} NOT NULL,
|
||||||
|
path TEXT NOT NULL
|
||||||
|
)
|
||||||
|
SQL
|
||||||
|
|
||||||
|
@db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS topic_upload_unique ON topic_upload(topic_id, path)'
|
||||||
end
|
end
|
||||||
|
|
||||||
def create_post_table
|
def create_post_table
|
||||||
@db.execute <<-SQL
|
@db.execute <<-SQL
|
||||||
CREATE TABLE IF NOT EXISTS post (
|
CREATE TABLE IF NOT EXISTS post (
|
||||||
id TEXT NOT NULL PRIMARY KEY,
|
id #{key_data_type} NOT NULL PRIMARY KEY,
|
||||||
raw TEXT,
|
raw TEXT,
|
||||||
topic_id TEXT NOT NULL,
|
topic_id #{key_data_type} NOT NULL,
|
||||||
user_id TEXT NOT NULL,
|
user_id #{key_data_type} NOT NULL,
|
||||||
created_at DATETIME,
|
created_at DATETIME,
|
||||||
reply_to_post_id TEXT,
|
reply_to_post_id #{key_data_type},
|
||||||
url TEXT
|
url TEXT,
|
||||||
|
upload_count INTEGER DEFAULT 0
|
||||||
)
|
)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
@ -196,9 +288,18 @@ module ImportScripts
|
||||||
|
|
||||||
@db.execute <<-SQL
|
@db.execute <<-SQL
|
||||||
CREATE TABLE IF NOT EXISTS post_order (
|
CREATE TABLE IF NOT EXISTS post_order (
|
||||||
id TEXT NOT NULL PRIMARY KEY
|
post_id #{key_data_type} NOT NULL PRIMARY KEY
|
||||||
)
|
)
|
||||||
SQL
|
SQL
|
||||||
|
|
||||||
|
@db.execute <<-SQL
|
||||||
|
CREATE TABLE IF NOT EXISTS post_upload (
|
||||||
|
post_id #{key_data_type} NOT NULL,
|
||||||
|
path TEXT NOT NULL
|
||||||
|
)
|
||||||
|
SQL
|
||||||
|
|
||||||
|
@db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS post_upload_unique ON post_upload(post_id, path)'
|
||||||
end
|
end
|
||||||
|
|
||||||
def prepare(hash)
|
def prepare(hash)
|
||||||
|
|
|
@ -82,19 +82,7 @@ class ImportScripts::Zendesk < ImportScripts::Base
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
@db.execute_sql(<<~SQL)
|
@db.delete_unused_users
|
||||||
DELETE FROM user
|
|
||||||
WHERE NOT EXISTS(
|
|
||||||
SELECT 1
|
|
||||||
FROM topic
|
|
||||||
WHERE topic.user_id = user.id
|
|
||||||
) AND NOT EXISTS(
|
|
||||||
SELECT 1
|
|
||||||
FROM post
|
|
||||||
WHERE post.user_id = user.id
|
|
||||||
)
|
|
||||||
SQL
|
|
||||||
|
|
||||||
@db.sort_posts_by_created_at
|
@db.sort_posts_by_created_at
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -188,7 +176,7 @@ class ImportScripts::Zendesk < ImportScripts::Base
|
||||||
last_row_id = 0
|
last_row_id = 0
|
||||||
|
|
||||||
batches do |offset|
|
batches do |offset|
|
||||||
rows, last_row_id = @db.fetch_posts(last_row_id)
|
rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
|
||||||
break if rows.empty?
|
break if rows.empty?
|
||||||
|
|
||||||
next if all_records_exist?(:posts, rows.map { |row| row['id'] })
|
next if all_records_exist?(:posts, rows.map { |row| row['id'] })
|
||||||
|
|
|
@ -8,7 +8,7 @@ require_relative 'base/generic_database'
|
||||||
|
|
||||||
# Call it like this:
|
# Call it like this:
|
||||||
# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
|
# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk_api.rb SOURCE_URL DIRNAME AUTH_EMAIL AUTH_TOKEN
|
||||||
class ImportScripts::Zendesk < ImportScripts::Base
|
class ImportScripts::ZendeskApi < ImportScripts::Base
|
||||||
BATCH_SIZE = 1000
|
BATCH_SIZE = 1000
|
||||||
|
|
||||||
def initialize(source_url, path, auth_email, auth_token)
|
def initialize(source_url, path, auth_email, auth_token)
|
||||||
|
@ -193,7 +193,7 @@ class ImportScripts::Zendesk < ImportScripts::Base
|
||||||
last_row_id = 0
|
last_row_id = 0
|
||||||
|
|
||||||
batches do |offset|
|
batches do |offset|
|
||||||
rows, last_row_id = @db.fetch_posts(last_row_id)
|
rows, last_row_id = @db.fetch_sorted_posts(last_row_id)
|
||||||
break if rows.empty?
|
break if rows.empty?
|
||||||
|
|
||||||
create_posts(rows, total: total_count, offset: offset) do |row|
|
create_posts(rows, total: total_count, offset: offset) do |row|
|
||||||
|
@ -303,4 +303,4 @@ unless ARGV.length == 4 && Dir.exist?(ARGV[1])
|
||||||
exit 1
|
exit 1
|
||||||
end
|
end
|
||||||
|
|
||||||
ImportScripts::Zendesk.new(ARGV[0], ARGV[1], ARGV[2], ARGV[3]).perform
|
ImportScripts::ZendeskApi.new(ARGV[0], ARGV[1], ARGV[2], ARGV[3]).perform
|
||||||
|
|
Loading…
Reference in New Issue