discourse/script/import_scripts/bespoke_1.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

266 lines
6.2 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
# bespoke importer for a customer, feel free to borrow ideas
require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Call it like this (DIRNAME is the directory containing the exported CSV files):
# RAILS_ENV=production bundle exec ruby script/import_scripts/bespoke_1.rb DIRNAME
class ImportScripts::Bespoke < ImportScripts::Base
BATCH_SIZE = 1000
# Sets up the importer for the CSV files located under +path+.
#
# After the base-class setup it preloads @post_number_map, a hash from post id
# to post_number for every row returned by Post.pluck, which
# import_post_batch! later consults when resolving reply targets.
#
# @param path [String] directory that csv_parse and total_rows read from
def initialize(path)
  @path = path
  super()
  @bbcode_to_md = true

  puts "loading post mappings..."
  @post_number_map = Post.pluck(:id, :post_number).to_h
end
# Records the post's post_number under its id in @post_number_map, then
# delegates to the superclass implementation with the same argument.
#
# @param post [#id, #post_number] the post to remember
def created_post(post)
  @post_number_map.store(post.id, post.post_number)
  super
end
# Runs the import stages in order: users, then categories, then posts.
# Posts come last because import_post_batch! resolves each post's user and
# category through the lookups for previously imported records.
def execute
  import_users
  import_categories
  import_posts
end
# Exposes the cells of a CSV row through reader methods named after the header
# columns. Build one with RowResolver.create(header), then call #load with each
# data row before reading values (for example +row.id+ or +row.email+).
class RowResolver
  # Points the resolver at a new row; readers return cells from this row.
  #
  # @param row [Array] cell values, in header order
  # @return [Array] the row that was stored
  def load(row)
    @row = row
  end

  # Builds a resolver backed by a fresh anonymous subclass, so the readers
  # generated for one header are not defined on resolvers built for another.
  #
  # @param cols [Array<String, nil>] header cells
  # @return [RowResolver]
  def self.create(cols)
    Class.new(RowResolver).new(cols)
  end

  # Defines one reader per header cell on the receiver's class.
  #
  # @param cols [Array<String, nil>] header cells; nil cells (unnamed columns)
  #   get no reader
  def initialize(cols)
    # Capture the class up front: a column literally named "class" would
    # otherwise replace #class part-way through the loop.
    klass = self.class
    cols.each_with_index do |col, idx|
      next if col.nil?

      klass.define_method(col) { @row[idx] }
    end
  end
end
# Hands the accumulated user hashes to create_users and then empties the array
# in place. Does nothing when the array is already empty.
#
# @param users [Array<Hash>] pending users; cleared after a flush
# @param offset [Integer] forwarded to create_users as +offset:+
# @param total [Integer] forwarded to create_users as +total:+
# @return [Array, nil] the emptied array, or nil when there was nothing to flush
def load_user_batch!(users, offset, total)
  return if users.empty?

  create_users(users, total: total, offset: offset) { |attrs| attrs }
  users.clear
end
# Streams "<@path>/<name>.csv" and yields one resolver per data row.
#
# The first parsed row is treated as the header and used to build a
# RowResolver; every later row is loaded into that same resolver before it is
# yielded, so callers must copy out the values they need inside the block.
# Physical lines are accumulated until the running count of double quotes is
# even, which lets a quoted field span several lines. Rows the CSV parser
# rejects are reported on stdout and skipped; blank lines are skipped.
#
# @param name [String] file name without the ".csv" extension
# @yieldparam row [RowResolver] resolver loaded with the current row
# @return [nil]
def csv_parse(name)
  filename = "#{@path}/#{name}.csv"
  resolver = nil

  # Unary plus gives an unfrozen buffer even under frozen_string_literal; the
  # buffer is appended to with << below.
  current_row = +""
  double_quote_count = 0

  # Block form of foreach closes the file when iteration finishes or raises.
  File.foreach(filename) do |line|
    # The export escapes with backslashes: turn \" into the CSV-style "" and
    # drop the backslash in front of any other character.
    line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
    line.strip!

    current_row << "\n" unless current_row.empty?
    current_row << line

    double_quote_count += line.count('"')
    # An odd number of quotes means a quoted field is still open.
    next if double_quote_count.odd?

    # The record is complete: take it and reset the accumulators once, so every
    # exit path below starts the next record from a clean, mutable buffer.
    record = current_row
    quotes_seen = double_quote_count
    current_row = +""
    double_quote_count = 0

    raw =
      begin
        CSV.parse(record).first
      rescue CSV::MalformedCSVError => e
        puts e.message
        puts "*" * 100
        puts "Bad row skipped, line is: #{line}"
        puts
        puts record
        puts
        puts "double quote count is : #{quotes_seen}"
        puts "*" * 100
        next
      end

    # A blank line parses to no row at all; there is nothing to yield.
    next if raw.nil?

    if resolver.nil?
      resolver = RowResolver.create(raw)
      next
    end

    resolver.load(raw)
    yield resolver
  end
end
# Returns the number of physical lines in "<@path>/<table>.csv" minus one for
# the header line. A quoted field that spans several lines is counted once per
# line, so the figure can exceed the number of rows csv_parse yields.
#
# @param table [String] file name without the ".csv" extension
# @return [Integer]
def total_rows(table)
  File.foreach("#{@path}/#{table}.csv").count - 1
end
# Reads users.csv and creates the users in batches of BATCH_SIZE.
#
# For each row: an email that is blank or has no "@" is replaced with
# fake_email; a username of "NULL" falls back to the display name; a blank
# username or name falls back to the local part of the email.
#
# The offset passed with every flush is the number of users handed off before
# that batch (count - users.length), the same way import_posts computes it.
def import_users
  puts "", "creating users"

  count = 0
  users = []
  total = total_rows("users")

  csv_parse("users") do |row|
    email = row.email
    # fake it
    email = fake_email if email.blank? || email !~ /@/

    name = row.display_name
    username = row.key_custom
    username = name if username == "NULL"

    local_part = email.split("@")[0]
    username = local_part if username.blank?
    name = local_part if name.blank?

    users << {
      id: row.id,
      email: email,
      name: name,
      username: username,
      created_at: DateTime.parse(row.dcreate),
    }
    count += 1

    load_user_batch! users, count - users.length, total if count % BATCH_SIZE == 0
  end

  # Flush the final partial batch; load_user_batch! ignores an empty array.
  load_user_batch! users, count - users.length, total
end
# Reads categories.csv into an array of {id, name, description} hashes and
# passes the whole array to create_categories, whose block returns each hash
# unchanged.
def import_categories
  categories = []

  csv_parse("categories") do |category|
    # Copy the values out now: csv_parse reuses the object it yields.
    categories.push({ id: category.id, name: category.name, description: category.description })
  end

  create_categories(categories) { |attrs| attrs }
end
# Strips [color=...] / [/color] tags and whole [signature]...[/signature]
# blocks from a post body, modifying the string in place.
#
# @param raw [String, nil] post body
# @return [String] the same (mutated) string, or "<missing>" when raw is blank
def normalize_raw!(raw)
  return "<missing>" if raw.blank?

  # Matches both named colours (purple) and hex ones (#1223f3).
  raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
  raw.gsub!(%r{\[/color\]}i, "")

  # Lazy match so each signature block is removed on its own; a greedy match
  # would also delete any post text sitting between two signature blocks.
  raw.gsub!(%r{\[signature\].*?\[/signature\]}im, "")

  raw
end
# Sends one batch of post hashes through create_posts and then empties the
# batch array in place.
#
# For each post the block returns the attribute hash to create, or nil (via
# +next+) to skip it. The first post seen for a topic becomes the topic itself
# (it carries the category and title) and its id is stored in
# topic[:post_id]; later posts of that topic are attached to the topic of that
# first post and, when they answer a post other than the first, get
# reply_to_post_number set.
#
# @param posts [Array<Hash>] batch built by import_posts; cleared afterwards
# @param topics [Hash] topic hashes keyed by imported topic id; mutated to
#   remember each topic's first post
# @param offset [Integer] forwarded to create_posts as +offset:+
# @param total [Integer] forwarded to create_posts as +total:+
# @return [Array] the emptied posts array
def import_post_batch!(posts, topics, offset, total)
  create_posts(posts, total: total, offset: offset) do |post|
    mapped = {
      id: post[:id],
      # Fall back to user id -1 when the author was not imported.
      user_id: user_id_from_imported_user_id(post[:user_id]) || -1,
      raw: post[:body],
      created_at: post[:created_at],
    }

    topic = topics[post[:topic_id]]
    # A post whose topic is absent from the topics data cannot be placed.
    next unless topic

    if topic[:post_id]
      parent = topic_lookup_from_imported_post_id(topic[:post_id])
      # The topic's first post was not imported, so there is nothing to reply to.
      next unless parent

      mapped[:topic_id] = parent[:topic_id]

      reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
      if reply_to_post_id
        reply_to_post_number = @post_number_map[reply_to_post_id]
        # Replies to post number 1 are left without reply_to_post_number.
        if reply_to_post_number && reply_to_post_number > 1
          mapped[:reply_to_post_number] = reply_to_post_number
        end
      end
    else
      mapped[:category] = category_id_from_imported_category_id(topic[:category_id])
      mapped[:title] = post[:title]
      topic[:post_id] = post[:id]
    end

    # Evaluated after the bookkeeping above, so topic[:post_id] is recorded even
    # when this post ends up being skipped.
    next if topic[:deleted] || post[:deleted]

    mapped
  end

  posts.clear
end
# Loads topics.csv into an in-memory map keyed by topic id, then streams
# posts.csv and hands the posts to import_post_batch! in batches of BATCH_SIZE.
# Rows without a creation date are printed and skipped.
#
# NOTE(review): the method ends by calling exit, which terminates the process
# as soon as the posts are imported, so nothing after import_posts in the
# caller runs. Confirm this is intentional.
def import_posts
  puts "", "creating topics and posts"

  topic_map = {}
  csv_parse("topics") do |t|
    topic_map[t.id] = {
      id: t.id,
      category_id: t.forum_category_id,
      deleted: t.is_deleted.to_i == 1,
      locked: t.is_locked.to_i == 1,
      pinned: t.is_pinned.to_i == 1,
    }
  end

  total = total_rows("posts")
  pending = []
  seen = 0
  # The offset is the number of posts already handed off before this batch.
  flush = -> { import_post_batch!(pending, topic_map, seen - pending.length, total) }

  csv_parse("posts") do |row|
    if row.dcreate
      pending << {
        id: row.id,
        topic_id: row.forum_topic_id,
        reply_id: row.reply_id,
        user_id: row.user_id,
        title: row.title,
        body: normalize_raw!(row.body),
        deleted: row.is_deleted.to_i == 1,
        created_at: DateTime.parse(row.dcreate),
      }
      seen += 1

      flush.call if !pending.empty? && (pending.length % BATCH_SIZE).zero?
    else
      puts "NO CREATION DATE FOR POST"
      p row
    end
  end

  flush.call unless pending.empty?

  exit
end
end
# Command-line entry point: the first argument must name an existing directory.
# Otherwise print the usage text and exit with status 1.
import_dir = ARGV[0]

if import_dir.nil? || !Dir.exist?(import_dir)
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/bespoke_1.rb DIRNAME", ""
  exit 1
end

ImportScripts::Bespoke.new(import_dir).perform