FIX: improve category merging in discourse_merger. Match existing categories by their full path (parent slug plus slug) instead of by category name.

This commit is contained in:
Neil Lalonde 2018-09-20 12:33:51 -04:00
parent e43a05fa10
commit 8af6d81891
1 changed file with 18 additions and 2 deletions

View File

@ -177,9 +177,25 @@ class BulkImport::DiscourseMerger < BulkImport::Base
sql = "COPY categories (#{columns.map { |c| "\"#{c}\"" }.join(', ')}) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
source_raw_connection.exec("SELECT #{columns.map { |c| "\"#{c}\"" }.join(', ')} FROM categories").each do |row|
source_raw_connection.exec(
"SELECT concat('/c/', x.parent_slug, '/', x.slug) as path,
#{columns.map { |c| "c.\"#{c}\"" }.join(', ')}
FROM categories c
INNER JOIN (
SELECT c1.id AS id,
c2.slug AS parent_slug,
c1.slug AS slug
FROM categories c1
LEFT OUTER JOIN categories c2 ON c1.parent_category_id = c2.id
) x ON c.id = x.id"
).each do |row|
if existing = Category.where(name: row['name']).first
source_category_path = row.delete('path')&.squeeze('/')
existing = Category.where(slug: row['slug']).first
parent_slug = existing&.parent_category&.slug
if existing &&
source_category_path == "/c/#{parent_slug}/#{existing.slug}".squeeze('/')
@categories[row['id']] = existing.id
next
end