657 lines
18 KiB
Ruby
657 lines
18 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# Use http://tatiyants.com/pev/#/plans/new if you want to optimize a query
|
|
|
|
task "import:ensure_consistency" => :environment do
|
|
log "Starting..."
|
|
|
|
insert_post_timings
|
|
insert_post_replies
|
|
insert_topic_users
|
|
insert_topic_views
|
|
insert_user_actions
|
|
insert_user_options
|
|
insert_user_profiles
|
|
insert_user_stats
|
|
insert_user_visits
|
|
insert_draft_sequences
|
|
|
|
update_user_stats
|
|
update_posts
|
|
update_topics
|
|
update_categories
|
|
update_users
|
|
update_groups
|
|
update_tag_stats
|
|
update_topic_users
|
|
create_category_definitions
|
|
|
|
log "Done!"
|
|
end
|
|
|
|
MS_SPEND_CREATING_POST ||= 5000
|
|
|
|
# -- TODO: We need to check the queries are actually adding/updating the necessary
|
|
# data, post migration. The ON CONFLICT DO NOTHING may cause the clauses to be ignored
|
|
# when we actually need them to run.
|
|
|
|
def insert_post_timings
|
|
log "Inserting post timings..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO post_timings (topic_id, post_number, user_id, msecs)
|
|
SELECT topic_id, post_number, user_id, #{MS_SPEND_CREATING_POST}
|
|
FROM posts
|
|
WHERE user_id > 0
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_post_replies
|
|
log "Inserting post replies..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO post_replies (post_id, reply_post_id, created_at, updated_at)
|
|
SELECT p2.id, p.id, p.created_at, p.created_at
|
|
FROM posts p
|
|
INNER JOIN posts p2 ON p2.post_number = p.reply_to_post_number AND p2.topic_id = p.topic_id
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_topic_users
|
|
log "Inserting topic users..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO topic_users (user_id, topic_id, posted, last_read_post_number, first_visited_at, last_visited_at, total_msecs_viewed)
|
|
SELECT user_id, topic_id, 't' , MAX(post_number), MIN(created_at), MAX(created_at), COUNT(id) * #{MS_SPEND_CREATING_POST}
|
|
FROM posts
|
|
WHERE user_id > 0
|
|
GROUP BY user_id, topic_id
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_topic_views
|
|
log "Inserting topic views..."
|
|
|
|
DB.exec <<-SQL
|
|
WITH X AS (
|
|
SELECT topic_id, user_id, DATE(p.created_at) posted_at
|
|
FROM posts p
|
|
JOIN users u ON u.id = p.user_id
|
|
WHERE user_id > 0
|
|
GROUP BY topic_id, user_id, DATE(p.created_at)
|
|
)
|
|
INSERT INTO topic_views (topic_id, user_id, viewed_at, ip_address)
|
|
SELECT X.topic_id, X.user_id, X.posted_at, ip_address
|
|
FROM X
|
|
JOIN users u ON u.id = X.user_id
|
|
WHERE ip_address IS NOT NULL
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_user_actions
|
|
log "Inserting user actions for NEW_TOPIC = 4..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO user_actions (action_type, user_id, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
|
|
SELECT 4, p.user_id, topic_id, p.id, p.user_id, p.created_at, p.created_at
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE post_number = 1
|
|
AND archetype <> 'private_message'
|
|
AND p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
|
|
log "Inserting user actions for REPLY = 5..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO user_actions (action_type, user_id, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
|
|
SELECT 5, p.user_id, topic_id, p.id, p.user_id, p.created_at, p.created_at
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE post_number > 1
|
|
AND archetype <> 'private_message'
|
|
AND p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
|
|
log "Inserting user actions for RESPONSE = 6..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO user_actions (action_type, user_id, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
|
|
SELECT 6, p.user_id, p.topic_id, p.id, p2.user_id, p.created_at, p.created_at
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
INNER JOIN posts p2 ON p2.post_number = p.reply_to_post_number
|
|
WHERE p.post_number > 1
|
|
AND archetype <> 'private_message'
|
|
AND p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
AND p2.topic_id = p.topic_id
|
|
AND p2.user_id <> p.user_id
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
|
|
# TODO:
|
|
# - NEW_PRIVATE_MESSAGE
|
|
# - GOT_PRIVATE_MESSAGE
|
|
end
|
|
|
|
def insert_user_options
|
|
log "Inserting user options..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO user_options (
|
|
user_id,
|
|
mailing_list_mode,
|
|
mailing_list_mode_frequency,
|
|
email_level,
|
|
email_messages_level,
|
|
email_previous_replies,
|
|
email_in_reply_to,
|
|
email_digests,
|
|
digest_after_minutes,
|
|
include_tl0_in_digests,
|
|
automatically_unpin_topics,
|
|
enable_quoting,
|
|
external_links_in_new_tab,
|
|
dynamic_favicon,
|
|
new_topic_duration_minutes,
|
|
auto_track_topics_after_msecs,
|
|
notification_level_when_replying,
|
|
like_notification_frequency,
|
|
skip_new_user_tips,
|
|
hide_profile_and_presence,
|
|
sidebar_link_to_filtered_list,
|
|
sidebar_show_count_of_new_items
|
|
)
|
|
SELECT u.id
|
|
, #{SiteSetting.default_email_mailing_list_mode}
|
|
, #{SiteSetting.default_email_mailing_list_mode_frequency}
|
|
, #{SiteSetting.default_email_level}
|
|
, #{SiteSetting.default_email_messages_level}
|
|
, #{SiteSetting.default_email_previous_replies}
|
|
, #{SiteSetting.default_email_in_reply_to}
|
|
, #{SiteSetting.default_email_digest_frequency.to_i > 0}
|
|
, #{SiteSetting.default_email_digest_frequency}
|
|
, #{SiteSetting.default_include_tl0_in_digests}
|
|
, #{SiteSetting.default_topics_automatic_unpin}
|
|
, #{SiteSetting.default_other_enable_quoting}
|
|
, #{SiteSetting.default_other_external_links_in_new_tab}
|
|
, #{SiteSetting.default_other_dynamic_favicon}
|
|
, #{SiteSetting.default_other_new_topic_duration_minutes}
|
|
, #{SiteSetting.default_other_auto_track_topics_after_msecs}
|
|
, #{SiteSetting.default_other_notification_level_when_replying}
|
|
, #{SiteSetting.default_other_like_notification_frequency}
|
|
, #{SiteSetting.default_other_skip_new_user_tips}
|
|
, #{SiteSetting.default_hide_profile_and_presence}
|
|
, #{SiteSetting.default_sidebar_link_to_filtered_list}
|
|
, #{SiteSetting.default_sidebar_show_count_of_new_items}
|
|
FROM users u
|
|
LEFT JOIN user_options uo ON uo.user_id = u.id
|
|
WHERE uo.user_id IS NULL
|
|
SQL
|
|
end
|
|
|
|
def insert_user_profiles
|
|
log "Inserting user profiles..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO user_profiles (user_id)
|
|
SELECT id
|
|
FROM users
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_user_stats
|
|
log "Inserting user stats..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO user_stats (user_id, new_since)
|
|
SELECT id, created_at
|
|
FROM users
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_user_visits
|
|
log "Inserting user visits..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO user_visits (user_id, visited_at, posts_read)
|
|
SELECT user_id, DATE(created_at), COUNT(*)
|
|
FROM posts
|
|
WHERE user_id > 0
|
|
GROUP BY user_id, DATE(created_at)
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def insert_draft_sequences
|
|
log "Inserting draft sequences..."
|
|
|
|
DB.exec <<-SQL
|
|
INSERT INTO draft_sequences (user_id, draft_key, sequence)
|
|
SELECT user_id, CONCAT('#{Draft::EXISTING_TOPIC}', id), 1
|
|
FROM topics
|
|
WHERE user_id > 0
|
|
AND archetype = 'regular'
|
|
ON CONFLICT DO NOTHING
|
|
SQL
|
|
end
|
|
|
|
def update_user_stats
|
|
log "Updating user stats..."
|
|
|
|
# TODO: topic_count is counting all topics you replied in as if you started the topic.
|
|
# TODO: post_count is counting first posts.
|
|
DB.exec <<-SQL
|
|
WITH X AS (
|
|
SELECT p.user_id
|
|
, COUNT(p.id) posts
|
|
, COUNT(DISTINCT p.topic_id) topics
|
|
, MIN(p.created_at) min_created_at
|
|
, COALESCE(COUNT(DISTINCT DATE(p.created_at)), 0) days
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE p.deleted_at IS NULL
|
|
AND NOT COALESCE(p.hidden, 't')
|
|
AND p.post_type = 1
|
|
AND t.deleted_at IS NULL
|
|
AND COALESCE(t.visible, 't')
|
|
AND t.archetype <> 'private_message'
|
|
AND p.user_id > 0
|
|
GROUP BY p.user_id
|
|
)
|
|
UPDATE user_stats
|
|
SET post_count = X.posts
|
|
, posts_read_count = X.posts
|
|
, time_read = X.posts * 5
|
|
, topic_count = X.topics
|
|
, topics_entered = X.topics
|
|
, first_post_created_at = X.min_created_at
|
|
, days_visited = X.days
|
|
FROM X
|
|
WHERE user_stats.user_id = X.user_id
|
|
AND (post_count <> X.posts
|
|
OR posts_read_count <> X.posts
|
|
OR time_read <> X.posts * 5
|
|
OR topic_count <> X.topics
|
|
OR topics_entered <> X.topics
|
|
OR COALESCE(first_post_created_at, '1970-01-01') <> X.min_created_at
|
|
OR days_visited <> X.days
|
|
)
|
|
SQL
|
|
end
|
|
|
|
def update_posts
|
|
log "Updating posts..."
|
|
|
|
DB.exec <<-SQL
|
|
WITH Y AS (
|
|
SELECT post_id, COUNT(*) replies FROM post_replies GROUP BY post_id
|
|
)
|
|
UPDATE posts
|
|
SET reply_count = Y.replies
|
|
FROM Y
|
|
WHERE posts.id = Y.post_id
|
|
AND reply_count <> Y.replies
|
|
SQL
|
|
|
|
# -- TODO: ensure this is how this works!
|
|
# WITH X AS (
|
|
# SELECT pr.post_id, p.user_id
|
|
# FROM post_replies pr
|
|
# JOIN posts p ON p.id = pr.reply_post_id
|
|
# )
|
|
# UPDATE posts
|
|
# SET reply_to_user_id = X.user_id
|
|
# FROM X
|
|
# WHERE id = X.post_id
|
|
# AND COALESCE(reply_to_user_id, -9999) <> X.user_id
|
|
end
|
|
|
|
def update_topics
|
|
log "Updating topics..."
|
|
|
|
DB.exec <<-SQL
|
|
WITH X AS (
|
|
SELECT topic_id
|
|
, COUNT(*) posts
|
|
, MAX(created_at) last_post_date
|
|
, COALESCE(SUM(word_count), 0) words
|
|
, COALESCE(SUM(reply_count), 0) replies
|
|
, ( SELECT user_id
|
|
FROM posts
|
|
WHERE NOT hidden
|
|
AND deleted_at IS NULL
|
|
AND topic_id = p.topic_id
|
|
ORDER BY post_number DESC
|
|
LIMIT 1) last_poster
|
|
FROM posts p
|
|
WHERE NOT hidden
|
|
AND deleted_at IS NULL
|
|
GROUP BY topic_id
|
|
)
|
|
UPDATE topics
|
|
SET posts_count = X.posts
|
|
, last_posted_at = X.last_post_date
|
|
, bumped_at = X.last_post_date
|
|
, word_count = X.words
|
|
, reply_count = X.replies
|
|
, last_post_user_id = X.last_poster
|
|
FROM X
|
|
WHERE id = X.topic_id
|
|
AND (posts_count <> X.posts
|
|
OR COALESCE(last_posted_at, '1970-01-01') <> X.last_post_date
|
|
OR bumped_at <> X.last_post_date
|
|
OR COALESCE(word_count, -1) <> X.words
|
|
OR COALESCE(reply_count, -1) <> X.replies
|
|
OR COALESCE(last_post_user_id, -9999) <> X.last_poster)
|
|
SQL
|
|
end
|
|
|
|
def update_categories
|
|
log "Updating categories..."
|
|
|
|
DB.exec <<-SQL
|
|
WITH X AS (
|
|
SELECT category_id
|
|
, MAX(p.id) post_id
|
|
, MAX(t.id) topic_id
|
|
, COUNT(p.id) posts
|
|
, COUNT(DISTINCT p.topic_id) topics
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
AND NOT p.hidden
|
|
AND t.visible
|
|
GROUP BY category_id
|
|
)
|
|
UPDATE categories
|
|
SET latest_post_id = X.post_id
|
|
, latest_topic_id = X.topic_id
|
|
, post_count = X.posts
|
|
, topic_count = X.topics
|
|
FROM X
|
|
WHERE id = X.category_id
|
|
AND (COALESCE(latest_post_id, -1) <> X.post_id
|
|
OR COALESCE(latest_topic_id, -1) <> X.topic_id
|
|
OR post_count <> X.posts
|
|
OR topic_count <> X.topics)
|
|
SQL
|
|
end
|
|
|
|
def update_users
|
|
log "Updating users..."
|
|
|
|
DB.exec(<<~SQL, Archetype.private_message)
|
|
WITH X AS (
|
|
SELECT p.user_id
|
|
, MIN(p.created_at) min_created_at
|
|
, MAX(p.created_at) max_created_at
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id AND t.archetype <> ?
|
|
WHERE p.deleted_at IS NULL
|
|
GROUP BY p.user_id
|
|
)
|
|
UPDATE users
|
|
SET first_seen_at = X.min_created_at
|
|
, last_seen_at = X.max_created_at
|
|
, last_posted_at = X.max_created_at
|
|
FROM X
|
|
WHERE id = X.user_id
|
|
AND (COALESCE(first_seen_at, '1970-01-01') <> X.min_created_at
|
|
OR COALESCE(last_seen_at, '1970-01-01') <> X.max_created_at
|
|
OR COALESCE(last_posted_at, '1970-01-01') <> X.max_created_at)
|
|
SQL
|
|
end
|
|
|
|
def update_groups
|
|
log "Updating groups..."
|
|
|
|
DB.exec <<-SQL
|
|
WITH X AS (
|
|
SELECT group_id, COUNT(*) count
|
|
FROM group_users
|
|
GROUP BY group_id
|
|
)
|
|
UPDATE groups
|
|
SET user_count = X.count
|
|
FROM X
|
|
WHERE id = X.group_id
|
|
AND user_count <> X.count
|
|
SQL
|
|
end
|
|
|
|
def update_tag_stats
|
|
Tag.ensure_consistency!
|
|
end
|
|
|
|
def update_topic_users
|
|
log "Updating topic users..."
|
|
|
|
DB.exec <<-SQL
|
|
WITH X AS (
|
|
SELECT p.topic_id
|
|
, p.user_id
|
|
FROM posts p
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE p.deleted_at IS NULL
|
|
AND t.deleted_at IS NULL
|
|
AND NOT p.hidden
|
|
AND t.visible
|
|
)
|
|
UPDATE topic_users tu
|
|
SET posted = 't'
|
|
FROM X
|
|
WHERE tu.topic_id = X.topic_id
|
|
AND tu.user_id = X.user_id
|
|
AND posted = 'f'
|
|
SQL
|
|
end
|
|
|
|
def create_category_definitions
|
|
log "Creating category definitions"
|
|
Category.ensure_consistency!
|
|
end
|
|
|
|
def log(message)
|
|
puts "[#{DateTime.now.strftime("%Y-%m-%d %H:%M:%S")}] #{message}"
|
|
end
|
|
|
|
task "import:create_phpbb_permalinks" => :environment do
|
|
log "Creating Permalinks..."
|
|
|
|
# /[^\/]+\/.*-t(\d+).html/
|
|
SiteSetting.permalink_normalizations = '/[^\/]+\/.*-t(\d+).html/thread/\1'
|
|
|
|
Topic.listable_topics.find_each do |topic|
|
|
tcf = topic.custom_fields
|
|
if tcf && tcf["import_id"]
|
|
begin
|
|
Permalink.create(url: "thread/#{tcf["import_id"]}", topic_id: topic.id)
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
|
|
log "Done!"
|
|
end
|
|
|
|
task "import:remap_old_phpbb_permalinks" => :environment do
|
|
log "Remapping Permalinks..."
|
|
|
|
i = 0
|
|
Post
|
|
.where("raw LIKE ?", "%discussions.example.com%")
|
|
.each do |p|
|
|
begin
|
|
new_raw = p.raw.dup
|
|
# \((https?:\/\/discussions\.example\.com\/\S*-t\d+.html)\)
|
|
new_raw.gsub!(%r{\((https?://discussions\.example\.com/\S*-t\d+.html)\)}) do
|
|
normalized_url = Permalink.normalize_url($1)
|
|
permalink =
|
|
begin
|
|
Permalink.find_by_url(normalized_url)
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
if permalink && permalink.target_url
|
|
"(#{permalink.target_url})"
|
|
else
|
|
"(#{$1})"
|
|
end
|
|
end
|
|
|
|
if new_raw != p.raw
|
|
p.revise(Discourse.system_user, { raw: new_raw }, bypass_bump: true, skip_revision: true)
|
|
putc "."
|
|
i += 1
|
|
end
|
|
rescue StandardError
|
|
# skip
|
|
end
|
|
end
|
|
|
|
log "Done! #{i} posts remapped."
|
|
end
|
|
|
|
task "import:create_vbulletin_permalinks" => :environment do
|
|
log "Creating Permalinks..."
|
|
|
|
# /showthread.php\?t=(\d+).*/
|
|
SiteSetting.permalink_normalizations = '/showthread.php\?t=(\d+).*/showthread.php?t=\1'
|
|
|
|
Topic.listable_topics.find_each do |topic|
|
|
tcf = topic.custom_fields
|
|
if tcf && tcf["import_id"]
|
|
begin
|
|
Permalink.create(url: "showthread.php?t=#{tcf["import_id"]}", topic_id: topic.id)
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
|
|
Category.find_each do |cat|
|
|
ccf = cat.custom_fields
|
|
if ccf && ccf["import_id"]
|
|
begin
|
|
Permalink.create(url: "forumdisplay.php?f=#{ccf["import_id"]}", category_id: cat.id)
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
end
|
|
end
|
|
|
|
log "Done!"
|
|
end
|
|
|
|
desc "Import existing exported file"
|
|
task "import:file", [:file_name] => [:environment] do |_, args|
|
|
require "import_export"
|
|
|
|
ImportExport.import(args[:file_name])
|
|
puts "", "Done", ""
|
|
end
|
|
|
|
desc "Update first_post_created_at column in user_stats table"
|
|
task "import:update_first_post_created_at" => :environment do
|
|
log "Updating first_post_created_at..."
|
|
|
|
DB.exec <<~SQL
|
|
WITH sub AS (
|
|
SELECT user_id, MIN(posts.created_at) AS first_post_created_at
|
|
FROM posts
|
|
GROUP BY user_id
|
|
)
|
|
UPDATE user_stats
|
|
SET first_post_created_at = sub.first_post_created_at
|
|
FROM user_stats u1
|
|
JOIN sub ON sub.user_id = u1.user_id
|
|
WHERE u1.user_id = user_stats.user_id
|
|
AND user_stats.first_post_created_at IS DISTINCT FROM sub.first_post_created_at
|
|
SQL
|
|
|
|
log "Done"
|
|
end
|
|
|
|
desc "Update avatars from external_avatar_url in SSO records"
|
|
task "import:update_avatars_from_sso" => :environment do
|
|
log "Updating avatars from SSO records"
|
|
|
|
sql = <<~SQL
|
|
SELECT user_id, external_avatar_url
|
|
FROM single_sign_on_records s
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM user_avatars a
|
|
WHERE a.user_id = s.user_id
|
|
)
|
|
SQL
|
|
|
|
queue = SizedQueue.new(1000)
|
|
threads = []
|
|
|
|
threads << Thread.new do ||
|
|
DB.query_each(sql) { |row| queue << { user_id: row.user_id, url: row.external_avatar_url } }
|
|
queue.close
|
|
end
|
|
|
|
max_count = DB.query_single(<<~SQL).first
|
|
SELECT COUNT(*)
|
|
FROM single_sign_on_records s
|
|
WHERE NOT EXISTS (
|
|
SELECT 1
|
|
FROM user_avatars a
|
|
WHERE a.user_id = s.user_id
|
|
)
|
|
SQL
|
|
|
|
status_queue = Queue.new
|
|
status_thread =
|
|
Thread.new do
|
|
error_count = 0
|
|
current_count = 0
|
|
|
|
while !(status = status_queue.pop).nil?
|
|
error_count += 1 if !status
|
|
current_count += 1
|
|
|
|
print "\r%7d / %7d (%d errors)" % [current_count, max_count, error_count]
|
|
end
|
|
end
|
|
|
|
20.times do
|
|
threads << Thread.new do
|
|
while row = queue.pop
|
|
begin
|
|
UserAvatar.import_url_for_user(
|
|
row[:url],
|
|
User.find(row[:user_id]),
|
|
override_gravatar: true,
|
|
skip_rate_limit: true,
|
|
)
|
|
status_queue << true
|
|
rescue StandardError
|
|
status_queue << false
|
|
end
|
|
end
|
|
end
|
|
end
|
|
|
|
threads.each(&:join)
|
|
status_queue.close
|
|
status_thread.join
|
|
end
|