2019-05-02 18:17:27 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2022-03-21 10:28:52 -04:00
|
|
|
require "migration/base_dropper"
|
2019-05-09 12:01:35 -04:00
|
|
|
|
2015-06-12 06:02:36 -04:00
|
|
|
class DbHelper
|
2024-10-15 22:09:07 -04:00
|
|
|
REMAP_SQL = <<~SQL
|
2021-04-21 05:36:32 -04:00
|
|
|
SELECT table_name::text, column_name::text, character_maximum_length
|
2015-06-12 06:02:36 -04:00
|
|
|
FROM information_schema.columns
|
|
|
|
WHERE table_schema = 'public'
|
|
|
|
AND is_updatable = 'YES'
|
|
|
|
AND (data_type LIKE 'char%' OR data_type LIKE 'text%')
|
2018-12-26 11:34:49 -05:00
|
|
|
ORDER BY table_name, column_name
|
|
|
|
SQL
|
2015-06-12 06:02:36 -04:00
|
|
|
|
2024-10-15 22:09:07 -04:00
|
|
|
TRIGGERS_SQL = <<~SQL
|
2021-04-21 05:36:32 -04:00
|
|
|
SELECT trigger_name::text
|
2019-05-03 14:30:23 -04:00
|
|
|
FROM information_schema.triggers
|
|
|
|
WHERE trigger_name LIKE '%_readonly'
|
|
|
|
SQL
|
|
|
|
|
2024-10-15 22:09:07 -04:00
|
|
|
# "table.column" names whose replacement expression is wrapped in LEFT(...) by
# `truncate`, so the new value is cut to the column's maximum length.
# Frozen so the list cannot be mutated by accident at runtime.
TRUNCATABLE_COLUMNS = ["topic_links.url"].freeze
|
2023-01-09 07:10:19 -05:00
|
|
|
|
2019-07-31 11:30:08 -04:00
|
|
|
# Replaces every literal occurrence of +from+ with +to+ in all text columns
# returned by `find_text_columns`.
#
# @param from [String] text to search for (matched literally)
# @param to [String] replacement text
# @param anchor_left [Boolean] when true, only rows whose value starts with
#   +from+ are selected (no leading "%" in the LIKE pre-filter)
# @param anchor_right [Boolean] when true, only rows whose value ends with
#   +from+ are selected (no trailing "%" in the LIKE pre-filter)
# @param excluded_tables [Array<String>] tables to leave untouched
# @param verbose [Boolean] print a per-table summary via `log_transform_result`
# @param skip_max_length_violations [Boolean] forwarded to
#   `build_transform_query_parts`, which then adds a length guard to the
#   conditions of length-constrained columns
# @return [nil, Object] nil when there are no text columns to process,
#   otherwise whatever `finish!` returns
# @raise [PG::StringDataRightTruncation] re-raised with table/column context
def self.remap(
  from,
  to,
  anchor_left: false,
  anchor_right: false,
  excluded_tables: [],
  verbose: false,
  skip_max_length_violations: false
)
  text_columns = find_text_columns(excluded_tables)

  return if text_columns.empty?

  transforms = {
    replacement: ->(column_name) { %|REPLACE("#{column_name}", :from, :to)| },
    condition: ->(column_name) do
      %|"#{column_name}" IS NOT NULL AND "#{column_name}" LIKE :pattern|
    end,
  }

  # REPLACE treats +from+ literally, so the LIKE pre-filter has to as well.
  # Without escaping, "%" and "_" act as wildcards and a backslash swallows
  # the next character (or makes the pattern invalid when it comes last).
  like_literal = from.to_s.gsub(/[\\%_]/) { |char| "\\#{char}" }

  query_params = {
    from: from,
    to: to,
    pattern: "#{anchor_left ? "" : "%"}#{like_literal}#{anchor_right ? "" : "%"}",
  }

  text_columns.each do |table, columns|
    query_parts =
      build_transform_query_parts(table, columns, skip_max_length_violations, transforms)

    begin
      rows_updated = execute_transform(table, query_parts, query_params)
    rescue PG::StringDataRightTruncation => e
      # Provide more context in the exception message
      raise_contextualized_transform_exception(e, table, query_parts[:length_constrained_columns])
    end

    if verbose
      skipped_counts =
        skipped_transform_counts(table, query_parts, skip_max_length_violations, query_params)

      log_transform_result(table, rows_updated, skipped_counts)
    end
  end

  finish!
end
|
|
|
|
|
2019-07-31 11:30:08 -04:00
|
|
|
# Runs REGEXP_REPLACE over all text columns returned by `find_text_columns`.
#
# @param pattern [String] regular expression bound as :pattern
# @param replacement [String] replacement text bound as :replacement
# @param flags [String] flags bound as :flags
# @param match [String] SQL operator used to pre-filter rows; interpolated
#   directly into the SQL text, so it must come from trusted code
# @param excluded_tables [Array<String>] tables to leave untouched
# @param verbose [Boolean] print a per-table summary via `log_transform_result`
# @param skip_max_length_violations [Boolean] forwarded to
#   `build_transform_query_parts`
# @return [nil, Object] nil when there are no text columns to process,
#   otherwise whatever `finish!` returns
# @raise [PG::StringDataRightTruncation] re-raised with table/column context
def self.regexp_replace(
  pattern,
  replacement,
  flags: "gi",
  match: "~*",
  excluded_tables: [],
  verbose: false,
  skip_max_length_violations: false
)
  columns_by_table = find_text_columns(excluded_tables)
  return if columns_by_table.empty?

  replace_sql =
    lambda do |column_name|
      %|REGEXP_REPLACE("#{column_name}", :pattern, :replacement, :flags)|
    end
  match_sql =
    lambda do |column_name|
      %|"#{column_name}" IS NOT NULL AND "#{column_name}" #{match} :pattern|
    end

  transforms = { replacement: replace_sql, condition: match_sql }
  bind_values = { pattern: pattern, replacement: replacement, flags: flags }

  columns_by_table.each_pair do |table, columns|
    parts = build_transform_query_parts(table, columns, skip_max_length_violations, transforms)

    updated =
      begin
        execute_transform(table, parts, bind_values)
      rescue PG::StringDataRightTruncation => e
        # Re-raise with the table and its length-constrained columns attached
        raise_contextualized_transform_exception(e, table, parts[:length_constrained_columns])
      end

    next unless verbose

    skipped = skipped_transform_counts(table, parts, skip_max_length_violations, bind_values)
    log_transform_result(table, updated, skipped)
  end

  finish!
end
|
|
|
|
|
2018-12-26 11:34:49 -05:00
|
|
|
# Searches every column listed by REMAP_SQL for values matching +needle+.
#
# @param needle [String] text used as the LIKE pattern (not escaped)
# @param anchor_left [Boolean] when true, no leading "%" is added
# @param anchor_right [Boolean] when true, no trailing "%" is added
# @param excluded_tables [Array<String>] tables to skip
# @return [Hash{String => Array}] matching values keyed by "table.column";
#   columns without matches are omitted
def self.find(needle, anchor_left: false, anchor_right: false, excluded_tables: [])
  prefix = anchor_left ? "" : "%"
  suffix = anchor_right ? "" : "%"
  like = "#{prefix}#{needle}#{suffix}"

  DB.query(REMAP_SQL).each_with_object({}) do |column, matches|
    next if excluded_tables.include?(column.table_name)

    rows = DB.query(<<~SQL, like: like)
      SELECT "#{column.column_name}"
      FROM "#{column.table_name}"
      WHERE "#{column.column_name}" LIKE :like
    SQL

    next if rows.empty?

    # Each row exposes the selected column through a reader of the same name
    matches["#{column.table_name}.#{column.column_name}"] = rows.map do |row|
      row.public_send(column.column_name)
    end
  end
end
|
|
|
|
|
2019-07-29 12:43:40 -04:00
|
|
|
# NOTE(review): a bare `private` only changes the visibility of instance
# methods defined after it. The helpers below are all defined with `def self.`,
# so they remain publicly callable (e.g. `DbHelper.finish!`). Treat this marker
# as a statement of intent; `private_class_method` would be needed to actually
# hide them.
private
|
|
|
|
|
|
|
|
# Post-processing step invoked at the end of `remap` and `regexp_replace`.
# Calls, in order: a site-settings refresh, a theme site-cache expiry, a site
# icon optimization check, and a clear of the banner JSON cache.
# NOTE(review): presumably needed because the rewritten columns may back these
# cached values — confirm against the callee implementations.
def self.finish!
  SiteSetting.refresh!
  Theme.expire_site_cache!
  SiteIconManager.ensure_optimized!
  ApplicationController.banner_json_cache.clear
end
|
|
|
|
|
2019-08-12 10:49:22 -04:00
|
|
|
# Collects the text columns that are safe to rewrite, grouped by table.
#
# A column is skipped when its table is in +excluded_tables+, or when a
# readonly trigger (as named by Migration::BaseDropper) exists for either the
# column or its whole table.
#
# @param excluded_tables [Array<String>] tables to leave out
# @return [Hash{String => Array<Hash>}] table name => list of
#   `{ name:, max_length: }` hashes
def self.find_text_columns(excluded_tables)
  readonly_triggers = DB.query(TRIGGERS_SQL).map(&:trigger_name).to_set
  grouped = Hash.new { |hash, table| hash[table] = [] }

  DB.query(REMAP_SQL).each do |row|
    table = row.table_name
    column = row.column_name

    next if excluded_tables.include?(table)
    next if readonly_triggers.include?(Migration::BaseDropper.readonly_trigger_name(table, column))
    next if readonly_triggers.include?(Migration::BaseDropper.readonly_trigger_name(table))

    grouped[table] << { name: column, max_length: row.character_maximum_length }
  end

  grouped
end
|
2019-08-12 11:12:06 -04:00
|
|
|
|
|
|
|
# Wraps +sql+ in LEFT(..., max_length) when the column has a maximum length
# and "table.column" is listed in TRUNCATABLE_COLUMNS; otherwise returns +sql+
# unchanged.
#
# @param sql [String] SQL expression producing the new column value
# @param table [String] table name
# @param column [Hash] column descriptor with :name and :max_length
# @return [String] the (possibly wrapped) SQL expression
def self.truncate(sql, table, column)
  limit = column[:max_length]
  return sql unless limit && TRUNCATABLE_COLUMNS.include?("#{table}.#{column[:name]}")

  "LEFT(#{sql}, #{limit})"
end
|
2024-11-15 05:42:25 -05:00
|
|
|
|
2024-11-25 06:39:53 -05:00
|
|
|
# Builds the SQL fragments needed to rewrite the given columns of one table.
#
# @param table [String] table name
# @param columns [Array<Hash>] column descriptors with :name and :max_length
# @param skip_max_length_violations [Boolean] when true, length-constrained
#   columns get a condition that excludes rows whose new value would be too
#   long, plus a SUM expression counting those excluded rows
# @param transforms [Hash] :replacement and :condition callables, each taking
#   a column name and returning an SQL fragment
# @return [Hash] with keys
#   :updates                    - `"col" = <expr>` assignments
#   :conditions                 - parenthesized WHERE conditions (one per column)
#   :skipped_sums               - SUM(...) AS "<col>_skipped" expressions
#   :length_constrained_columns - "col(max)" labels used in error messages
def self.build_transform_query_parts(table, columns, skip_max_length_violations, transforms)
  columns.each_with_object(
    { updates: [], conditions: [], skipped_sums: [], length_constrained_columns: [] },
  ) do |column, parts|
    replace = transforms[:replacement].call(column[:name])
    replace = truncate(replace, table, column)

    if column[:max_length].present?
      # Keep track of columns with length constraints for error messages
      parts[:length_constrained_columns] << "#{column[:name]}(#{column[:max_length]})"
    end

    # Build SQL update statements for each column
    parts[:updates] << %|"#{column[:name]}" = #{replace}|

    # Build the base SQL condition clause for each column
    basic_condition = transforms[:condition].call(column[:name])

    if skip_max_length_violations && column[:max_length].present?
      # Extend base condition to skip updates that would violate the column length constraint
      parts[
        :conditions
      ] << "(#{basic_condition} AND LENGTH(#{replace}) <= #{column[:max_length]})"

      # Build SQL sum statements for each column to count skipped updates.
      # This helps us know the number of updates skipped due to length
      # constraint violations on this column. The alias is quoted like every
      # other identifier here, so column names that are mixed-case or need
      # quoting produce a valid, case-preserving alias.
      parts[:skipped_sums] << <<~SQL
        SUM (
          CASE
            WHEN #{basic_condition} AND LENGTH(#{replace}) > #{column[:max_length]} THEN 1 ELSE 0
          END
        ) AS "#{column[:name]}_skipped"
      SQL
    else
      parts[:conditions] << "(#{basic_condition})"
    end
  end
end
|
|
|
|
|
2024-11-25 06:39:53 -05:00
|
|
|
# Counts, per column, the rows that were left untouched because the new value
# would not fit the column.
#
# @param table [String] table name
# @param query_parts [Hash] result of `build_transform_query_parts`
# @param skip_max_length_violations [Boolean] whether skipping was enabled
# @param params [Hash] bind values for the SUM expressions
# @return [Hash, nil] "<column>_skipped" => count for counts above zero;
#   nil when skipping is disabled or there is nothing to count
def self.skipped_transform_counts(table, query_parts, skip_max_length_violations, params)
  return if !skip_max_length_violations || query_parts[:skipped_sums].none?

  sums_sql = query_parts[:skipped_sums].join(", ")

  totals = DB.query_hash(<<~SQL, params).first
    SELECT #{sums_sql}
    FROM "#{table}"
  SQL

  # A nil total counts as zero via to_i
  totals.select { |_alias, total| total.to_i.positive? }
end
|
|
|
|
|
|
|
|
# Prints a one-line summary for a table, e.g. `posts=3` or
# `topic_links=0 SKIPPED: url: 2 updates`. Prints nothing when no rows were
# updated and nothing was skipped.
#
# @param table [String] table name
# @param rows_updated [Integer] number of updated rows
# @param skipped_counts [Hash, nil] "<column>_skipped" => count
# @return [nil]
def self.log_transform_result(table, rows_updated, skipped_counts)
  return if rows_updated == 0 && skipped_counts.blank?

  summary = "#{table}=#{rows_updated}"

  if skipped_counts&.any?
    details =
      skipped_counts.map do |column, count|
        "#{column.delete_suffix("_skipped")}: #{count} #{"update".pluralize(count)}"
      end

    summary = "#{summary} SKIPPED: #{details.join(", ")}"
  end

  puts summary
end
|
|
|
|
|
2024-11-25 06:39:53 -05:00
|
|
|
# Runs a single UPDATE on +table+ that applies every assignment in
# `query_parts[:updates]` to rows matching any of `query_parts[:conditions]`.
#
# @param table [String] table name
# @param query_parts [Hash] result of `build_transform_query_parts`
# @param params [Hash] bind values referenced by the SQL fragments
# @return [Object] whatever `DB.exec` returns (callers treat it as the
#   number of updated rows)
def self.execute_transform(table, query_parts, params)
  assignments = query_parts[:updates].join(", ")
  predicate = query_parts[:conditions].join(" OR ")

  sql = <<~SQL
    UPDATE "#{table}"
    SET #{assignments}
    WHERE #{predicate}
  SQL

  DB.exec(sql, params)
end
|
|
|
|
|
2024-11-15 05:42:25 -05:00
|
|
|
# Variant of `skipped_transform_counts` that takes the remap bind values
# (+from+, +to+, +pattern+) as separate arguments.
# NOTE(review): no caller is visible in this file; kept for compatibility.
#
# @param table [String] table name
# @param from [String] bound as :from
# @param to [String] bound as :to
# @param pattern [String] bound as :pattern
# @param query_parts [Hash] result of `build_transform_query_parts`
# @param skip_max_length_violations [Boolean] whether skipping was enabled
# @return [Hash, nil] "<column>_skipped" => count for counts above zero;
#   nil when skipping is disabled or there is nothing to count
def self.skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
  return if !skip_max_length_violations || query_parts[:skipped_sums].none?

  sums_sql = query_parts[:skipped_sums].join(", ")

  totals = DB.query_hash(<<~SQL, from: from, to: to, pattern: pattern).first
    SELECT #{sums_sql}
    FROM "#{table}"
  SQL

  totals.select { |_alias, total| total.to_i.positive? }
end
|
|
|
|
|
2024-11-25 06:39:53 -05:00
|
|
|
# Raises a new PG::StringDataRightTruncation whose message is the original
# (stripped) message followed by the table name and the list of
# length-constrained columns.
#
# @param error [Exception] the original truncation error
# @param table [String] table being updated
# @param columns [Array<String>] "column(max_length)" labels
# @raise [PG::StringDataRightTruncation] always
def self.raise_contextualized_transform_exception(error, table, columns)
  constrained = columns.join(", ")
  context = "(table: #{table}, columns with length constraints: #{constrained})"

  raise PG::StringDataRightTruncation, " #{error.message.strip} #{context}"
end
|
2015-06-12 06:02:36 -04:00
|
|
|
end
|