DEV: Gracefully handle remaps which violate DB column constraints (#29501)
* DEV: Gracefully handle remaps which violate DB column constraints This change implements length constraint enforcement to skip remaps which exceed column max lengths * DEV: Only perform skipped column stats lookup when verbose is true * DEV: Tidy up specs * DEV: Make skipping violating remap behaviour opt-in This change introduces a new `skip_max_length_violations` param for `remap`, set to `false` by default to ensure we still continue to fail hard when max lenth constraints are violated. To aid in quick resolution when remaps fail, this change also adds more context to the exception message to include the offending table and column information * Apply suggestions from code review Co-authored-by: Gerhard Schlager <gerhard.schlager@discourse.org> * FIX: Various fixes - Linter errors - Remap status "logger" early return condition --------- Co-authored-by: Gerhard Schlager <gerhard.schlager@discourse.org>
This commit is contained in:
parent
a9d6aba427
commit
9b0cfa99c5
116
lib/db_helper.rb
116
lib/db_helper.rb
|
@ -26,33 +26,35 @@ class DbHelper
|
|||
anchor_left: false,
|
||||
anchor_right: false,
|
||||
excluded_tables: [],
|
||||
verbose: false
|
||||
verbose: false,
|
||||
skip_max_length_violations: false
|
||||
)
|
||||
like = "#{anchor_left ? "" : "%"}#{from}#{anchor_right ? "" : "%"}"
|
||||
text_columns = find_text_columns(excluded_tables)
|
||||
|
||||
return if text_columns.empty?
|
||||
|
||||
pattern = "#{anchor_left ? "" : "%"}#{from}#{anchor_right ? "" : "%"}"
|
||||
|
||||
text_columns.each do |table, columns|
|
||||
set =
|
||||
columns
|
||||
.map do |column|
|
||||
replace = "REPLACE(\"#{column[:name]}\", :from, :to)"
|
||||
replace = truncate(replace, table, column)
|
||||
"\"#{column[:name]}\" = #{replace}"
|
||||
end
|
||||
.join(", ")
|
||||
query_parts = build_remap_query_parts(table, columns, skip_max_length_violations)
|
||||
|
||||
where =
|
||||
columns
|
||||
.map { |column| "\"#{column[:name]}\" IS NOT NULL AND \"#{column[:name]}\" LIKE :like" }
|
||||
.join(" OR ")
|
||||
begin
|
||||
rows_updated = DB.exec(<<~SQL, from: from, to: to, pattern: pattern)
|
||||
UPDATE \"#{table}\"
|
||||
SET #{query_parts[:updates].join(", ")}
|
||||
WHERE #{query_parts[:conditions].join(" OR ")}
|
||||
SQL
|
||||
rescue PG::StringDataRightTruncation => e
|
||||
# Provide more context in the exeption message
|
||||
raise_contextualized_remap_exception(e, table, query_parts[:length_constrained_columns])
|
||||
end
|
||||
|
||||
rows = DB.exec(<<~SQL, from: from, to: to, like: like)
|
||||
UPDATE \"#{table}\"
|
||||
SET #{set}
|
||||
WHERE #{where}
|
||||
SQL
|
||||
if verbose
|
||||
skipped_counts =
|
||||
skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
|
||||
|
||||
puts "#{table}=#{rows}" if verbose && rows > 0
|
||||
log_remap_message(table, rows_updated, skipped_counts)
|
||||
end
|
||||
end
|
||||
|
||||
finish!
|
||||
|
@ -161,4 +163,78 @@ class DbHelper
|
|||
sql
|
||||
end
|
||||
end
|
||||
|
||||
def self.build_remap_query_parts(table, columns, skip_max_length_violations)
|
||||
columns.each_with_object(
|
||||
{ updates: [], conditions: [], skipped_sums: [], length_constrained_columns: [] },
|
||||
) do |column, parts|
|
||||
replace = %|REPLACE("#{column[:name]}", :from, :to)|
|
||||
replace = truncate(replace, table, column)
|
||||
|
||||
if column[:max_length].present?
|
||||
# Keep track of columns with length constraints for error messages
|
||||
parts[:length_constrained_columns] << "#{column[:name]}(#{column[:max_length]})"
|
||||
end
|
||||
|
||||
# Build SQL update statements for each column
|
||||
parts[:updates] << %("#{column[:name]}" = #{replace})
|
||||
|
||||
# Build the base SQL condition clause for each column
|
||||
basic_condition = %("#{column[:name]}" IS NOT NULL AND "#{column[:name]}" LIKE :pattern)
|
||||
|
||||
if skip_max_length_violations && column[:max_length].present?
|
||||
# Extend base condition to skip updates that would violate the column length constraint
|
||||
parts[
|
||||
:conditions
|
||||
] << "(#{basic_condition} AND LENGTH(#{replace}) <= #{column[:max_length]})"
|
||||
|
||||
# Build SQL sum statements for each column to count skipped updates.
|
||||
# This will helps us know the number of updates skipped due to length constraints
|
||||
# violations on this column
|
||||
parts[:skipped_sums] << <<~SQL
|
||||
SUM (
|
||||
CASE
|
||||
WHEN #{basic_condition} AND LENGTH(#{replace}) > #{column[:max_length]} THEN 1 ELSE 0
|
||||
END
|
||||
) AS #{column[:name]}_skipped
|
||||
SQL
|
||||
else
|
||||
parts[:conditions] << "(#{basic_condition})"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def self.log_remap_message(table, rows_updated, skipped_counts)
|
||||
return if rows_updated == 0 && skipped_counts.blank?
|
||||
|
||||
message = +"#{table}=#{rows_updated}"
|
||||
|
||||
if skipped_counts&.any?
|
||||
message << " SKIPPED: "
|
||||
message << skipped_counts
|
||||
.map do |column, count|
|
||||
"#{column.delete_suffix("_skipped")}: #{count} #{"update".pluralize(count)}"
|
||||
end
|
||||
.join(", ")
|
||||
end
|
||||
|
||||
puts message
|
||||
end
|
||||
|
||||
def self.skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
|
||||
return unless skip_max_length_violations && query_parts[:skipped_sums].any?
|
||||
|
||||
skipped = DB.query_hash(<<~SQL, from: from, to: to, pattern: pattern).first
|
||||
SELECT #{query_parts[:skipped_sums].join(", ")}
|
||||
FROM \"#{table}\"
|
||||
SQL
|
||||
|
||||
skipped.select { |_, count| count.to_i > 0 }
|
||||
end
|
||||
|
||||
def self.raise_contextualized_remap_exception(error, table, columns)
|
||||
details = "columns with length constraints: #{columns.join(", ")}"
|
||||
|
||||
raise PG::StringDataRightTruncation, " #{error.message.strip} (table: #{table}, #{details})"
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,6 +2,11 @@
|
|||
|
||||
RSpec.describe DbHelper do
|
||||
describe ".remap" do
|
||||
fab!(:bookmark1) { Fabricate(:bookmark, name: "short-bookmark") }
|
||||
fab!(:bookmark2) { Fabricate(:bookmark, name: "another-bookmark") }
|
||||
let(:bookmark_name_limit) { Bookmark.columns_hash["name"].limit }
|
||||
let(:long_bookmark_name) { "a" * (bookmark_name_limit + 1) }
|
||||
|
||||
it "should remap columns properly" do
|
||||
post = Fabricate(:post, cooked: "this is a specialcode that I included")
|
||||
post_attributes = post.reload.attributes
|
||||
|
@ -44,6 +49,44 @@ RSpec.describe DbHelper do
|
|||
|
||||
DB.exec "DROP FUNCTION #{Migration::BaseDropper.readonly_function_name("posts", "cooked")} CASCADE"
|
||||
end
|
||||
|
||||
context "when skip_max_length_violations is false" do
|
||||
it "raises an exception if remap exceeds column length constraint by default" do
|
||||
expect { DbHelper.remap("bookmark", long_bookmark_name) }.to raise_error(
|
||||
PG::StringDataRightTruncation,
|
||||
/value too long.*table: bookmarks,.*name/,
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
context "when skip_max_length_violations is true" do
|
||||
it "skips a remap eligible row if new value exceeds column length constraint" do
|
||||
DbHelper.remap("bookmark", long_bookmark_name, skip_max_length_violations: true)
|
||||
|
||||
bookmark1.reload
|
||||
bookmark2.reload
|
||||
|
||||
expect(bookmark1.name).to eq("short-bookmark")
|
||||
expect(bookmark2.name).to eq("another-bookmark")
|
||||
end
|
||||
|
||||
it "logs skipped remaps due to max length constraints when verbose is true" do
|
||||
expect {
|
||||
DbHelper.remap(
|
||||
"bookmark",
|
||||
long_bookmark_name,
|
||||
verbose: true,
|
||||
skip_max_length_violations: true,
|
||||
)
|
||||
}.to output(/SKIPPED:/).to_stdout
|
||||
|
||||
bookmark1.reload
|
||||
bookmark2.reload
|
||||
|
||||
expect(bookmark1.name).to eq("short-bookmark")
|
||||
expect(bookmark2.name).to eq("another-bookmark")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe ".regexp_replace" do
|
||||
|
|
Loading…
Reference in New Issue