DEV: Gracefully handle remaps which violate DB column constraints (#29501)

* DEV: Gracefully handle remaps which violate DB column constraints

This change implements length constraint enforcement to skip remaps
which exceed column max lengths

* DEV: Only perform skipped column stats lookup when verbose is true

* DEV: Tidy up specs

* DEV: Make skipping violating remap behaviour opt-in

This change introduces a new `skip_max_length_violations` param for
`remap`, set to `false` by default to ensure we still continue to fail
hard when max lenth constraints are violated.

To aid in quick resolution when remaps fail, this change also
adds more context to the exception message to include the offending table
and column information

* Apply suggestions from code review

Co-authored-by: Gerhard Schlager <gerhard.schlager@discourse.org>

* FIX: Various fixes

- Linter errors
- Remap status "logger" early return condition

---------

Co-authored-by: Gerhard Schlager <gerhard.schlager@discourse.org>
This commit is contained in:
Selase Krakani 2024-11-15 10:42:25 +00:00 committed by GitHub
parent a9d6aba427
commit 9b0cfa99c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 139 additions and 20 deletions

View File

@ -26,33 +26,35 @@ class DbHelper
anchor_left: false,
anchor_right: false,
excluded_tables: [],
verbose: false
verbose: false,
skip_max_length_violations: false
)
like = "#{anchor_left ? "" : "%"}#{from}#{anchor_right ? "" : "%"}"
text_columns = find_text_columns(excluded_tables)
return if text_columns.empty?
pattern = "#{anchor_left ? "" : "%"}#{from}#{anchor_right ? "" : "%"}"
text_columns.each do |table, columns|
set =
columns
.map do |column|
replace = "REPLACE(\"#{column[:name]}\", :from, :to)"
replace = truncate(replace, table, column)
"\"#{column[:name]}\" = #{replace}"
end
.join(", ")
query_parts = build_remap_query_parts(table, columns, skip_max_length_violations)
where =
columns
.map { |column| "\"#{column[:name]}\" IS NOT NULL AND \"#{column[:name]}\" LIKE :like" }
.join(" OR ")
begin
rows_updated = DB.exec(<<~SQL, from: from, to: to, pattern: pattern)
UPDATE \"#{table}\"
SET #{query_parts[:updates].join(", ")}
WHERE #{query_parts[:conditions].join(" OR ")}
SQL
rescue PG::StringDataRightTruncation => e
# Provide more context in the exeption message
raise_contextualized_remap_exception(e, table, query_parts[:length_constrained_columns])
end
rows = DB.exec(<<~SQL, from: from, to: to, like: like)
UPDATE \"#{table}\"
SET #{set}
WHERE #{where}
SQL
if verbose
skipped_counts =
skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
puts "#{table}=#{rows}" if verbose && rows > 0
log_remap_message(table, rows_updated, skipped_counts)
end
end
finish!
@ -161,4 +163,78 @@ class DbHelper
sql
end
end
def self.build_remap_query_parts(table, columns, skip_max_length_violations)
columns.each_with_object(
{ updates: [], conditions: [], skipped_sums: [], length_constrained_columns: [] },
) do |column, parts|
replace = %|REPLACE("#{column[:name]}", :from, :to)|
replace = truncate(replace, table, column)
if column[:max_length].present?
# Keep track of columns with length constraints for error messages
parts[:length_constrained_columns] << "#{column[:name]}(#{column[:max_length]})"
end
# Build SQL update statements for each column
parts[:updates] << %("#{column[:name]}" = #{replace})
# Build the base SQL condition clause for each column
basic_condition = %("#{column[:name]}" IS NOT NULL AND "#{column[:name]}" LIKE :pattern)
if skip_max_length_violations && column[:max_length].present?
# Extend base condition to skip updates that would violate the column length constraint
parts[
:conditions
] << "(#{basic_condition} AND LENGTH(#{replace}) <= #{column[:max_length]})"
# Build SQL sum statements for each column to count skipped updates.
# This will helps us know the number of updates skipped due to length constraints
# violations on this column
parts[:skipped_sums] << <<~SQL
SUM (
CASE
WHEN #{basic_condition} AND LENGTH(#{replace}) > #{column[:max_length]} THEN 1 ELSE 0
END
) AS #{column[:name]}_skipped
SQL
else
parts[:conditions] << "(#{basic_condition})"
end
end
end
def self.log_remap_message(table, rows_updated, skipped_counts)
return if rows_updated == 0 && skipped_counts.blank?
message = +"#{table}=#{rows_updated}"
if skipped_counts&.any?
message << " SKIPPED: "
message << skipped_counts
.map do |column, count|
"#{column.delete_suffix("_skipped")}: #{count} #{"update".pluralize(count)}"
end
.join(", ")
end
puts message
end
def self.skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
return unless skip_max_length_violations && query_parts[:skipped_sums].any?
skipped = DB.query_hash(<<~SQL, from: from, to: to, pattern: pattern).first
SELECT #{query_parts[:skipped_sums].join(", ")}
FROM \"#{table}\"
SQL
skipped.select { |_, count| count.to_i > 0 }
end
def self.raise_contextualized_remap_exception(error, table, columns)
details = "columns with length constraints: #{columns.join(", ")}"
raise PG::StringDataRightTruncation, " #{error.message.strip} (table: #{table}, #{details})"
end
end

View File

@ -2,6 +2,11 @@
RSpec.describe DbHelper do
describe ".remap" do
fab!(:bookmark1) { Fabricate(:bookmark, name: "short-bookmark") }
fab!(:bookmark2) { Fabricate(:bookmark, name: "another-bookmark") }
let(:bookmark_name_limit) { Bookmark.columns_hash["name"].limit }
let(:long_bookmark_name) { "a" * (bookmark_name_limit + 1) }
it "should remap columns properly" do
post = Fabricate(:post, cooked: "this is a specialcode that I included")
post_attributes = post.reload.attributes
@ -44,6 +49,44 @@ RSpec.describe DbHelper do
DB.exec "DROP FUNCTION #{Migration::BaseDropper.readonly_function_name("posts", "cooked")} CASCADE"
end
context "when skip_max_length_violations is false" do
it "raises an exception if remap exceeds column length constraint by default" do
expect { DbHelper.remap("bookmark", long_bookmark_name) }.to raise_error(
PG::StringDataRightTruncation,
/value too long.*table: bookmarks,.*name/,
)
end
end
context "when skip_max_length_violations is true" do
it "skips a remap eligible row if new value exceeds column length constraint" do
DbHelper.remap("bookmark", long_bookmark_name, skip_max_length_violations: true)
bookmark1.reload
bookmark2.reload
expect(bookmark1.name).to eq("short-bookmark")
expect(bookmark2.name).to eq("another-bookmark")
end
it "logs skipped remaps due to max length constraints when verbose is true" do
expect {
DbHelper.remap(
"bookmark",
long_bookmark_name,
verbose: true,
skip_max_length_violations: true,
)
}.to output(/SKIPPED:/).to_stdout
bookmark1.reload
bookmark2.reload
expect(bookmark1.name).to eq("short-bookmark")
expect(bookmark2.name).to eq("another-bookmark")
end
end
end
describe ".regexp_replace" do