discourse/lib/db_helper.rb
Alan Guo Xiang Tan 17e1bfe069
SECURITY: Preload data only when rendering application layout
This commit drops the `before_action :preload_json` callback in `ApplicationController` because it adds unnecessary complexity to `ApplicationController` as well as to the other controllers which have to skip this callback. The complexity comes mainly from the following two conditionals in the `preload_json` method:

```
    # We don't preload JSON on xhr or JSON request
    return if request.xhr? || request.format.json?

    # if we are posting in makes no sense to preload
    return if request.method != "GET"
```

Basically, the conditionals exist solely for optimization purposes: they ensure that we don't run the preloading code when the request is not a GET request or the response is not expected to be HTML. The key problem here is that the conditionals are trying to predict what the content type of the response will be, and this has proven to be hard to get right. Instead, we can simplify this problem by running the preloading code in a more deterministic way, which is to preload only when the `application` layout is being rendered. This is the main change that this commit introduces.
2025-02-04 13:32:30 -03:00

274 lines
8.1 KiB
Ruby

# frozen_string_literal: true
require "migration/base_dropper"
class DbHelper
# Lists every updatable column in the `public` schema whose data type starts
# with `char` or `text`, returning its table name, column name and
# `character_maximum_length`, ordered by table then column.
# Used by `find` and `find_text_columns` to discover the columns to scan.
REMAP_SQL = <<~SQL
SELECT table_name::text, column_name::text, character_maximum_length
FROM information_schema.columns
WHERE table_schema = 'public'
AND is_updatable = 'YES'
AND (data_type LIKE 'char%' OR data_type LIKE 'text%')
ORDER BY table_name, column_name
SQL
# Names of the triggers that `find_text_columns` compares against
# `Migration::BaseDropper.readonly_trigger_name` to skip read-only tables/columns.
# NOTE: `_` is a single-character wildcard in LIKE, so the pattern matches any
# name ending in `readonly` that is preceded by at least one character, not
# only names ending in the literal `_readonly`.
TRIGGERS_SQL = <<~SQL
SELECT trigger_name::text
FROM information_schema.triggers
WHERE trigger_name LIKE '%_readonly'
SQL
# `table.column` identifiers for which `truncate` wraps the replacement
# expression in `LEFT(..., max_length)` when the column has a maximum length.
TRUNCATABLE_COLUMNS = %w[bookmarks.name topic_links.url]
# Replaces every occurrence of `from` with `to` (SQL `REPLACE`) in the text
# columns returned by `find_text_columns`, one UPDATE per table.
#
# from / to                  - literal text to search for / substitute.
# anchor_left / anchor_right - when true, the leading / trailing `%` is left
#                              off the LIKE pattern used to select rows.
# excluded_tables            - table names to leave untouched.
# verbose                    - when true, logs per-table results.
# skip_max_length_violations - forwarded to `build_transform_query_parts` and
#                              `skipped_transform_counts`.
#
# Returns nil (and does not call `finish!`) when there are no columns to
# process; otherwise returns whatever `finish!` returns.
# A PG::StringDataRightTruncation raised by the UPDATE is handed to
# `raise_contextualized_transform_exception`.
def self.remap(
  from,
  to,
  anchor_left: false,
  anchor_right: false,
  excluded_tables: [],
  verbose: false,
  skip_max_length_violations: false
)
  columns_by_table = find_text_columns(excluded_tables)
  return if columns_by_table.empty?

  sql_builders = {
    replacement: ->(name) { %|REPLACE("#{name}", :from, :to)| },
    condition: ->(name) { %|"#{name}" IS NOT NULL AND "#{name}" LIKE :pattern| },
  }

  leading_wildcard = anchor_left ? "" : "%"
  trailing_wildcard = anchor_right ? "" : "%"
  bind_values = { from: from, to: to, pattern: "#{leading_wildcard}#{from}#{trailing_wildcard}" }

  columns_by_table.each do |table, columns|
    parts = build_transform_query_parts(table, columns, skip_max_length_violations, sql_builders)

    updated_count =
      begin
        execute_transform(table, parts, bind_values)
      rescue PG::StringDataRightTruncation => e
        # Provide more context in the exception message
        raise_contextualized_transform_exception(e, table, parts[:length_constrained_columns])
      end

    next unless verbose

    skipped = skipped_transform_counts(table, parts, skip_max_length_violations, bind_values)
    log_transform_result(table, updated_count, skipped)
  end

  finish!
end
# Applies SQL `REGEXP_REPLACE` with `pattern`, `replacement` and `flags` to the
# text columns returned by `find_text_columns`, one UPDATE per table.
#
# pattern / replacement      - bound as the :pattern / :replacement parameters.
# flags                      - bound as the :flags parameter (default "gi").
# match                      - SQL operator used to select candidate rows
#                              (default "~*"). It is interpolated into the SQL
#                              verbatim, so it must come from trusted code.
# excluded_tables            - table names to leave untouched.
# verbose                    - when true, logs per-table results.
# skip_max_length_violations - forwarded to `build_transform_query_parts` and
#                              `skipped_transform_counts`.
#
# Returns nil (and does not call `finish!`) when there are no columns to
# process; otherwise returns whatever `finish!` returns.
# A PG::StringDataRightTruncation raised by the UPDATE is handed to
# `raise_contextualized_transform_exception`.
def self.regexp_replace(
  pattern,
  replacement,
  flags: "gi",
  match: "~*",
  excluded_tables: [],
  verbose: false,
  skip_max_length_violations: false
)
  columns_by_table = find_text_columns(excluded_tables)
  return if columns_by_table.empty?

  sql_builders = {
    replacement: ->(name) { %|REGEXP_REPLACE("#{name}", :pattern, :replacement, :flags)| },
    condition: ->(name) { %|"#{name}" IS NOT NULL AND "#{name}" #{match} :pattern| },
  }
  bind_values = { pattern: pattern, replacement: replacement, flags: flags }

  columns_by_table.each do |table, columns|
    parts = build_transform_query_parts(table, columns, skip_max_length_violations, sql_builders)

    updated_count =
      begin
        execute_transform(table, parts, bind_values)
      rescue PG::StringDataRightTruncation => e
        # Provide more context in the exception message
        raise_contextualized_transform_exception(e, table, parts[:length_constrained_columns])
      end

    next unless verbose

    skipped = skipped_transform_counts(table, parts, skip_max_length_violations, bind_values)
    log_transform_result(table, updated_count, skipped)
  end

  finish!
end
# Searches every column listed by REMAP_SQL (except those in `excluded_tables`)
# for values matching `needle` with SQL LIKE.
#
# anchor_left / anchor_right - when true, the leading / trailing `%` is left
#                              off the LIKE pattern.
#
# Returns a Hash mapping "table.column" to the array of matching values;
# columns with no matches are omitted.
def self.find(needle, anchor_left: false, anchor_right: false, excluded_tables: [])
  leading_wildcard = anchor_left ? "" : "%"
  trailing_wildcard = anchor_right ? "" : "%"
  like = "#{leading_wildcard}#{needle}#{trailing_wildcard}"

  DB
    .query(REMAP_SQL)
    .each_with_object({}) do |col, matches_by_column|
      next if excluded_tables.include?(col.table_name)

      hits = DB.query(<<~SQL, like: like)
        SELECT "#{col.column_name}"
        FROM "#{col.table_name}"
        WHERE "#{col.column_name}" LIKE :like
      SQL
      next if hits.empty?

      matches_by_column["#{col.table_name}.#{col.column_name}"] = hits.map do |hit|
        hit.public_send(col.column_name)
      end
    end
end
private
# Post-processing hook run at the end of `remap` and `regexp_replace`.
# Calls, in order: SiteSetting.refresh!, Theme.expire_site_cache!,
# SiteIconManager.ensure_optimized! and clears
# ApplicationLayoutPreloader.banner_json_cache — presumably so that values
# cached from the rewritten text are rebuilt (confirm against those classes).
#
# NOTE(review): the bare `private` that precedes this method only affects
# instance methods; it has no effect on methods defined with `def self.`, so
# this and the following class methods remain publicly callable. Making them
# private would require `private_class_method` — check for external callers
# before changing visibility.
def self.finish!
SiteSetting.refresh!
Theme.expire_site_cache!
SiteIconManager.ensure_optimized!
ApplicationLayoutPreloader.banner_json_cache.clear
end
# Collects the columns listed by REMAP_SQL, grouped by table.
#
# A column is left out when its table is in `excluded_tables`, or when
# TRIGGERS_SQL returned a trigger whose name equals
# `Migration::BaseDropper.readonly_trigger_name` for that column or for the
# whole table.
#
# Returns a Hash of table name => array of `{ name:, max_length: }` hashes.
# The Hash has a default block that creates an empty array for unknown keys.
def self.find_text_columns(excluded_tables)
  readonly_triggers = DB.query(TRIGGERS_SQL).map(&:trigger_name).to_set
  readonly = ->(*identifiers) do
    readonly_triggers.include?(Migration::BaseDropper.readonly_trigger_name(*identifiers))
  end

  columns_by_table = Hash.new { |hash, table| hash[table] = [] }

  DB
    .query(REMAP_SQL)
    .each do |row|
      next if excluded_tables.include?(row.table_name)
      next if readonly.call(row.table_name, row.column_name)
      next if readonly.call(row.table_name)

      columns_by_table[row.table_name] << {
        name: row.column_name,
        max_length: row.character_maximum_length,
      }
    end

  columns_by_table
end
# Wraps the SQL expression `sql` in `LEFT(..., max_length)` when the column has
# a maximum length and "table.column" is listed in TRUNCATABLE_COLUMNS;
# otherwise returns `sql` unchanged.
#
# column - Hash with :name and :max_length keys.
def self.truncate(sql, table, column)
  limit = column[:max_length]
  return sql unless limit && TRUNCATABLE_COLUMNS.include?("#{table}.#{column[:name]}")

  "LEFT(#{sql}, #{limit})"
end
# Builds the SQL fragments needed to transform the given columns of `table`.
#
# columns    - array of `{ name:, max_length: }` hashes.
# transforms - Hash with two callables, each taking a column name:
#              :replacement returns the SQL expression for the new value,
#              :condition returns the SQL predicate selecting rows to change.
# skip_max_length_violations - when true, columns with a max length get an
#              extra `LENGTH(...) <=` guard and a matching SUM counter.
#
# Returns a Hash with:
#   :updates                    - `"col" = <replacement>` assignments
#   :conditions                 - parenthesised predicates, one per column
#   :skipped_sums               - `SUM(CASE ...) AS <col>_skipped` expressions
#                                 counting rows the length guard excludes
#   :length_constrained_columns - "col(max_length)" labels for error messages
def self.build_transform_query_parts(table, columns, skip_max_length_violations, transforms)
  parts = { updates: [], conditions: [], skipped_sums: [], length_constrained_columns: [] }

  columns.each do |column|
    name = column[:name]
    max_length = column[:max_length]

    new_value_sql = truncate(transforms[:replacement].call(name), table, column)

    # Keep track of columns with length constraints for error messages
    parts[:length_constrained_columns] << "#{name}(#{max_length})" if max_length.present?

    # Assignment used in the UPDATE's SET clause
    parts[:updates] << %|"#{name}" = #{new_value_sql}|

    # Base predicate selecting the rows this column's transform applies to
    match_sql = transforms[:condition].call(name)

    unless skip_max_length_violations && max_length.present?
      parts[:conditions] << "(#{match_sql})"
      next
    end

    # Narrow the predicate so updates that would exceed the column length are skipped
    parts[:conditions] << "(#{match_sql} AND LENGTH(#{new_value_sql}) <= #{max_length})"

    # Counter for the rows skipped because of the length guard on this column
    parts[:skipped_sums] << <<~SQL
      SUM (
      CASE
      WHEN #{match_sql} AND LENGTH(#{new_value_sql}) > #{max_length} THEN 1 ELSE 0
      END
      ) AS #{name}_skipped
    SQL
  end

  parts
end
# Counts, per column, the rows of `table` that the length guard excluded.
#
# Runs the `:skipped_sums` expressions from `query_parts` against `table` with
# `params` as bind values and keeps only the counters greater than zero.
#
# Returns nil when `skip_max_length_violations` is falsy or there are no sum
# expressions; otherwise a Hash of "<column>_skipped" => count.
def self.skipped_transform_counts(table, query_parts, skip_max_length_violations, params)
  return unless skip_max_length_violations

  sum_expressions = query_parts[:skipped_sums]
  return if sum_expressions.none?

  totals = DB.query_hash(<<~SQL, params).first
    SELECT #{sum_expressions.join(", ")}
    FROM "#{table}"
  SQL

  totals.select { |_column, total| total.to_i.positive? }
end
# Prints a one-line summary for `table` to standard output:
# "<table>=<rows_updated>", followed by " SKIPPED: <col>: <n> update(s), ..."
# when `skipped_counts` has entries (the "_skipped" suffix is stripped from
# each key).
#
# Prints nothing when no rows were updated and `skipped_counts` is blank.
def self.log_transform_result(table, rows_updated, skipped_counts)
  return if rows_updated == 0 && skipped_counts.blank?

  summary = "#{table}=#{rows_updated}"

  if skipped_counts&.any?
    skipped_details =
      skipped_counts.map do |column, count|
        "#{column.delete_suffix("_skipped")}: #{count} #{"update".pluralize(count)}"
      end
    summary = "#{summary} SKIPPED: #{skipped_details.join(", ")}"
  end

  puts summary
end
# Runs a single UPDATE against `table`, setting every assignment in
# `query_parts[:updates]` on rows matching any of `query_parts[:conditions]`
# (the conditions are OR-ed together). `params` supplies the bind values.
#
# Returns whatever `DB.exec` returns (the callers treat it as the number of
# updated rows).
def self.execute_transform(table, query_parts, params)
  assignments = query_parts[:updates].join(", ")
  predicate = query_parts[:conditions].join(" OR ")

  DB.exec(<<~SQL, params)
    UPDATE "#{table}"
    SET #{assignments}
    WHERE #{predicate}
  SQL
end
# Remap-specific variant of `skipped_transform_counts`, kept for backward
# compatibility (no caller is visible in this file).
#
# Builds the bind values from `from`, `to` and `pattern` and delegates to
# `skipped_transform_counts`, which runs the same query this method used to
# duplicate. Delegating keeps the two counters from drifting apart.
#
# Returns nil when `skip_max_length_violations` is falsy or there are no sum
# expressions; otherwise a Hash of "<column>_skipped" => count (> 0 only).
def self.skipped_remap_counts(table, from, to, pattern, query_parts, skip_max_length_violations)
  skipped_transform_counts(
    table,
    query_parts,
    skip_max_length_violations,
    { from: from, to: to, pattern: pattern },
  )
end
# Re-raises a string-truncation failure as a new
# PG::StringDataRightTruncation whose message appends the table name and the
# list of length-constrained columns to the original (stripped) message.
#
# error   - the rescued exception; only its message is used.
# columns - array of "column(max_length)" labels.
#
# Always raises. The message keeps its leading space exactly as before.
def self.raise_contextualized_transform_exception(error, table, columns)
  constrained_columns = columns.join(", ")
  context = "(table: #{table}, columns with length constraints: #{constrained_columns})"

  raise PG::StringDataRightTruncation, " #{error.message.strip} #{context}"
end
end