From d716e32a32351026a5774bb038ab472172845831 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 29 Jul 2022 16:27:52 +1000 Subject: [PATCH] FIX: bots could generate errors when slug generation method is encoded (#17224) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * FIX: bots could generate errors when slug generation method is encoded When slug generation method is encoded (non default) then bots could cause errors in the logs for urls containing special chars. ó for example in a URL can be requested in a valid ASCII-8BIT string, and later when joined to UTF-8 would result in encoding issues. Fix here ensures we force encoding correctly for outlier cases. Browsers tend to always encode these chars, hence we did not notice this. Co-authored-by: Jarek Radosz --- app/helpers/application_helper.rb | 15 ++++++++++----- spec/helpers/application_helper_spec.rb | 11 ++++++++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb index 487f59ca470..b478e56a309 100644 --- a/app/helpers/application_helper.rb +++ b/app/helpers/application_helper.rb @@ -264,6 +264,13 @@ module ApplicationHelper opts ||= {} opts[:url] ||= "#{Discourse.base_url_no_prefix}#{request.fullpath}" + # if slug generation method is encoded, non encoded urls can sneak in + # via bots + url = opts[:url] + if url.encoding.name != "UTF-8" || !url.valid_encoding? + opts[:url] = url.dup.force_encoding("UTF-8").scrub! + end + if opts[:image].blank? twitter_summary_large_image_url = SiteSetting.site_twitter_summary_large_image_url @@ -279,12 +286,12 @@ module ApplicationHelper opts[:twitter_summary_large_image] = get_absolute_image_url(opts[:twitter_summary_large_image]) if opts[:twitter_summary_large_image].present? 
- # Add opengraph & twitter tags result = [] result << tag(:meta, property: 'og:site_name', content: SiteSetting.title) result << tag(:meta, property: 'og:type', content: 'website') - result = generate_twitter_card_metadata(opts, result) + generate_twitter_card_metadata(result, opts) + result << tag(:meta, property: "og:image", content: opts[:image]) if opts[:image].present? [:url, :title, :description].each do |property| @@ -313,7 +320,7 @@ module ApplicationHelper result.join("\n") end - def generate_twitter_card_metadata(opts, result) + private def generate_twitter_card_metadata(result, opts) img_url = opts[:twitter_summary_large_image].present? ? \ opts[:twitter_summary_large_image] : opts[:image] @@ -332,8 +339,6 @@ module ApplicationHelper else result << tag(:meta, name: 'twitter:card', content: "summary") end - - result end def render_sitelinks_search_tag diff --git a/spec/helpers/application_helper_spec.rb b/spec/helpers/application_helper_spec.rb index a972e62cdcd..1dac777d6ec 100644 --- a/spec/helpers/application_helper_spec.rb +++ b/spec/helpers/application_helper_spec.rb @@ -161,7 +161,7 @@ RSpec.describe ApplicationHelper do context "when dark theme is present" do before do - dark_theme = Theme.create( + _dark_theme = Theme.create( name: "Dark", user_id: -1, color_scheme_id: ColorScheme.find_by(base_scheme_id: "Dark").id @@ -418,6 +418,15 @@ RSpec.describe ApplicationHelper do end describe 'crawlable_meta_data' do + + it 'Supports ASCII URLs with odd chars' do + result = helper.crawlable_meta_data( + url: (+"http://localhost/ión").force_encoding("ASCII-8BIT").freeze + ) + + expect(result).to include("ión") + end + context "opengraph image" do it 'returns the correct image' do SiteSetting.opengraph_image = Fabricate(:upload,