diff --git a/lib/text_cleaner.rb b/lib/text_cleaner.rb
index 09160f3f3ef..0df1c099e13 100644
--- a/lib/text_cleaner.rb
+++ b/lib/text_cleaner.rb
@@ -20,7 +20,8 @@ class TextCleaner
       remove_extraneous_space: SiteSetting.title_prettify && SiteSetting.title_remove_extraneous_space,
       fixes_interior_spaces: true,
       strip_whitespaces: true,
-      strip_zero_width_spaces: true
+      strip_zero_width_spaces: true,
+      case_option: SiteSetting.default_locale == "tr_TR" ? :turkic : nil
     }
   end
 
@@ -33,27 +34,37 @@ class TextCleaner
 
     # Remove invalid byte sequences
     text.scrub!("")
+
     # Replace !!!!! with a single !
     text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks]
+
     # Replace ????? with a single ?
     text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]
+
     # Replace all-caps text with regular case letters
-    text = text.mb_chars.downcase.to_s if opts[:replace_all_upper_case] && (text == text.mb_chars.upcase)
+    text = downcase(text.mb_chars, opts).to_s if opts[:replace_all_upper_case] && (text == upcase(text.mb_chars, opts))
+
     # Capitalize first letter, but only when entire first word is lowercase
     first, rest = text.split(' ', 2)
-    if first && opts[:capitalize_first_letter] && first == first.mb_chars.downcase
-      text = +"#{first.mb_chars.capitalize}#{rest ? ' ' + rest : ''}"
+    if first && opts[:capitalize_first_letter] && first == downcase(first.mb_chars, opts)
+      text = +"#{capitalize(first.mb_chars, opts)}#{rest ? ' ' + rest : ''}"
     end
+
     # Remove unnecessary periods at the end
     text.sub!(/([^.])\.+(\s*)\z/, '\1\2') if opts[:remove_all_periods_from_the_end]
+
     # Remove extraneous space before the end punctuation
     text.sub!(/\s+([!?]\s*)\z/, '\1') if opts[:remove_extraneous_space]
+
     # Fixes interior spaces
     text.gsub!(/ +/, ' ') if opts[:fixes_interior_spaces]
+
     # Normalize whitespaces
     text = normalize_whitespaces(text)
+
     # Strip whitespaces
     text.strip! if opts[:strip_whitespaces]
+
     # Strip zero width spaces
     text.gsub!(/\u200b/, '') if opts[:strip_zero_width_spaces]
 
@@ -66,4 +77,15 @@ class TextCleaner
     text&.gsub(@@whitespaces_regexp, ' ')
   end
 
+  def self.downcase(text, opts)
+    opts[:case_option] ? text.downcase(opts[:case_option]) : text.downcase
+  end
+
+  def self.upcase(text, opts)
+    opts[:case_option] ? text.upcase(opts[:case_option]) : text.upcase
+  end
+
+  def self.capitalize(text, opts)
+    opts[:case_option] ? text.capitalize(opts[:case_option]) : text.capitalize
+  end
 end
diff --git a/spec/components/text_cleaner_spec.rb b/spec/components/text_cleaner_spec.rb
index f5293991706..ad183a70112 100644
--- a/spec/components/text_cleaner_spec.rb
+++ b/spec/components/text_cleaner_spec.rb
@@ -215,6 +215,14 @@ describe TextCleaner do
       expect(TextCleaner.clean_title("épico encontro")).to eq("Épico encontro")
     end
 
+    it "correctly cleans Turkish characters" do
+      expect(TextCleaner.clean_title("GIDA")).to eq("Gida")
+      expect(TextCleaner.clean_title("istanbul")).to eq("Istanbul")
+
+      SiteSetting.default_locale = "tr_TR"
+      expect(TextCleaner.clean_title("GIDA")).to eq("Gıda")
+      expect(TextCleaner.clean_title("istanbul")).to eq("İstanbul")
+    end
   end
 
 end