FEATURE: Correctly convert topic title to uppercase and lowercase for Turkish default locale (#13115)

This commit is contained in:
Gerhard Schlager 2021-05-24 10:13:30 +02:00 committed by GitHub
parent 567f9caf75
commit 332ae97555
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 4 deletions

View File

@ -20,7 +20,8 @@ class TextCleaner
remove_extraneous_space: SiteSetting.title_prettify && SiteSetting.title_remove_extraneous_space,
fixes_interior_spaces: true,
strip_whitespaces: true,
strip_zero_width_spaces: true
strip_zero_width_spaces: true,
case_option: SiteSetting.default_locale == "tr_TR" ? :turkic : nil
}
end
@ -33,27 +34,37 @@ class TextCleaner
# Remove invalid byte sequences
text.scrub!("")
# Replace !!!!! with a single !
text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks]
# Replace ????? with a single ?
text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]
# Replace all-caps text with regular case letters
text = text.mb_chars.downcase.to_s if opts[:replace_all_upper_case] && (text == text.mb_chars.upcase)
text = downcase(text.mb_chars, opts).to_s if opts[:replace_all_upper_case] && (text == upcase(text.mb_chars, opts))
# Capitalize first letter, but only when entire first word is lowercase
first, rest = text.split(' ', 2)
if first && opts[:capitalize_first_letter] && first == first.mb_chars.downcase
text = +"#{first.mb_chars.capitalize}#{rest ? ' ' + rest : ''}"
if first && opts[:capitalize_first_letter] && first == downcase(first.mb_chars, opts)
text = +"#{capitalize(first.mb_chars, opts)}#{rest ? ' ' + rest : ''}"
end
# Remove unnecessary periods at the end
text.sub!(/([^.])\.+(\s*)\z/, '\1\2') if opts[:remove_all_periods_from_the_end]
# Remove extraneous space before the end punctuation
text.sub!(/\s+([!?]\s*)\z/, '\1') if opts[:remove_extraneous_space]
# Fixes interior spaces
text.gsub!(/ +/, ' ') if opts[:fixes_interior_spaces]
# Normalize whitespaces
text = normalize_whitespaces(text)
# Strip whitespaces
text.strip! if opts[:strip_whitespaces]
# Strip zero width spaces
text.gsub!(/\u200b/, '') if opts[:strip_zero_width_spaces]
@ -66,4 +77,15 @@ class TextCleaner
text&.gsub(@@whitespaces_regexp, ' ')
end
# Lowercases +text+. When +opts[:case_option]+ is set (for example
# +:turkic+), it is forwarded to +text.downcase+ as the case-mapping
# option; otherwise the plain +downcase+ is used.
#
# text - object responding to +downcase+ (a String or mb_chars proxy).
# opts - options Hash; only the +:case_option+ key is read here.
#
# Returns whatever +text.downcase+ returns.
def self.downcase(text, opts)
  case_option = opts[:case_option]
  return text.downcase unless case_option

  text.downcase(case_option)
end
# Uppercases +text+. When +opts[:case_option]+ is set (for example
# +:turkic+), it is forwarded to +text.upcase+ as the case-mapping
# option; otherwise the plain +upcase+ is used.
#
# text - object responding to +upcase+ (a String or mb_chars proxy).
# opts - options Hash; only the +:case_option+ key is read here.
#
# Returns whatever +text.upcase+ returns.
def self.upcase(text, opts)
  case_option = opts[:case_option]
  return text.upcase unless case_option

  text.upcase(case_option)
end
# Capitalizes +text+. When +opts[:case_option]+ is set (for example
# +:turkic+), it is forwarded to +text.capitalize+ as the case-mapping
# option; otherwise the plain +capitalize+ is used.
#
# text - object responding to +capitalize+ (a String or mb_chars proxy).
# opts - options Hash; only the +:case_option+ key is read here.
#
# Returns whatever +text.capitalize+ returns.
def self.capitalize(text, opts)
  case_option = opts[:case_option]
  return text.capitalize unless case_option

  text.capitalize(case_option)
end
end

View File

@ -215,6 +215,14 @@ describe TextCleaner do
expect(TextCleaner.clean_title("épico encontro")).to eq("Épico encontro")
end
it "correctly cleans Turkish characters" do
  # Before the locale is switched, plain case mapping is expected.
  [["GIDA", "Gida"], ["istanbul", "Istanbul"]].each do |title, cleaned|
    expect(TextCleaner.clean_title(title)).to eq(cleaned)
  end

  # Once the site locale is Turkish, dotted and dotless "i" must map correctly.
  SiteSetting.default_locale = "tr_TR"

  [["GIDA", "Gıda"], ["istanbul", "İstanbul"]].each do |title, cleaned|
    expect(TextCleaner.clean_title(title)).to eq(cleaned)
  end
end
end
end