Merge pull request #4062 from xfalcox/patch-6
FIX: Properly downcase unicode chars
This commit is contained in:
commit
7c888c9a77
|
@ -1,6 +1,10 @@
|
|||
#
|
||||
# Clean up a text
|
||||
#
|
||||
|
||||
# We use ActiveSupport mb_chars from here to properly support non-ASCII downcase
|
||||
require 'active_support/core_ext/string/multibyte'
|
||||
|
||||
class TextCleaner
|
||||
|
||||
def self.title_options
|
||||
|
@ -27,9 +31,12 @@ class TextCleaner
|
|||
# Replace ????? with a single ?
|
||||
text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]
|
||||
# Replace all-caps text with regular case letters
|
||||
text.tr!('A-Z', 'a-z') if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase)
|
||||
text = text.mb_chars.downcase.to_s if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase)
|
||||
# Capitalize first letter, but only when entire first word is lowercase
|
||||
text.sub!(/\A([a-z]*)\b/) { |first| first.capitalize } if opts[:capitalize_first_letter]
|
||||
first, rest = text.split(' ', 2)
|
||||
if first && opts[:capitalize_first_letter] && first == first.mb_chars.downcase
|
||||
text = "#{first.mb_chars.capitalize}#{rest ? ' ' + rest : ''}"
|
||||
end
|
||||
# Remove unnecessary periods at the end
|
||||
text.sub!(/([^.])\.+(\s*)\z/, '\1\2') if opts[:remove_all_periods_from_the_end]
|
||||
# Remove extraneous space before the end punctuation
|
||||
|
|
|
@ -187,6 +187,14 @@ describe TextCleaner do
|
|||
expect(TextCleaner.clean_title("Hello there ?")).to eq("Hello there?")
|
||||
end
|
||||
|
||||
it "replaces all upper case unicode text with regular unicode case letters" do
|
||||
expect(TextCleaner.clean_title("INVESTIGAÇÃO POLÍTICA NA CÂMARA")).to eq("Investigação política na câmara")
|
||||
end
|
||||
|
||||
it "capitalizes first unicode letter" do
|
||||
expect(TextCleaner.clean_title("épico encontro")).to eq("Épico encontro")
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue