Merge pull request #4062 from xfalcox/patch-6

FIX: Properly downcase unicode chars
This commit is contained in:
Sam 2016-05-08 21:39:00 +10:00
commit 7c888c9a77
2 changed files with 17 additions and 2 deletions

View File

@@ -1,6 +1,10 @@
#
# Clean up a text
#
# We use ActiveSupport's mb_chars here to properly downcase non-ASCII characters
require 'active_support/core_ext/string/multibyte'
class TextCleaner
def self.title_options
@@ -27,9 +31,12 @@ class TextCleaner
# Replace ????? with a single ?
text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]
# Replace all-caps text with regular case letters
text.tr!('A-Z', 'a-z') if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase)
text = text.mb_chars.downcase.to_s if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase)
# Capitalize first letter, but only when entire first word is lowercase
text.sub!(/\A([a-z]*)\b/) { |first| first.capitalize } if opts[:capitalize_first_letter]
first, rest = text.split(' ', 2)
if first && opts[:capitalize_first_letter] && first == first.mb_chars.downcase
text = "#{first.mb_chars.capitalize}#{rest ? ' ' + rest : ''}"
end
# Remove unnecessary periods at the end
text.sub!(/([^.])\.+(\s*)\z/, '\1\2') if opts[:remove_all_periods_from_the_end]
# Remove extraneous space before the end punctuation

View File

@@ -187,6 +187,14 @@ describe TextCleaner do
expect(TextCleaner.clean_title("Hello there ?")).to eq("Hello there?")
end
it "replaces all upper case unicode text with regular unicode case letters" do
expect(TextCleaner.clean_title("INVESTIGAÇÃO POLÍTICA NA CÂMARA")).to eq("Investigação política na câmara")
end
it "capitalizes first unicode letter" do
expect(TextCleaner.clean_title("épico encontro")).to eq("Épico encontro")
end
end
end