discourse/spec/components/text_cleaner_spec.rb

require 'rails_helper'
require 'text_cleaner'

describe TextCleaner do

  context "exclamation marks" do
    # A text ending in a run of "!" and the same text with the run collapsed to one.
    let(:shouted) { "my precious!!!!" }
    let(:collapsed) { "my precious!" }

    it "ignores multiple ! by default" do
      # No options given: the input is expected back unchanged.
      result = TextCleaner.clean(shouted)
      expect(result).to eq(shouted)
    end

    it "deduplicates ! when enabled" do
      result = TextCleaner.clean(shouted, deduplicate_exclamation_marks: true)
      expect(result).to eq(collapsed)
    end
  end

  context "question marks" do
    # A text ending in a run of "?" and the same text with the run collapsed to one.
    let(:pleading) { "please help me????" }
    let(:collapsed) { "please help me?" }

    it "ignores multiple ? by default" do
      # No options given: the input is expected back unchanged.
      result = TextCleaner.clean(pleading)
      expect(result).to eq(pleading)
    end

    it "deduplicates ? when enabled" do
      result = TextCleaner.clean(pleading, deduplicate_question_marks: true)
      expect(result).to eq(collapsed)
    end
  end

  context "all upper case text" do
    # Fully upper-case input, a variant containing one lower-case letter, and
    # the all-lower-case form of the first.
    let(:shouting) { "ENTIRE TEXT IS ALL CAPS" }
    let(:nearly_shouting) { "ENTIRE TEXT iS ALL CAPS" }
    let(:downcased) { "entire text is all caps" }

    it "ignores all upper case text by default" do
      result = TextCleaner.clean(shouting)
      expect(result).to eq(shouting)
    end

    it "replaces all upper case text with regular case letters when enabled" do
      result = TextCleaner.clean(shouting, replace_all_upper_case: true)
      expect(result).to eq(downcased)
    end

    it "ignores almost all upper case text when enabled" do
      # A single lower-case letter means the text is expected to be left alone.
      result = TextCleaner.clean(nearly_shouting, replace_all_upper_case: true)
      expect(result).to eq(nearly_shouting)
    end
  end

  context "first letter" do
    let(:lower_start) { "this is awesome" }
    let(:upper_start) { "This is awesome" }
    # Begins with a lower-case letter immediately followed by an upper-case one.
    let(:brand_like) { "iLetter" }

    it "ignores first letter case by default" do
      # Without options every sample is expected back unchanged.
      [lower_start, upper_start, brand_like].each do |text|
        expect(TextCleaner.clean(text)).to eq(text)
      end
    end

    it "capitalizes first letter when enabled" do
      # Each pair is [input, expected output]; "iLetter" is expected to stay as is.
      [
        [lower_start, upper_start],
        [upper_start, upper_start],
        [brand_like, brand_like]
      ].each do |input, expected|
        expect(TextCleaner.clean(input, capitalize_first_letter: true)).to eq(expected)
      end
    end
  end

  context "periods at the end" do
    # The same word with one trailing period, several, and none.
    let(:single_dot) { "oops." }
    let(:ellipsis) { "oops..." }
    let(:bare) { "oops" }

    it "ignores unnecessary periods at the end by default" do
      [single_dot, ellipsis].each do |text|
        expect(TextCleaner.clean(text)).to eq(text)
      end
    end

    it "removes unnecessary periods at the end when enabled" do
      # Both one and several trailing periods are expected to be removed entirely.
      [single_dot, ellipsis].each do |text|
        result = TextCleaner.clean(text, remove_all_periods_from_the_end: true)
        expect(result).to eq(bare)
      end
    end

    it "keeps trailing whitespaces when enabled" do
      # The space after the periods is expected to survive the period removal.
      padded_input = ellipsis + " "
      result = TextCleaner.clean(padded_input, remove_all_periods_from_the_end: true)
      expect(result).to eq(bare + " ")
    end
  end

  # Examples for the :remove_extraneous_space option of TextCleaner.clean,
  # exercised with a space placed directly before a final "!" or "?".
  context "extraneous space" do
    let(:with_space_exclamation) { "oops !" }
    let(:without_space_exclamation) { "oops!" }
    let(:with_space_question) { "oops ?" }
    let(:without_space_question) { "oops?" }

    it "ignores extraneous space before the end punctuation by default" do
      # Without options both inputs are expected back unchanged.
      [with_space_exclamation, with_space_question].each do |text|
        expect(TextCleaner.clean(text)).to eq(text)
      end
    end

    it "removes extraneous space before the end punctuation when enabled" do
      cleaned_exclamation = TextCleaner.clean(with_space_exclamation, remove_extraneous_space: true)
      expect(cleaned_exclamation).to eq(without_space_exclamation)

      cleaned_question = TextCleaner.clean(with_space_question, remove_extraneous_space: true)
      expect(cleaned_question).to eq(without_space_question)
    end

    # Description uses "keeps" to match the sibling example in the
    # "periods at the end" group and the third-person wording used elsewhere.
    it "keeps trailing whitespaces when enabled" do
      # Only the space before the punctuation goes; the one after it must stay.
      cleaned_exclamation = TextCleaner.clean(with_space_exclamation + " ", remove_extraneous_space: true)
      expect(cleaned_exclamation).to eq(without_space_exclamation + " ")

      cleaned_question = TextCleaner.clean(with_space_question + " ", remove_extraneous_space: true)
      expect(cleaned_question).to eq(without_space_question + " ")
    end
  end

  context "interior spaces" do
    # A sentence with runs of spaces between words and its single-spaced form.
    let(:gappy) { "hello     there's weird     spaces here." }
    let(:single_spaced) { "hello there's weird spaces here." }

    it "ignores interior spaces by default" do
      result = TextCleaner.clean(gappy)
      expect(result).to eq(gappy)
    end

    it "fixes interior spaces when enabled" do
      result = TextCleaner.clean(gappy, fixes_interior_spaces: true)
      expect(result).to eq(single_spaced)
    end
  end

  context "leading and trailing whitespaces" do
    # A word surrounded by spaces, a tab and a newline, and the bare word.
    let(:padded) { "   \t  test \n  " }
    let(:trimmed) { "test" }

    it "ignores leading and trailing whitespaces by default" do
      result = TextCleaner.clean(padded)
      expect(result).to eq(padded)
    end

    it "strips leading and trailing whitespaces when enabled" do
      result = TextCleaner.clean(padded, strip_whitespaces: true)
      expect(result).to eq(trimmed)
    end
  end

  context "title" do
    # These two examples do not touch the title_prettify setting themselves.
    it "fixes interior spaces" do
      title = TextCleaner.clean_title("Hello   there")
      expect(title).to eq("Hello there")
    end

    it "strips leading and trailing whitespaces" do
      title = TextCleaner.clean_title(" \t Hello there \n ")
      expect(title).to eq("Hello there")
    end

    context "title_prettify site setting is enabled" do
      # Switch the site setting on ahead of each example in this group.
      before do
        SiteSetting.title_prettify = true
      end

      it "deduplicates !" do
        title = TextCleaner.clean_title("Hello there!!!!")
        expect(title).to eq("Hello there!")
      end

      it "deduplicates ?" do
        title = TextCleaner.clean_title("Hello there????")
        expect(title).to eq("Hello there?")
      end

      it "replaces all upper case text with regular case letters" do
        title = TextCleaner.clean_title("HELLO THERE")
        expect(title).to eq("Hello there")
      end

      it "capitalizes first letter" do
        title = TextCleaner.clean_title("hello there")
        expect(title).to eq("Hello there")
      end

      it "removes unnecessary period at the end" do
        title = TextCleaner.clean_title("Hello there.")
        expect(title).to eq("Hello there")
      end

      it "removes extraneous space before the end punctuation" do
        title = TextCleaner.clean_title("Hello there ?")
        expect(title).to eq("Hello there?")
      end

      # The next two examples use accented (non-ASCII) letters.
      it "replaces all upper case unicode text with regular unicode case letters" do
        title = TextCleaner.clean_title("INVESTIGAÇÃO POLÍTICA NA CÂMARA")
        expect(title).to eq("Investigação política na câmara")
      end

      it "capitalizes first unicode letter" do
        title = TextCleaner.clean_title("épico encontro")
        expect(title).to eq("Épico encontro")
      end
    end
  end

  # Examples for the class-level TextCleaner.normalize_whitespaces; the group is
  # named with a leading "." because the method is called on the class itself.
  describe ".normalize_whitespaces" do
    it "normalizes whitespaces" do
      # A plain space followed by a series of non-ASCII space/separator code points.
      whitespaces = "\u0020\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000"
      # Sanity check: String#strip alone does not reduce this string to "".
      expect(whitespaces.strip).not_to eq("")
      # After normalization, strip is expected to remove everything.
      expect(TextCleaner.normalize_whitespaces(whitespaces).strip).to eq("")
    end

    it "does not muck with zero width white space" do
      # this is used for Khmer, don't mess with it
      expect(TextCleaner.normalize_whitespaces("hello\u200Bworld").strip).to eq("hello\u200Bworld")
      expect(TextCleaner.normalize_whitespaces("hello\uFEFFworld").strip).to eq("hello\uFEFFworld")
    end
  end

end
Prepare for separation of RSpec helper files Since rspec-rails 3, the default installation creates two helper files: * `spec_helper.rb` * `rails_helper.rb` `spec_helper.rb` is intended as a way of running specs that do not require Rails, whereas `rails_helper.rb` loads Rails (as Discourse's current `spec_helper.rb` does). For more information: https://www.relishapp.com/rspec/rspec-rails/docs/upgrade#default-helper-files In this commit, I've simply replaced all instances of `spec_helper` with `rails_helper`, and renamed the original `spec_helper.rb`. This brings the Discourse project closer to the standard usage of RSpec in a Rails app. At present, every spec relies on loading Rails, but there are likely many that don't need to. In a future pull request, I hope to introduce a separate, minimal `spec_helper.rb` which can be used in tests which don't rely on Rails. 2015-10-11 05:41:23 -04:00			`require 'rails_helper'`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`require 'text_cleaner'`

			`describe TextCleaner do`

			`context "exclamation marks" do`

			`let(:duplicated_string) { "my precious!!!!" }`
			`let(:deduplicated_string) { "my precious!" }`

			`it "ignores multiple ! by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(duplicated_string)).to eq(duplicated_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "deduplicates ! when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(duplicated_string, deduplicate_exclamation_marks: true)).to eq(deduplicated_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

			`context "question marks" do`

			`let(:duplicated_string) { "please help me????" }`
			`let(:deduplicated_string) { "please help me?" }`

			`it "ignores multiple ? by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(duplicated_string)).to eq(duplicated_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "deduplicates ? when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(duplicated_string, deduplicate_question_marks: true)).to eq(deduplicated_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

			`context "all upper case text" do`

			`let(:all_caps) { "ENTIRE TEXT IS ALL CAPS" }`
			`let(:almost_all_caps) { "ENTIRE TEXT iS ALL CAPS" }`
			`let(:regular_case) { "entire text is all caps" }`

			`it "ignores all upper case text by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(all_caps)).to eq(all_caps)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "replaces all upper case text with regular case letters when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(all_caps, replace_all_upper_case: true)).to eq(regular_case)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "ignores almost all upper case text when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(almost_all_caps, replace_all_upper_case: true)).to eq(almost_all_caps)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

			`context "first letter" do`

			`let(:lowercased) { "this is awesome" }`
			`let(:capitalized) { "This is awesome" }`
Thread title fixer should ignore special cases like iLetter 2013-05-23 15:31:08 -04:00			`let(:iletter) { "iLetter" }`
auto replace rules in titles 2013-04-10 05:00:50 -04:00
			`it "ignores first letter case by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(lowercased)).to eq(lowercased)`
			`expect(TextCleaner.clean(capitalized)).to eq(capitalized)`
			`expect(TextCleaner.clean(iletter)).to eq(iletter)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "capitalizes first letter when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(lowercased, capitalize_first_letter: true)).to eq(capitalized)`
			`expect(TextCleaner.clean(capitalized, capitalize_first_letter: true)).to eq(capitalized)`
			`expect(TextCleaner.clean(iletter, capitalize_first_letter: true)).to eq(iletter)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

Text Cleaner now removes all periods from the end of the title 2013-04-17 18:19:42 -04:00			`context "periods at the end" do`
auto replace rules in titles 2013-04-10 05:00:50 -04:00
Text Cleaner now removes all periods from the end of the title 2013-04-17 18:19:42 -04:00			`let(:with_one_period) { "oops." }`
			`let(:with_several_periods) { "oops..." }`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`let(:without_period) { "oops" }`

Text Cleaner now removes all periods from the end of the title 2013-04-17 18:19:42 -04:00			`it "ignores unnecessary periods at the end by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(with_one_period)).to eq(with_one_period)`
			`expect(TextCleaner.clean(with_several_periods)).to eq(with_several_periods)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

Text Cleaner now removes all periods from the end of the title 2013-04-17 18:19:42 -04:00			`it "removes unnecessary periods at the end when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(with_one_period, remove_all_periods_from_the_end: true)).to eq(without_period)`
			`expect(TextCleaner.clean(with_several_periods, remove_all_periods_from_the_end: true)).to eq(without_period)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "keeps trailing whitespaces when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(with_several_periods + " ", remove_all_periods_from_the_end: true)).to eq(without_period + " ")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

			`context "extraneous space" do`

			`let(:with_space_exclamation) { "oops !" }`
			`let(:without_space_exclamation) { "oops!" }`
			`let(:with_space_question) { "oops ?" }`
			`let(:without_space_question) { "oops?" }`

			`it "ignores extraneous space before the end punctuation by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(with_space_exclamation)).to eq(with_space_exclamation)`
			`expect(TextCleaner.clean(with_space_question)).to eq(with_space_question)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "removes extraneous space before the end punctuation when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(with_space_exclamation, remove_extraneous_space: true)).to eq(without_space_exclamation)`
			`expect(TextCleaner.clean(with_space_question, remove_extraneous_space: true)).to eq(without_space_question)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "keep trailing whitespaces when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(with_space_exclamation + " ", remove_extraneous_space: true)).to eq(without_space_exclamation + " ")`
			`expect(TextCleaner.clean(with_space_question + " ", remove_extraneous_space: true)).to eq(without_space_question + " ")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

			`context "interior spaces" do`

			`let(:spacey_string) { "hello there's weird spaces here." }`
			`let(:unspacey_string) { "hello there's weird spaces here." }`

			`it "ignores interior spaces by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(spacey_string)).to eq(spacey_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "fixes interior spaces when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(spacey_string, fixes_interior_spaces: true)).to eq(unspacey_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

			`context "leading and trailing whitespaces" do`

			`let(:spacey_string) { " \t test \n " }`
			`let(:unspacey_string) { "test" }`

			`it "ignores leading and trailing whitespaces by default" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(spacey_string)).to eq(spacey_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "strips leading and trailing whitespaces when enabled" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean(spacey_string, strip_whitespaces: true)).to eq(unspacey_string)`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

			`context "title" do`

			`it "fixes interior spaces" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title("Hello there")).to eq("Hello there")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "strips leading and trailing whitespaces" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title(" \t Hello there \n ")).to eq("Hello there")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`context "title_prettify site setting is enabled" do`

			`before { SiteSetting.title_prettify = true }`

			`it "deduplicates !" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title("Hello there!!!!")).to eq("Hello there!")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "deduplicates ?" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title("Hello there????")).to eq("Hello there?")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "replaces all upper case text with regular case letters" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title("HELLO THERE")).to eq("Hello there")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "capitalizes first letter" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title("hello there")).to eq("Hello there")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "removes unnecessary period at the end" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title("Hello there.")).to eq("Hello there")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`it "removes extraneous space before the end punctuation" do`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(TextCleaner.clean_title("Hello there ?")).to eq("Hello there?")`
auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

Add spec to unicode upcase and downcase 2016-03-07 20:10:49 -05:00			`it "replaces all upper case unicode text with regular unicode case letters" do`
			`expect(TextCleaner.clean_title("INVESTIGAÇÃO POLÍTICA NA CÂMARA")).to eq("Investigação política na câmara")`
			`end`

			`it "capitalizes first unicode letter" do`
			`expect(TextCleaner.clean_title("épico encontro")).to eq("Épico encontro")`
			`end`

auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`

			`end`

FEATURE: normalize whitespaces in topic title/post content 2014-08-11 18:01:58 -04:00			`describe "#normalize_whitespaces" do`
			`it "normalize whitespaces" do`
FIX: stop stripping zero-width-whitespace This char is used for formatting khmer words 2015-03-26 22:01:31 -04:00			`whitespaces = "\u0020\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000"`
few components with rspec3 syntax 2015-01-09 11:34:37 -05:00			`expect(whitespaces.strip).not_to eq("")`
			`expect(TextCleaner.normalize_whitespaces(whitespaces).strip).to eq("")`
FEATURE: normalize whitespaces in topic title/post content 2014-08-11 18:01:58 -04:00			`end`
FIX: stop stripping zero-width-whitespace This char is used for formatting khmer words 2015-03-26 22:01:31 -04:00
			`it "does not muck with zero width white space" do`
			`# this is used for khmer, dont mess with it`
			`expect(TextCleaner.normalize_whitespaces("hello\u200Bworld").strip).to eq("hello\u200Bworld")`
			`expect(TextCleaner.normalize_whitespaces("hello\uFEFFworld").strip).to eq("hello\uFEFFworld")`

			`end`
FEATURE: normalize whitespaces in topic title/post content 2014-08-11 18:01:58 -04:00			`end`

auto replace rules in titles 2013-04-10 05:00:50 -04:00			`end`