FIX: properly unescape HTML entities in excerpts

2014-12-10 12:52:51 +01:00 · 2014-12-10 12:52:51 +01:00 · 6027073547
parent 7e609e1834
commit 6027073547
2 changed files with 8 additions and 6 deletions
--- a/lib/excerpt_parser.rb
+++ b/lib/excerpt_parser.rb
@@ -17,16 +17,15 @@ class ExcerptParser < Nokogiri::XML::SAX::Document
  def self.get_excerpt(html, length, options)
    html ||= ''
-    if (html.include? 'excerpt') && (SPAN_REGEX === html)
+    length = html.length if html.include?('excerpt') && SPAN_REGEX === html
-      length = html.length
-    end
    me = self.new(length, options)
    parser = Nokogiri::HTML::SAX::Parser.new(me)
    catch(:done) do
      parser.parse(html)
    end
-    me.excerpt.strip!
+    excerpt = me.excerpt.strip
-    me.excerpt
+    excerpt = CGI.unescapeHTML(excerpt) if options[:text_entities] == true
+    excerpt
  end
  def escape_attribute(v)
--- a/spec/components/pretty_text_spec.rb
+++ b/spec/components/pretty_text_spec.rb
@@ -84,7 +84,6 @@ describe PrettyText do
  describe "Excerpt" do
    it "sanitizes attempts to inject invalid attributes" do
      spinner = "<a href=\"http://thedailywtf.com/\" data-bbcode=\"' class='fa fa-spin\">WTF</a>"
      PrettyText.excerpt(spinner, 20).should match_html spinner
@@ -216,6 +215,10 @@ describe PrettyText do
      post.excerpt.should == two_hundred
    end
+    it "unescapes html entities when we want text entities" do
+      PrettyText.excerpt("&#39;", 500, text_entities: true).should == "'"
+    end
  end
  describe "strip links" do