FIX: properly unescape HTML entities in excerpts

This commit is contained in:
Régis Hanol 2014-12-10 12:52:51 +01:00
parent 7e609e1834
commit 6027073547
2 changed files with 8 additions and 6 deletions

View File

@@ -17,16 +17,15 @@ class ExcerptParser < Nokogiri::XML::SAX::Document
def self.get_excerpt(html, length, options) def self.get_excerpt(html, length, options)
html ||= '' html ||= ''
if (html.include? 'excerpt') && (SPAN_REGEX === html) length = html.length if html.include?('excerpt') && SPAN_REGEX === html
length = html.length
end
me = self.new(length, options) me = self.new(length, options)
parser = Nokogiri::HTML::SAX::Parser.new(me) parser = Nokogiri::HTML::SAX::Parser.new(me)
catch(:done) do catch(:done) do
parser.parse(html) parser.parse(html)
end end
me.excerpt.strip! excerpt = me.excerpt.strip
me.excerpt excerpt = CGI.unescapeHTML(excerpt) if options[:text_entities] == true
excerpt
end end
def escape_attribute(v) def escape_attribute(v)

View File

@@ -84,7 +84,6 @@ describe PrettyText do
describe "Excerpt" do describe "Excerpt" do
it "sanitizes attempts to inject invalid attributes" do it "sanitizes attempts to inject invalid attributes" do
spinner = "<a href=\"http://thedailywtf.com/\" data-bbcode=\"' class='fa fa-spin\">WTF</a>" spinner = "<a href=\"http://thedailywtf.com/\" data-bbcode=\"' class='fa fa-spin\">WTF</a>"
PrettyText.excerpt(spinner, 20).should match_html spinner PrettyText.excerpt(spinner, 20).should match_html spinner
@@ -216,6 +215,10 @@ describe PrettyText do
post.excerpt.should == two_hundred post.excerpt.should == two_hundred
end end
it "unescapes html entities when we want text entities" do
PrettyText.excerpt("&#39;", 500, text_entities: true).should == "'"
end
end end
describe "strip links" do describe "strip links" do