From 7f78b6ec10fba821b34752ac3a5484eaf2d4b2de Mon Sep 17 00:00:00 2001 From: Krzysztof Kotlarek Date: Mon, 11 Jan 2021 10:40:41 +1100 Subject: [PATCH] FIX: broken emojis in topic excerpt (#11667) When a post is truncated into an excerpt, the cut sometimes falls in the middle of an emoji code, leaving a broken emoji in the output --- app/models/post.rb | 2 +- lib/excerpt_parser.rb | 11 ++++++++++- spec/components/pretty_text_spec.rb | 18 ++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/app/models/post.rb b/app/models/post.rb index 5da11450f0e..e53897ec045 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -486,7 +486,7 @@ class Post < ActiveRecord::Base end def excerpt_for_topic - Post.excerpt(cooked, SiteSetting.topic_excerpt_maxlength, strip_links: true, strip_images: true, post: self) + Post.excerpt(cooked, SiteSetting.topic_excerpt_maxlength, strip_links: true, strip_images: true, strip_truncated_emoji_code: true, post: self) end def is_first_post? diff --git a/lib/excerpt_parser.rb b/lib/excerpt_parser.rb index 054a1234013..43223bf05ca 100644 --- a/lib/excerpt_parser.rb +++ b/lib/excerpt_parser.rb @@ -13,6 +13,7 @@ class ExcerptParser < Nokogiri::XML::SAX::Document options || {} @strip_links = options[:strip_links] == true @strip_images = options[:strip_images] == true + @strip_truncated_emoji_code = options[:strip_truncated_emoji_code] == true @text_entities = options[:text_entities] == true @markdown_images = options[:markdown_images] == true @keep_newlines = options[:keep_newlines] == true @@ -207,7 +208,7 @@ class ExcerptParser < Nokogiri::XML::SAX::Document encode = encode ? lambda { |s| ERB::Util.html_escape(s) } : lambda { |s| s } if count_it && @current_length + string.length > @length length = [0, @length - @current_length - 1].max - @excerpt << encode.call(string[0..length]) if truncate + @excerpt << encode.call(string[0..length]) if truncate && !truncated_emoji_code?(string) @excerpt << (@text_entities ? "..." 
: "…") @excerpt << "" if @in_a @excerpt << after_string if after_string @@ -218,4 +219,12 @@ class ExcerptParser < Nokogiri::XML::SAX::Document @excerpt << after_string if after_string @current_length += string.length if count_it end + + def truncated_emoji_code?(string) + @strip_truncated_emoji_code && emoji?(string) + end + + def emoji?(string) + string.match?(/:\w+:/) + end end diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index 274b00c62ce..7a600ab83d1 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -636,6 +636,24 @@ describe PrettyText do end end + context "emojis" do + it "should remove broken emoji" do + html = <<~EOS + \":bike:\" \":cat:\" \":discourse:\" + EOS + expect(PrettyText.excerpt(html, 10, strip_truncated_emoji_code: false)).to eq(":bike: :ca…") + + expect(PrettyText.excerpt(html, 7, strip_truncated_emoji_code: true)).to eq(":bike: …") + expect(PrettyText.excerpt(html, 8, strip_truncated_emoji_code: true)).to eq(":bike: …") + expect(PrettyText.excerpt(html, 9, strip_truncated_emoji_code: true)).to eq(":bike: …") + expect(PrettyText.excerpt(html, 10, strip_truncated_emoji_code: true)).to eq(":bike: …") + expect(PrettyText.excerpt(html, 11, strip_truncated_emoji_code: true)).to eq(":bike: …") + expect(PrettyText.excerpt(html, 12, strip_truncated_emoji_code: true)).to eq(":bike: :cat: …") + expect(PrettyText.excerpt(html, 13, strip_truncated_emoji_code: true)).to eq(":bike: :cat: …") + expect(PrettyText.excerpt(html, 14, strip_truncated_emoji_code: true)).to eq(":bike: :cat: …") + end + end + it "should have an option to strip links" do expect(PrettyText.excerpt("cnn", 100, strip_links: true)).to eq("cnn") end