discourse/spec/components/pretty_text_spec.rb

require 'rails_helper'
require 'pretty_text'

describe PrettyText do

  let(:wrapped_image) { "<div class=\"lightbox-wrapper\"><a href=\"//localhost:3000/uploads/default/4399/33691397e78b4d75.png\" class=\"lightbox\" title=\"Screen Shot 2014-04-14 at 9.47.10 PM.png\"><img src=\"//localhost:3000/uploads/default/_optimized/bd9/b20/bbbcd6a0c0_655x500.png\" width=\"655\" height=\"500\"><div class=\"meta\">\n<span class=\"filename\">Screen Shot 2014-04-14 at 9.47.10 PM.png</span><span class=\"informations\">966x737 1.47 MB</span><span class=\"expand\"></span>\n</div></a></div>" }
  let(:wrapped_image_excerpt) {  }

  describe "Cooking" do

    describe "off topic quoting" do
      it "can correctly populate topic title" do
        topic = Fabricate(:topic, title: "this is a test topic :slight_smile:")
        expected = <<HTML
<aside class="quote" data-post="2" data-topic="#{topic.id}"><div class="title">
<div class="quote-controls"></div><a href="http://test.localhost/t/this-is-a-test-topic-slight-smile/#{topic.id}/2">This is a test topic <img src="/images/emoji/emoji_one/slight_smile.png?v=3" title="slight_smile" alt="slight_smile" class="emoji"></a>
</div>
<blockquote><p>ddd</p></blockquote></aside>
HTML
        expect(PrettyText.cook("[quote=\"EvilTrout, post:2, topic:#{topic.id}\"]ddd\n[/quote]", topic_id: 1)).to match_html expected
      end
    end

    describe "with avatar" do
      let(:default_avatar) { "//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/{size}.png" }
      let(:user) { Fabricate(:user) }

      before do
        User.stubs(:default_template).returns(default_avatar)
      end

      it "produces a quote even with new lines in it" do
        expect(PrettyText.cook("[quote=\"#{user.username}, post:123, topic:456, full:true\"]ddd\n[/quote]")).to match_html "<aside class=\"quote\" data-post=\"123\" data-topic=\"456\" data-full=\"true\"><div class=\"title\">\n<div class=\"quote-controls\"></div>\n<img alt='' width=\"20\" height=\"20\" src=\"//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png\" class=\"avatar\">#{user.username}:</div>\n<blockquote><p>ddd</p></blockquote></aside>"
      end

      it "should produce a quote" do
        expect(PrettyText.cook("[quote=\"#{user.username}, post:123, topic:456, full:true\"]ddd[/quote]")).to match_html "<aside class=\"quote\" data-post=\"123\" data-topic=\"456\" data-full=\"true\"><div class=\"title\">\n<div class=\"quote-controls\"></div>\n<img alt='' width=\"20\" height=\"20\" src=\"//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png\" class=\"avatar\">#{user.username}:</div>\n<blockquote><p>ddd</p></blockquote></aside>"
      end

      it "trims spaces on quote params" do
        expect(PrettyText.cook("[quote=\"#{user.username}, post:555, topic: 666\"]ddd[/quote]")).to match_html "<aside class=\"quote\" data-post=\"555\" data-topic=\"666\"><div class=\"title\">\n<div class=\"quote-controls\"></div>\n<img alt='' width=\"20\" height=\"20\" src=\"//test.localhost/uploads/default/avatars/42d/57c/46ce7ee487/40.png\" class=\"avatar\">#{user.username}:</div>\n<blockquote><p>ddd</p></blockquote></aside>"
      end

    end

    it "should handle 3 mentions in a row" do
      expect(PrettyText.cook('@hello @hello @hello')).to match_html "<p><span class=\"mention\">@hello</span> <span class=\"mention\">@hello</span> <span class=\"mention\">@hello</span></p>"
    end

    it "should handle group mentions with a hyphen and without" do
      expect(PrettyText.cook('@hello @hello-hello')).to match_html "<p><span class=\"mention\">@hello</span> <span class=\"mention\">@hello-hello</span></p>"
    end


    it "should sanitize the html" do
      expect(PrettyText.cook("<script>alert(42)</script>")).to match_html "<p></p>"
    end

    it 'should allow for @mentions to have punctuation' do
      expect(PrettyText.cook("hello @bob's @bob,@bob; @bob\"")).to match_html(
        "<p>hello <span class=\"mention\">@bob</span>'s <span class=\"mention\">@bob</span>,<span class=\"mention\">@bob</span>; <span class=\"mention\">@bob</span>\"</p>"
      )
    end

    # see: https://github.com/sparklemotion/nokogiri/issues/1173
    skip 'allows html entities correctly' do
      expect(PrettyText.cook("&aleph;&pound;&#162;")).to eq("<p>&aleph;&pound;&#162;</p>")
    end

  end

  describe "rel nofollow" do
    before do
      SiteSetting.stubs(:add_rel_nofollow_to_user_content).returns(true)
      SiteSetting.stubs(:exclude_rel_nofollow_domains).returns("foo.com|bar.com")
    end

    it "should inject nofollow in all user provided links" do
      expect(PrettyText.cook('<a href="http://cnn.com">cnn</a>')).to match(/nofollow noopener/)
    end

    it "should not inject nofollow in all local links" do
      expect(PrettyText.cook("<a href='#{Discourse.base_url}/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
    end

    it "should not inject nofollow in all subdomain links" do
      expect(PrettyText.cook("<a href='#{Discourse.base_url.sub('http://', 'http://bla.')}/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
    end

    it "should inject nofollow in all non subdomain links" do
      expect(PrettyText.cook("<a href='#{Discourse.base_url.sub('http://', 'http://bla')}/test.html'>cnn</a>")).to match(/nofollow/)
    end

    it "should not inject nofollow for foo.com" do
      expect(PrettyText.cook("<a href='http://foo.com/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
    end

    it "should inject nofollow for afoo.com" do
      expect(PrettyText.cook("<a href='http://afoo.com/test.html'>cnn</a>")).to match(/nofollow/)
    end

    it "should not inject nofollow for bar.foo.com" do
      expect(PrettyText.cook("<a href='http://bar.foo.com/test.html'>cnn</a>") !~ /nofollow/).to eq(true)
    end

    it "should not inject nofollow if omit_nofollow option is given" do
      expect(PrettyText.cook('<a href="http://cnn.com">cnn</a>', omit_nofollow: true) !~ /nofollow/).to eq(true)
    end
  end

  describe "Excerpt" do

    it "sanitizes attempts to inject invalid attributes" do
      spinner = "<a href=\"http://thedailywtf.com/\" data-bbcode=\"' class='fa fa-spin\">WTF</a>"
      expect(PrettyText.excerpt(spinner, 20)).to match_html spinner

      spinner = %q{<a href="http://thedailywtf.com/" title="' class=&quot;fa fa-spin&quot;&gt;&lt;img src='http://thedailywtf.com/Resources/Images/Primary/logo.gif"></a>}
      expect(PrettyText.excerpt(spinner, 20)).to match_html spinner
    end

    context "images" do

      it "should dump images" do
        expect(PrettyText.excerpt("<img src='http://cnn.com/a.gif'>",100)).to eq("[image]")
      end

      it "should keep alt tags" do
        expect(PrettyText.excerpt("<img src='http://cnn.com/a.gif' alt='car' title='my big car'>",100)).to eq("[car]")
      end

      it "should keep title tags" do
        expect(PrettyText.excerpt("<img src='http://cnn.com/a.gif' title='car'>",100)).to eq("[car]")
      end

      it "should convert images to markdown if the option is set" do
        expect(PrettyText.excerpt("<img src='http://cnn.com/a.gif' title='car'>", 100, markdown_images: true)).to eq("![car](http://cnn.com/a.gif)")
      end

      it "should keep spoilers" do
        expect(PrettyText.excerpt("<div class='spoiler'><img src='http://cnn.com/a.gif'></div>", 100)).to match_html "<span class='spoiler'>[image]</span>"
        expect(PrettyText.excerpt("<span class='spoiler'>spoiler</div>", 100)).to match_html "<span class='spoiler'>spoiler</span>"
      end

      it "should remove meta informations" do
        expect(PrettyText.excerpt(wrapped_image, 100)).to match_html "<a href='//localhost:3000/uploads/default/4399/33691397e78b4d75.png' class='lightbox' title='Screen Shot 2014-04-14 at 9.47.10 PM.png'>[image]</a>"
      end
    end

    it "should have an option to strip links" do
      expect(PrettyText.excerpt("<a href='http://cnn.com'>cnn</a>",100, strip_links: true)).to eq("cnn")
    end

    it "should preserve links" do
      expect(PrettyText.excerpt("<a href='http://cnn.com'>cnn</a>",100)).to match_html "<a href='http://cnn.com'>cnn</a>"
    end

    it "should deal with special keys properly" do
      expect(PrettyText.excerpt("<pre><b></pre>",100)).to eq("")
    end

    it "should truncate stuff properly" do
      expect(PrettyText.excerpt("hello world",5)).to eq("hello&hellip;")
      expect(PrettyText.excerpt("<p>hello</p><p>world</p>",6)).to eq("hello w&hellip;")
    end

    it "should insert a space between to Ps" do
      expect(PrettyText.excerpt("<p>a</p><p>b</p>",5)).to eq("a b")
    end

    it "should strip quotes" do
      expect(PrettyText.excerpt("<aside class='quote'><p>a</p><p>b</p></aside>boom",5)).to eq("boom")
    end

    it "should not count the surrounds of a link" do
      expect(PrettyText.excerpt("<a href='http://cnn.com'>cnn</a>",3)).to match_html "<a href='http://cnn.com'>cnn</a>"
    end

    it "uses an ellipsis instead of html entities if provided with the option" do
      expect(PrettyText.excerpt("<a href='http://cnn.com'>cnn</a>", 2, text_entities: true)).to match_html "<a href='http://cnn.com'>cn...</a>"
    end

    it "should truncate links" do
      expect(PrettyText.excerpt("<a href='http://cnn.com'>cnn</a>",2)).to match_html "<a href='http://cnn.com'>cn&hellip;</a>"
    end

    it "doesn't extract empty quotes as links" do
      expect(PrettyText.extract_links("<aside class='quote'>not a linked quote</aside>\n").to_a).to be_empty
    end

    it "doesn't extract links from elided parts" do
      expect(PrettyText.extract_links("<details class='elided'><a href='http://cnn.com'>cnn</a></details>\n").to_a).to be_empty
    end

    def extract_urls(text)
      PrettyText.extract_links(text).map(&:url).to_a
    end

    it "should be able to extract links" do
      expect(extract_urls("<a href='http://cnn.com'>http://bla.com</a>")).to eq(["http://cnn.com"])
    end

    it "should extract links to topics" do
      expect(extract_urls("<aside class=\"quote\" data-topic=\"321\">aside</aside>")).to eq(["/t/topic/321"])
    end

    it "should lazyYT videos" do
      expect(extract_urls("<div class=\"lazyYT\" data-youtube-id=\"yXEuEUQIP3Q\" data-youtube-title=\"Mister Rogers defending PBS to the US Senate\" data-width=\"480\" data-height=\"270\" data-parameters=\"feature=oembed&amp;wmode=opaque\"></div>")).to eq(["https://www.youtube.com/watch?v=yXEuEUQIP3Q"])
    end

    it "should extract links to posts" do
      expect(extract_urls("<aside class=\"quote\" data-topic=\"1234\" data-post=\"4567\">aside</aside>")).to eq(["/t/topic/1234/4567"])
    end

    it "should not extract links to anchors" do
      expect(extract_urls("<a href='#tos'>TOS</a>")).to eq([])
    end

    it "should not extract links inside quotes" do
      links = PrettyText.extract_links("
        <a href='http://body_only.com'>http://useless1.com</a>
        <aside class=\"quote\" data-topic=\"1234\">
          <a href='http://body_and_quote.com'>http://useless3.com</a>
          <a href='http://quote_only.com'>http://useless4.com</a>
        </aside>
        <a href='http://body_and_quote.com'>http://useless2.com</a>
        ")

      expect(links.map { |l| [l.url, l.is_quote] }.sort).to eq([
        ["http://body_only.com", false],
        ["http://body_and_quote.com", false],
        ["/t/topic/1234", true],
      ].sort)
    end

    it "should not preserve tags in code blocks" do
      expect(PrettyText.excerpt("<pre><code class='handlebars'>&lt;h3&gt;Hours&lt;/h3&gt;</code></pre>",100)).to eq("&lt;h3&gt;Hours&lt;/h3&gt;")
    end

    it "should handle nil" do
      expect(PrettyText.excerpt(nil,100)).to eq('')
    end

    it "handles span excerpt at the beginning of a post" do
      expect(PrettyText.excerpt("<span class='excerpt'>hi</span> test",100)).to eq('hi')
      post = Fabricate(:post, raw: "<span class='excerpt'>hi</span> test")
      expect(post.excerpt).to eq("hi")
    end

    it "ignores max excerpt length if a span excerpt is specified" do
      two_hundred = "123456789 " * 20 + "."
      text =  two_hundred + "<span class='excerpt'>#{two_hundred}</span>" + two_hundred
      expect(PrettyText.excerpt(text, 100)).to eq(two_hundred)
      post = Fabricate(:post, raw: text)
      expect(post.excerpt).to eq(two_hundred)
    end

    it "unescapes html entities when we want text entities" do
      expect(PrettyText.excerpt("&#39;", 500, text_entities: true)).to eq("'")
    end

    it "should have an option to preserve emoji images" do
      emoji_image = "<img src='/images/emoji/emoji_one/heart.png?v=1' title=':heart:' class='emoji' alt='heart'>"
      expect(PrettyText.excerpt(emoji_image, 100, { keep_emoji_images: true })).to match_html(emoji_image)
    end

    it "should have an option to remap emoji to code points" do
      emoji_image = "I <img src='/images/emoji/emoji_one/heart.png?v=1' title=':heart:' class='emoji' alt=':heart:'> you <img src='/images/emoji/emoji_one/heart.png?v=1' title=':unknown:' class='emoji' alt=':unknown:'> "
      expect(PrettyText.excerpt(emoji_image, 100, { remap_emoji: true })).to match_html("I ❤  you :unknown:")
    end

    it "should have an option to preserve emoji codes" do
      emoji_code = "<img src='/images/emoji/emoji_one/heart.png?v=1' title=':heart:' class='emoji' alt=':heart:'>"
      expect(PrettyText.excerpt(emoji_code, 100)).to eq(":heart:")
    end

  end

  describe "strip links" do
    it "returns blank for blank input" do
      expect(PrettyText.strip_links("")).to be_blank
    end

    it "does nothing to a string without links" do
      expect(PrettyText.strip_links("I'm the <b>batman</b>")).to eq("I'm the <b>batman</b>")
    end

    it "strips links but leaves the text content" do
      expect(PrettyText.strip_links("I'm the linked <a href='http://en.wikipedia.org/wiki/Batman'>batman</a>")).to eq("I'm the linked batman")
    end

    it "escapes the text content" do
      expect(PrettyText.strip_links("I'm the linked <a href='http://en.wikipedia.org/wiki/Batman'>&lt;batman&gt;</a>")).to eq("I'm the linked &lt;batman&gt;")
    end
  end

  describe "strip_image_wrapping" do
    def strip_image_wrapping(html)
      doc = Nokogiri::HTML.fragment(html)
      described_class.strip_image_wrapping(doc)
      doc.to_html
    end

    it "doesn't change HTML when there's no wrapped image" do
      html = "<img src=\"wat.png\">"
      expect(strip_image_wrapping(html)).to eq(html)
    end

    it "strips the metadata" do
      expect(strip_image_wrapping(wrapped_image)).to match_html "<div class=\"lightbox-wrapper\"><a href=\"//localhost:3000/uploads/default/4399/33691397e78b4d75.png\" class=\"lightbox\" title=\"Screen Shot 2014-04-14 at 9.47.10 PM.png\"><img src=\"//localhost:3000/uploads/default/_optimized/bd9/b20/bbbcd6a0c0_655x500.png\" width=\"655\" height=\"500\"></a></div>"
    end
  end

  describe 'format_for_email' do
    let(:base_url) { "http://baseurl.net" }
    let(:post) { Fabricate(:post) }

    before do
      Discourse.stubs(:base_url).returns(base_url)
    end

    it 'does not crash' do
      PrettyText.format_for_email('<a href="mailto:michael.brown@discourse.org?subject=Your%20post%20at%20http://try.discourse.org/t/discussion-happens-so-much/127/1000?u=supermathie">test</a>', post)
    end

    it "adds base url to relative links" do
      html = "<p><a class=\"mention\" href=\"/u/wiseguy\">@wiseguy</a>, <a class=\"mention\" href=\"/u/trollol\">@trollol</a> what do you guys think? </p>"
      output = described_class.format_for_email(html, post)
      expect(output).to eq("<p><a class=\"mention\" href=\"#{base_url}/u/wiseguy\">@wiseguy</a>, <a class=\"mention\" href=\"#{base_url}/u/trollol\">@trollol</a> what do you guys think? </p>")
    end

    it "doesn't change external absolute links" do
      html = "<p>Check out <a href=\"http://mywebsite.com/users/boss\">this guy</a>.</p>"
      expect(described_class.format_for_email(html, post)).to eq(html)
    end

    it "doesn't change internal absolute links" do
      html = "<p>Check out <a href=\"#{base_url}/users/boss\">this guy</a>.</p>"
      expect(described_class.format_for_email(html, post)).to eq(html)
    end

    it "can tolerate invalid URLs" do
      html = "<p>Check out <a href=\"not a real url\">this guy</a>.</p>"
      expect { described_class.format_for_email(html, post) }.to_not raise_error
    end
  end

  it 'can escape *' do
    expect(PrettyText.cook("***a***a")).to match_html("<p><strong><em>a</em></strong>a</p>")
    expect(PrettyText.cook("***\\****a")).to match_html("<p><strong><em>*</em></strong>a</p>")
  end

  it 'can include code class correctly' do
    expect(PrettyText.cook("```cpp\ncpp\n```")).to match_html("<p></p><pre><code class='lang-cpp'>cpp</code></pre>")
  end

  it 'indents code correctly' do
    code = "X\n```\n\n    #\n    x\n```"
    cooked = PrettyText.cook(code)
    expect(cooked).to match_html("<p>X<br></p>\n\n<p></p><pre><code class=\"lang-auto\">    #\n    x</code></pre>")
  end

  it 'can substitute s3 cdn correctly' do
    SiteSetting.enable_s3_uploads = true
    SiteSetting.s3_access_key_id = "XXX"
    SiteSetting.s3_secret_access_key = "XXX"
    SiteSetting.s3_upload_bucket = "test"
    SiteSetting.s3_cdn_url = "https://awesome.cdn"

    # add extra img tag to ensure it does not blow up
    raw = <<HTML
  <img>
  <img src='https:#{Discourse.store.absolute_base_url}/original/9/9/99c9384b8b6d87f8509f8395571bc7512ca3cad1.jpg'>
  <img src='http:#{Discourse.store.absolute_base_url}/original/9/9/99c9384b8b6d87f8509f8395571bc7512ca3cad1.jpg'>
  <img src='#{Discourse.store.absolute_base_url}/original/9/9/99c9384b8b6d87f8509f8395571bc7512ca3cad1.jpg'>

HTML

    cooked = <<HTML
<p>  <img><br>  <img src="https://awesome.cdn/original/9/9/99c9384b8b6d87f8509f8395571bc7512ca3cad1.jpg"><br>  <img src="https://awesome.cdn/original/9/9/99c9384b8b6d87f8509f8395571bc7512ca3cad1.jpg"><br>  <img src="https://awesome.cdn/original/9/9/99c9384b8b6d87f8509f8395571bc7512ca3cad1.jpg"></p>
HTML

    expect(PrettyText.cook(raw)).to match_html(cooked)
  end

  describe 'tables' do
    it 'allows table html' do
      SiteSetting.allow_html_tables = true
      table = "<table class='fa-spin'><thead><tr>\n<th class='fa-spin'>test</th></tr></thead><tbody><tr><td>a</td></tr></tbody></table>"
      match = "<table class=\"md-table\"><thead><tr> <th>test</th> </tr></thead><tbody><tr><td>a</td></tr></tbody></table>"
      expect(PrettyText.cook(table)).to match_html(match)
    end

    it 'allows no tables when not enabled' do
      SiteSetting.allow_html_tables = false
      table = "<table><thead><tr><th>test</th></tr></thead><tbody><tr><td>a</td></tr></tbody></table>"
      expect(PrettyText.cook(table)).to match_html("")
    end
  end

  describe "emoji" do
    it "replaces unicode emoji with our emoji sets if emoji is enabled" do
      expect(PrettyText.cook("💣")).to match(/\:bomb\:/)
    end

    it "doesn't replace emoji in inline code blocks with our emoji sets if emoji is enabled" do
      expect(PrettyText.cook("`💣`")).not_to match(/\:bomb\:/)
    end

    it "doesn't replace emoji in code blocks with our emoji sets if emoji is enabled" do
      expect(PrettyText.cook("```\n💣`\n```\n")).not_to match(/\:bomb\:/)
    end

    it "replaces some glyphs that are not in the emoji range" do
      expect(PrettyText.cook("☺")).to match(/\:slight_smile\:/)
    end

    it "doesn't replace unicode emoji if emoji is disabled" do
      SiteSetting.enable_emoji = false
      expect(PrettyText.cook("💣")).not_to match(/\:bomb\:/)
    end
  end

  describe "tag and category links" do
    it "produces tag links" do
      Fabricate(:topic, {tags: [Fabricate(:tag, name: 'known')]})
      expect(PrettyText.cook(" #unknown::tag #known::tag")).to match_html("<p> <span class=\"hashtag\">#unknown::tag</span> <a class=\"hashtag\" href=\"http://test.localhost/tags/known\">#<span>known</span></a></p>")
    end

    # TODO does it make sense to generate hashtags for tags that are missing in action?
  end

  describe "custom emoji" do
    it "replaces the custom emoji" do
      CustomEmoji.create!(name: 'trout', upload: Fabricate(:upload))
      Emoji.clear_cache

      expect(PrettyText.cook("hello :trout:")).to match(/<img src[^>]+trout[^>]+>/)
    end
  end

  describe "censored_pattern site setting" do
    it "can be cleared if it causes cooking to timeout" do
      SiteSetting.censored_pattern = "evilregex"
      described_class.stubs(:markdown).raises(MiniRacer::ScriptTerminatedError)
      PrettyText.cook("Protect against it plz.") rescue nil
      expect(SiteSetting.censored_pattern).to be_blank
    end
  end

end