# discourse/spec/components/retrieve_title_spec.rb

require 'rails_helper'
require_dependency 'retrieve_title'

# Specs for RetrieveTitle: pulling a page title out of an HTML string
# (`extract_title`) and fetching a title for a URL (`crawl`).
describe RetrieveTitle do

  context "extract_title" do

    it "will extract the value from the title tag" do
      title = RetrieveTitle.extract_title(
        "<html><title>My Cool Title</title></html>"
      )

      expect(title).to eq("My Cool Title")
    end

    it "will strip whitespace" do
      title = RetrieveTitle.extract_title(
        "<html><title>   Another Title\n\n </title></html>"
      )

      expect(title).to eq("Another Title")
    end

    # NOTE(review): the <meta> tag in this fixture is never closed with ">".
    # Presumably that is deliberate (lenient parsing) -- confirm before tidying it.
    it "will pick og:title if title is missing" do
      title = RetrieveTitle.extract_title(<<~HTML
        <html>
          <meta property="og:title" content="Good Title"
        </html>
        HTML
      )

      expect(title).to eq("Good Title")
    end

    # When both are present the <title> tag wins: the fixture puts "Good Title"
    # in <title> and "Bad Title" in og:title, and "Good Title" is expected.
    it "will prefer the title tag over an opengraph tag" do
      title = RetrieveTitle.extract_title(<<~HTML
        <html>
          <title>Good Title</title>
          <meta property="og:title" content="Bad Title"
        </html>
        HTML
      )

      expect(title).to eq("Good Title")
    end

    # The <title> here is only the generic "YouTube"; the expected value is the
    # string assigned to document.title inside the <script> tag.
    it "will parse a YouTube url from javascript" do
      title = RetrieveTitle.extract_title(<<~HTML
        <html>
          <title>YouTube</title>
          <script>document.title = "Video Title";</script>
        </html>
        HTML
      )
      expect(title).to eq("Video Title")
    end
  end

  context "crawl" do
    it "can properly extract a title from a url" do
      # Serve a canned body for the GET so the spec never touches the network.
      stub_request(:get, "https://brelksdjflaskfj.com/amazing")
        .to_return(status: 200, body: "<html><title>very amazing</title>")

      # we still resolve the IP address for every host
      IPSocket.stubs(:getaddress).returns('100.2.3.4')

      expect(RetrieveTitle.crawl("https://brelksdjflaskfj.com/amazing")).to eq("very amazing")
    end
  end

end
FEATURE: Whitelists for inline oneboxing 2017-07-21 15:29:04 -04:00			`require 'rails_helper'`
			`require_dependency 'retrieve_title'`

			`describe RetrieveTitle do`

			`context "extract_title" do`

			`it "will extract the value from the title tag" do`
			`title = RetrieveTitle.extract_title(`
			`"<html><title>My Cool Title</title></html>"`
			`)`

			`expect(title).to eq("My Cool Title")`
			`end`

			`it "will strip whitespace" do`
			`title = RetrieveTitle.extract_title(`
			`"<html><title> Another Title\n\n </title></html>"`
			`)`

			`expect(title).to eq("Another Title")`
			`end`

FEATURE: option to enable inline oneboxes for all domains. Also, change to prefer title over open graph, which is often way too sparse 2017-08-02 14:27:21 -04:00			`it "will pick og:title if title is missing" do`
			`title = RetrieveTitle.extract_title(<<~HTML`
			`<html>`
			`<meta property="og:title" content="Good Title"`
			`</html>`
			`HTML`
			`)`

			`expect(title).to eq("Good Title")`
			`end`

FEATURE: Whitelists for inline oneboxing 2017-07-21 15:29:04 -04:00			`it "will prefer the title from an opengraph tag" do`
			`title = RetrieveTitle.extract_title(<<~HTML`
			`<html>`
FEATURE: option to enable inline oneboxes for all domains. Also, change to prefer title over open graph, which is often way too sparse 2017-08-02 14:27:21 -04:00			`<title>Good Title</title>`
			`<meta property="og:title" content="Bad Title"`
FEATURE: Whitelists for inline oneboxing 2017-07-21 15:29:04 -04:00			`</html>`
			`HTML`
			`)`

			`expect(title).to eq("Good Title")`
			`end`

FIX: Hack our title retriever so that it parses YouTube URLs 2017-09-28 09:29:50 -04:00			`it "will parse a YouTube url from javascript" do`
			`title = RetrieveTitle.extract_title(<<~HTML`
			`<html>`
			`<title>YouTube</title>`
			`<script>document.title = "Video Title";</script>`
			`</html>`
			`HTML`
			`)`
			`expect(title).to eq("Video Title")`
			`end`
PERF: ability to crawl for titles without extra HEAD req. Also, introduces a much more aggressive timeout for title crawling and introduces gzip to body that is crawled 2018-01-28 23:36:52 -05:00			`end`

			`context "crawl" do`
			`it "can properly extract a title from a url" do`
			`stub_request(:get, "https://brelksdjflaskfj.com/amazing")`
			`.to_return(status: 200, body: "<html><title>very amazing</title>")`
FIX: Hack our title retriever so that it parses YouTube URLs 2017-09-28 09:29:50 -04:00
PERF: ability to crawl for titles without extra HEAD req. Also, introduces a much more aggressive timeout for title crawling and introduces gzip to body that is crawled 2018-01-28 23:36:52 -05:00			`# we still resolve the IP address for every host`
			`IPSocket.stubs(:getaddress).returns('100.2.3.4')`

			`expect(RetrieveTitle.crawl("https://brelksdjflaskfj.com/amazing")).to eq("very amazing")`
			`end`
FEATURE: Whitelists for inline oneboxing 2017-07-21 15:29:04 -04:00			`end`

			`end`