discourse/spec/components/retrieve_title_spec.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

94 lines
2.8 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
require 'rails_helper'
# Specs for RetrieveTitle. `extract_title` is exercised with inline HTML
# fixtures; `crawl` is exercised against stubbed HTTP responses.
describe RetrieveTitle do
  context "extract_title" do
    it "will extract the value from the title tag" do
      title = RetrieveTitle.extract_title(
        "<html><title>My Cool Title</title></html>"
      )

      expect(title).to eq("My Cool Title")
    end

    it "will strip whitespace" do
      # Leading spaces, trailing newlines and trailing spaces must all be removed.
      title = RetrieveTitle.extract_title(
        "<html><title> Another Title\n\n </title></html>"
      )

      expect(title).to eq("Another Title")
    end

    it "will pick og:title if title is missing" do
      # No <title> element at all: the og:title meta tag is the only source.
      title = RetrieveTitle.extract_title(<<~HTML)
        <html>
          <meta property="og:title" content="Good Title">
        </html>
      HTML

      expect(title).to eq("Good Title")
    end

    it "will prefer the title tag over an opengraph tag" do
      # Both sources are present; the <title> element must win.
      title = RetrieveTitle.extract_title(<<~HTML)
        <html>
          <title>Good Title</title>
          <meta property="og:title" content="Bad Title">
        </html>
      HTML

      expect(title).to eq("Good Title")
    end

    it "will parse a YouTube url from javascript" do
      # The <title> tag only says "YouTube"; the expected value is the one
      # assigned to document.title in the inline script.
      title = RetrieveTitle.extract_title(<<~HTML)
        <html>
          <title>YouTube</title>
          <script>document.title = "Video Title";</script>
        </html>
      HTML

      expect(title).to eq("Video Title")
    end
  end

  context "crawl" do
    let(:url) { "https://brelksdjflaskfj.com/amazing" }

    before do
      # we still resolve the IP address for every host
      IPSocket.stubs(:getaddress).returns('100.2.3.4')
    end

    it "can properly extract a title from a url" do
      stub_request(:get, url)
        .to_return(status: 200, body: "<html><title>very amazing</title>")

      expect(RetrieveTitle.crawl(url)).to eq("very amazing")
    end

    it "detects and uses encoding from Content-Type header" do
      # UTF-8 body delivered as raw bytes, charset given in quoted form.
      # The title must contain non-ASCII characters, otherwise the declared
      # charset makes no observable difference.
      stub_request(:get, url)
        .to_return(
          status: 200,
          body: "<html><title>fancy apostrophes ’’’</title>".b,
          headers: { 'Content-Type' => 'text/html; charset="utf-8"' }
        )

      expect(RetrieveTitle.crawl(url)).to eq("fancy apostrophes ’’’")

      # EUC-JP body delivered as raw bytes, charset unquoted and with no
      # space after the semicolon.
      stub_request(:get, url)
        .to_return(
          status: 200,
          body: "<html><title>japanese こんにちは website</title>".encode('EUC-JP').b,
          headers: { 'Content-Type' => 'text/html;charset=euc-jp' }
        )

      expect(RetrieveTitle.crawl(url)).to eq("japanese こんにちは website")
    end
  end
end