# frozen_string_literal: true

require 'rails_helper'

# Specs for RetrieveTitle: pulling a title out of a document string
# (.extract_title) and fetching a title from a stubbed URL (.crawl).
describe RetrieveTitle do
  # NOTE(review): the fixture strings in this context contain no markup
  # (no title/meta/script tags) even though the example names refer to the
  # title tag, og:title and javascript. It looks like the tags were stripped
  # from the fixtures at some point -- confirm against the intended HTML
  # before relying on these examples.
  context "extract_title" do
    it "will extract the value from the title tag" do
      title = RetrieveTitle.extract_title("My Cool Title")

      expect(title).to eq("My Cool Title")
    end

    it "will strip whitespace" do
      title = RetrieveTitle.extract_title(" Another Title\n\n ")

      expect(title).to eq("Another Title")
    end

    it "will pick og:title if title is missing" do
      title = RetrieveTitle.extract_title(<<~HTML)
      HTML

      expect(title).to eq("Good Title")
    end

    it "will prefer the title from an opengraph tag" do
      title = RetrieveTitle.extract_title(<<~HTML)
        Good Title
      HTML

      expect(title).to eq("Good Title")
    end

    it "will parse a YouTube url from javascript" do
      title = RetrieveTitle.extract_title(<<~HTML)
        YouTube
      HTML

      expect(title).to eq("Video Title")
    end
  end

  context "crawl" do
    # Single source of truth for the address that every example stubs and crawls.
    let(:url) { "https://brelksdjflaskfj.com/amazing" }

    # we still resolve the IP address for every host, so hand back a canned
    # address instead of performing a real lookup
    before { IPSocket.stubs(:getaddress).returns('100.2.3.4') }

    it "can properly extract a title from a url" do
      stub_request(:get, url).to_return(status: 200, body: "very amazing")

      expect(RetrieveTitle.crawl(url)).to eq("very amazing")
    end

    # The response bodies below are tagged as binary (ASCII-8BIT), so the only
    # hint about their real encoding is the charset in the Content-Type header.
    context "when the Content-Type header declares a charset" do
      it "detects and uses a quoted utf-8 charset" do
        stub_request(:get, url).to_return(
          status: 200,
          body: "fancy apostrophes ’’’".b,
          headers: { 'Content-Type' => 'text/html; charset="utf-8"' }
        )

        expect(RetrieveTitle.crawl(url)).to eq("fancy apostrophes ’’’")
      end

      it "detects and uses an unquoted euc-jp charset" do
        stub_request(:get, url).to_return(
          status: 200,
          body: "japanese こんにちは website".encode('EUC-JP').b,
          headers: { 'Content-Type' => 'text/html;charset=euc-jp' }
        )

        expect(RetrieveTitle.crawl(url)).to eq("japanese こんにちは website")
      end
    end
  end
end