FIX: Don't error out when trying to retrieve title and URL won't encode (#24660)
This commit is contained in:
parent
aadc104817
commit
54e813e964
|
@ -10,6 +10,8 @@ require "url_helper"
|
||||||
class FinalDestination
|
class FinalDestination
|
||||||
class SSRFError < SocketError
|
class SSRFError < SocketError
|
||||||
end
|
end
|
||||||
|
class UrlEncodingError < ArgumentError
|
||||||
|
end
|
||||||
|
|
||||||
MAX_REQUEST_TIME_SECONDS = 10
|
MAX_REQUEST_TIME_SECONDS = 10
|
||||||
MAX_REQUEST_SIZE_BYTES = 5_242_880 # 1024 * 1024 * 5
|
MAX_REQUEST_SIZE_BYTES = 5_242_880 # 1024 * 1024 * 5
|
||||||
|
@ -457,6 +459,8 @@ class FinalDestination
|
||||||
|
|
||||||
def normalized_url
|
def normalized_url
|
||||||
UrlHelper.normalized_encode(@url)
|
UrlHelper.normalized_encode(@url)
|
||||||
|
rescue ArgumentError => e
|
||||||
|
raise UrlEncodingError, e.message
|
||||||
end
|
end
|
||||||
|
|
||||||
def log(log_level, message)
|
def log(log_level, message)
|
||||||
|
|
|
@ -2,6 +2,11 @@
|
||||||
|
|
||||||
module RetrieveTitle
|
module RetrieveTitle
|
||||||
CRAWL_TIMEOUT = 1
|
CRAWL_TIMEOUT = 1
|
||||||
|
UNRECOVERABLE_ERRORS = [
|
||||||
|
Net::ReadTimeout,
|
||||||
|
FinalDestination::SSRFError,
|
||||||
|
FinalDestination::UrlEncodingError,
|
||||||
|
]
|
||||||
|
|
||||||
def self.crawl(url, max_redirects: nil, initial_https_redirect_ignore_limit: false)
|
def self.crawl(url, max_redirects: nil, initial_https_redirect_ignore_limit: false)
|
||||||
fetch_title(
|
fetch_title(
|
||||||
|
@ -9,8 +14,8 @@ module RetrieveTitle
|
||||||
max_redirects: max_redirects,
|
max_redirects: max_redirects,
|
||||||
initial_https_redirect_ignore_limit: initial_https_redirect_ignore_limit,
|
initial_https_redirect_ignore_limit: initial_https_redirect_ignore_limit,
|
||||||
)
|
)
|
||||||
rescue Net::ReadTimeout, FinalDestination::SSRFError
|
rescue *UNRECOVERABLE_ERRORS
|
||||||
# do nothing for Net::ReadTimeout errors
|
# ¯\_(ツ)_/¯
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.extract_title(html, encoding = nil)
|
def self.extract_title(html, encoding = nil)
|
||||||
|
|
|
@ -60,6 +60,12 @@ RSpec.describe FinalDestination do
|
||||||
expect(fd.ignored).to eq(%w[test.localhost google.com meta.discourse.org])
|
expect(fd.ignored).to eq(%w[test.localhost google.com meta.discourse.org])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "raises an error when URL is too long to encode" do
|
||||||
|
expect {
|
||||||
|
FinalDestination.new("https://meta.discourse.org/" + "x" * UrlHelper::MAX_URL_LENGTH)
|
||||||
|
}.to raise_error(FinalDestination::UrlEncodingError)
|
||||||
|
end
|
||||||
|
|
||||||
describe ".resolve" do
|
describe ".resolve" do
|
||||||
it "has a ready status code before anything happens" do
|
it "has a ready status code before anything happens" do
|
||||||
expect(fd("https://eviltrout.com").status).to eq(:ready)
|
expect(fd("https://eviltrout.com").status).to eq(:ready)
|
||||||
|
|
|
@ -207,6 +207,12 @@ RSpec.describe RetrieveTitle do
|
||||||
|
|
||||||
expect(RetrieveTitle.crawl("https://example.com")).to eq(nil)
|
expect(RetrieveTitle.crawl("https://example.com")).to eq(nil)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "ignores URL encoding errors" do
|
||||||
|
described_class.stubs(:fetch_title).raises(FinalDestination::UrlEncodingError)
|
||||||
|
|
||||||
|
expect(RetrieveTitle.crawl("https://example.com")).to eq(nil)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe ".fetch_title" do
|
describe ".fetch_title" do
|
||||||
|
|
Loading…
Reference in New Issue