FIX: Don't error out when trying to retrieve title and URL won't encode (#24660)
This commit is contained in:
parent
aadc104817
commit
54e813e964
|
@ -10,6 +10,8 @@ require "url_helper"
|
|||
class FinalDestination
|
||||
class SSRFError < SocketError
|
||||
end
|
||||
class UrlEncodingError < ArgumentError
|
||||
end
|
||||
|
||||
MAX_REQUEST_TIME_SECONDS = 10
|
||||
MAX_REQUEST_SIZE_BYTES = 5_242_880 # 1024 * 1024 * 5
|
||||
|
@ -457,6 +459,8 @@ class FinalDestination
|
|||
|
||||
def normalized_url
|
||||
UrlHelper.normalized_encode(@url)
|
||||
rescue ArgumentError => e
|
||||
raise UrlEncodingError, e.message
|
||||
end
|
||||
|
||||
def log(log_level, message)
|
||||
|
|
|
@ -2,6 +2,11 @@
|
|||
|
||||
module RetrieveTitle
|
||||
CRAWL_TIMEOUT = 1
|
||||
UNRECOVERABLE_ERRORS = [
|
||||
Net::ReadTimeout,
|
||||
FinalDestination::SSRFError,
|
||||
FinalDestination::UrlEncodingError,
|
||||
]
|
||||
|
||||
def self.crawl(url, max_redirects: nil, initial_https_redirect_ignore_limit: false)
|
||||
fetch_title(
|
||||
|
@ -9,8 +14,8 @@ module RetrieveTitle
|
|||
max_redirects: max_redirects,
|
||||
initial_https_redirect_ignore_limit: initial_https_redirect_ignore_limit,
|
||||
)
|
||||
rescue Net::ReadTimeout, FinalDestination::SSRFError
|
||||
# do nothing for Net::ReadTimeout errors
|
||||
rescue *UNRECOVERABLE_ERRORS
|
||||
# ¯\_(ツ)_/¯
|
||||
end
|
||||
|
||||
def self.extract_title(html, encoding = nil)
|
||||
|
|
|
@ -60,6 +60,12 @@ RSpec.describe FinalDestination do
|
|||
expect(fd.ignored).to eq(%w[test.localhost google.com meta.discourse.org])
|
||||
end
|
||||
|
||||
it "raises an error when URL is too long to encode" do
|
||||
expect {
|
||||
FinalDestination.new("https://meta.discourse.org/" + "x" * UrlHelper::MAX_URL_LENGTH)
|
||||
}.to raise_error(FinalDestination::UrlEncodingError)
|
||||
end
|
||||
|
||||
describe ".resolve" do
|
||||
it "has a ready status code before anything happens" do
|
||||
expect(fd("https://eviltrout.com").status).to eq(:ready)
|
||||
|
|
|
@ -207,6 +207,12 @@ RSpec.describe RetrieveTitle do
|
|||
|
||||
expect(RetrieveTitle.crawl("https://example.com")).to eq(nil)
|
||||
end
|
||||
|
||||
it "ignores URL encoding errors" do
|
||||
described_class.stubs(:fetch_title).raises(FinalDestination::UrlEncodingError)
|
||||
|
||||
expect(RetrieveTitle.crawl("https://example.com")).to eq(nil)
|
||||
end
|
||||
end
|
||||
|
||||
describe ".fetch_title" do
|
||||
|
|
Loading…
Reference in New Issue