FIX: Try respecting charset in HTTP header of RSS feed
This commit is contained in:
parent
ff942ed2f3
commit
5d421fb946
|
@ -88,8 +88,9 @@ module Jobs
|
||||||
private
|
private
|
||||||
|
|
||||||
def parsed_feed
|
def parsed_feed
|
||||||
raw_feed = fetch_rss
|
raw_feed, encoding = fetch_rss
|
||||||
encoded_feed = Encodings.to_utf8(raw_feed)
|
encoded_feed = Encodings.try_utf8(raw_feed, encoding) if encoding
|
||||||
|
encoded_feed = Encodings.to_utf8(raw_feed, encoding_hint: encoding) unless encoded_feed
|
||||||
|
|
||||||
return nil if encoded_feed.blank?
|
return nil if encoded_feed.blank?
|
||||||
|
|
||||||
|
@ -107,10 +108,19 @@ module Jobs
|
||||||
feed_final_url = final_destination.resolve
|
feed_final_url = final_destination.resolve
|
||||||
return nil unless final_destination.status == :resolved
|
return nil unless final_destination.status == :resolved
|
||||||
|
|
||||||
Excon.new(feed_final_url.to_s).request(method: :get, expects: 200).body
|
response = Excon.new(feed_final_url.to_s).request(method: :get, expects: 200)
|
||||||
|
[response.body, detect_charset(response)]
|
||||||
rescue Excon::Error::HTTPStatus
|
rescue Excon::Error::HTTPStatus
|
||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Extracts the character encoding named by the `charset` parameter of the
# response's Content-Type header.
#
# @param response [#headers] HTTP response whose `headers` object supports
#   lookup by header name via `[]`
# @return [Encoding, nil] the matching Ruby encoding, or nil when the header
#   is absent, carries no charset parameter, or names an encoding that Ruby
#   does not know
def detect_charset(response)
  content_type = response.headers['Content-Type'].to_s

  # The parameter value may be quoted (charset="utf-8"), and registered
  # charset names are not limited to [a-z0-9-] (e.g. Shift_JIS).
  charset = content_type[/charset\s*=\s*["']?([a-z0-9\-_.:+]+)/i, 1]
  return nil unless charset

  # Encoding.find also resolves these process-specific aliases; a remote
  # server must not be able to select them through a header.
  return nil if %w[locale external internal filesystem].include?(charset.downcase)

  Encoding.find(charset)
rescue ArgumentError
  # Raised by Encoding.find for names it does not recognise.
  nil
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class FeedTopic
|
class FeedTopic
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<rss version="2.0"
|
||||||
|
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||||
|
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:atom="http://www.w3.org/2005/Atom"
|
||||||
|
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
|
||||||
|
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
|
||||||
|
xmlns:discourse="http://discourse.org/rss/modules/discourse/"
|
||||||
|
>
|
||||||
|
<channel>
|
||||||
|
<title>Discourse</title>
|
||||||
|
<atom:link href="https://blog.discourse.org/feed/" rel="self" type="application/rss+xml" />
|
||||||
|
<link>https://blog.discourse.org</link>
|
||||||
|
<description>Official blog for the open source Discourse project</description>
|
||||||
|
<lastBuildDate>Thu, 14 Sep 2017 15:22:33 +0000</lastBuildDate>
|
||||||
|
<language>en-US</language>
|
||||||
|
<sy:updatePeriod>hourly</sy:updatePeriod>
|
||||||
|
<sy:updateFrequency>1</sy:updateFrequency>
|
||||||
|
<generator>https://wordpress.org/?v=4.8.1</generator>
|
||||||
|
<item>
|
||||||
|
<title>Poll Feed Spec Fixture</title>
|
||||||
|
<link>https://blog.discourse.org/2017/09/poll-feed-spec-fixture/</link>
|
||||||
|
<pubDate>Thu, 14 Sep 2017 15:22:33 +0000</pubDate>
|
||||||
|
<dc:creator><![CDATA[xrav3nz]]></dc:creator>
|
||||||
|
<discourse:username><![CDATA[xrav3nz]]></discourse:username>
|
||||||
|
<category><![CDATA[spec]]></category>
|
||||||
|
<guid isPermaLink="false">https://blog.discourse.org/?p=pollfeedspec</guid>
|
||||||
|
<description><![CDATA[Here are some random descriptions... […]]]></description>
|
||||||
|
<content:encoded><![CDATA[<p>This is the body & content. 100¤ </p>]]></content:encoded>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
|
@ -155,6 +155,26 @@ describe Jobs::PollFeed do
|
||||||
expect { poller.poll_feed }.to change { Topic.count }.by(1)
|
expect { poller.poll_feed }.to change { Topic.count }.by(1)
|
||||||
expect(Topic.last.first_post.raw).to include('<p>This is the body & content. </p>')
|
expect(Topic.last.first_post.raw).to include('<p>This is the body & content. </p>')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'respects the charset in the Content-Type header' do
  # Serve the ISO-8859-15 fixture and declare that charset in the header;
  # the euro sign only survives if the declared charset is used for decoding.
  feed_body = file_from_fixtures('iso-8859-15-feed.rss', 'feed').read
  response_headers = { "Content-Type" => "application/rss+xml; charset=ISO-8859-15" }
  stub_request(:get, SiteSetting.feed_polling_url)
    .to_return(body: feed_body, headers: response_headers)

  expect { poller.poll_feed }.to change { Topic.count }.by(1)

  imported_raw = Topic.last.first_post.raw
  expect(imported_raw).to include('<p>This is the body & content. 100€ </p>')
end
|
||||||
|
|
||||||
|
it 'works when the charset in the Content-Type header is unknown' do
  # "foo" is not an encoding name; polling must still import the feed.
  feed_body = file_from_fixtures('feed.rss', 'feed').read
  response_headers = { "Content-Type" => "application/rss+xml; charset=foo" }
  stub_request(:get, SiteSetting.feed_polling_url)
    .to_return(body: feed_body, headers: response_headers)

  expect { poller.poll_feed }.to change { Topic.count }.by(1)

  imported_raw = Topic.last.first_post.raw
  expect(imported_raw).to include('<p>This is the body & content. </p>')
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue