FIX: Try respecting charset in HTTP header of RSS feed
This commit is contained in:
parent
ff942ed2f3
commit
5d421fb946
|
@ -88,8 +88,9 @@ module Jobs
|
|||
private
|
||||
|
||||
def parsed_feed
|
||||
raw_feed = fetch_rss
|
||||
encoded_feed = Encodings.to_utf8(raw_feed)
|
||||
raw_feed, encoding = fetch_rss
|
||||
encoded_feed = Encodings.try_utf8(raw_feed, encoding) if encoding
|
||||
encoded_feed = Encodings.to_utf8(raw_feed, encoding_hint: encoding) unless encoded_feed
|
||||
|
||||
return nil if encoded_feed.blank?
|
||||
|
||||
|
@ -107,10 +108,19 @@ module Jobs
|
|||
feed_final_url = final_destination.resolve
|
||||
return nil unless final_destination.status == :resolved
|
||||
|
||||
Excon.new(feed_final_url.to_s).request(method: :get, expects: 200).body
|
||||
response = Excon.new(feed_final_url.to_s).request(method: :get, expects: 200)
|
||||
[response.body, detect_charset(response)]
|
||||
rescue Excon::Error::HTTPStatus
|
||||
nil
|
||||
end
|
||||
|
||||
# Extracts the character encoding advertised in the response's
# Content-Type header.
#
# @param response [#headers] object whose `headers['Content-Type']` is read
# @return [Encoding, nil] the matching Ruby encoding, or nil when the header
#   is absent, has no charset parameter, or names a charset Ruby does not know
def detect_charset(response)
  content_type = response.headers['Content-Type'].to_s

  # The parameter value may be bare or double-quoted (charset="utf-8"), and
  # charset names such as Shift_JIS contain underscores, so accept both.
  match = /charset\s*=\s*"?([a-z0-9_\-]+)/i.match(content_type)

  Encoding.find(match[1]) if match
rescue ArgumentError
  # Encoding.find raises ArgumentError for names it does not recognize.
  nil
end
|
||||
end
|
||||
|
||||
class FeedTopic
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
<?xml version="1.0"?>
|
||||
<rss version="2.0"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:atom="http://www.w3.org/2005/Atom"
|
||||
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
|
||||
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
|
||||
xmlns:discourse="http://discourse.org/rss/modules/discourse/"
|
||||
>
|
||||
<channel>
|
||||
<title>Discourse</title>
|
||||
<atom:link href="https://blog.discourse.org/feed/" rel="self" type="application/rss+xml" />
|
||||
<link>https://blog.discourse.org</link>
|
||||
<description>Official blog for the open source Discourse project</description>
|
||||
<lastBuildDate>Thu, 14 Sep 2017 15:22:33 +0000</lastBuildDate>
|
||||
<language>en-US</language>
|
||||
<sy:updatePeriod>hourly</sy:updatePeriod>
|
||||
<sy:updateFrequency>1</sy:updateFrequency>
|
||||
<generator>https://wordpress.org/?v=4.8.1</generator>
|
||||
<item>
|
||||
<title>Poll Feed Spec Fixture</title>
|
||||
<link>https://blog.discourse.org/2017/09/poll-feed-spec-fixture/</link>
|
||||
<pubDate>Thu, 14 Sep 2017 15:22:33 +0000</pubDate>
|
||||
<dc:creator><![CDATA[xrav3nz]]></dc:creator>
|
||||
<discourse:username><![CDATA[xrav3nz]]></discourse:username>
|
||||
<category><![CDATA[spec]]></category>
|
||||
<guid isPermaLink="false">https://blog.discourse.org/?p=pollfeedspec</guid>
|
||||
<description><![CDATA[Here are some random descriptions... […]]]></description>
|
||||
<content:encoded><![CDATA[<p>This is the body & content. 100¤ </p>]]></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -155,6 +155,26 @@ describe Jobs::PollFeed do
|
|||
expect { poller.poll_feed }.to change { Topic.count }.by(1)
|
||||
expect(Topic.last.first_post.raw).to include('<p>This is the body & content. </p>')
|
||||
end
|
||||
|
||||
it 'respects the charset in the Content-Type header' do
  # Serve the fixture with an explicit ISO-8859-15 charset in the header.
  fixture_body = file_from_fixtures('iso-8859-15-feed.rss', 'feed').read
  response_headers = { "Content-Type" => "application/rss+xml; charset=ISO-8859-15" }
  stub_request(:get, SiteSetting.feed_polling_url)
    .to_return(body: fixture_body, headers: response_headers)

  expect { poller.poll_feed }.to change { Topic.count }.by(1)

  # The euro sign only appears if the body was decoded with the header charset.
  imported_raw = Topic.last.first_post.raw
  expect(imported_raw).to include('<p>This is the body & content. 100€ </p>')
end
|
||||
|
||||
it 'works when the charset in the Content-Type header is unknown' do
  # "foo" is not a real charset name; polling should still import the feed.
  fixture_body = file_from_fixtures('feed.rss', 'feed').read
  response_headers = { "Content-Type" => "application/rss+xml; charset=foo" }
  stub_request(:get, SiteSetting.feed_polling_url)
    .to_return(body: fixture_body, headers: response_headers)

  expect { poller.poll_feed }.to change { Topic.count }.by(1)

  imported_raw = Topic.last.first_post.raw
  expect(imported_raw).to include('<p>This is the body & content. </p>')
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue