FIX: Disable Twitter onebox without API support (#17519)

Twitter removed OpenGraph tags from their pages. We can no longer
extract all the information (for example, the quoted tweet) we need
to render Oneboxes without using their API.
This commit is contained in:
Bianca Nenciu 2022-08-17 18:32:48 +03:00 committed by GitHub
parent 2b43238973
commit 626d50c15c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 111 deletions

View File

@ -10,34 +10,24 @@ module Onebox
matches_regexp(/^https?:\/\/(mobile\.|www\.)?twitter\.com\/.+?\/status(es)?\/\d+(\/(video|photo)\/\d?+)?+(\/?\?.*)?\/?$/)
always_https
def self.===(other)
!Onebox.options.twitter_client.twitter_credentials_missing? && super
end
def http_params
{ 'User-Agent' => 'DiscourseBot/1.0' }
end
private
def to_html
raw.present? ? super : ''
end
def get_twitter_data
response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
html = Nokogiri::HTML(response)
twitter_data = {}
html.css('meta').each do |m|
if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
m_content = m.attribute('content').to_s.strip
m_property = m.attribute('property').to_s.gsub('og:', '').gsub(':', '_')
twitter_data[m_property.to_sym] = m_content
end
end
twitter_data
end
private
def match
@match ||= @url.match(%r{twitter\.com/.+?/status(es)?/(?<id>\d+)})
end
def twitter_data
@twitter_data ||= get_twitter_data
end
def client
Onebox.options.twitter_client
end
@ -48,9 +38,7 @@ module Onebox
def raw
if twitter_api_credentials_present?
@raw ||= OpenStruct.new(client.status(match[:id]).to_hash)
else
super
@raw ||= client.status(match[:id]).to_hash
end
end
@ -62,104 +50,56 @@ module Onebox
end
def tweet
if twitter_api_credentials_present?
client.prettify_tweet(raw)&.strip
else
twitter_data[:description].gsub(/“(.+?)”/im) { $1 } if twitter_data[:description]
end
end
def timestamp
if twitter_api_credentials_present?
date = DateTime.strptime(access(:created_at), "%a %b %d %H:%M:%S %z %Y")
user_offset = access(:user, :utc_offset).to_i
offset = (user_offset >= 0 ? "+" : "-") + Time.at(user_offset.abs).gmtime.strftime("%H%M")
date.new_offset(offset).strftime("%-l:%M %p - %-d %b %Y")
else
attr_at_css(".tweet-timestamp", 'title')
end
end
def title
if twitter_api_credentials_present?
access(:user, :name)
else
attr_at_css('.tweet.permalink-tweet', 'data-name')
end
end
def screen_name
if twitter_api_credentials_present?
access(:user, :screen_name)
else
attr_at_css('.tweet.permalink-tweet', 'data-screen-name')
end
end
def avatar
if twitter_api_credentials_present?
access(:user, :profile_image_url_https).sub('normal', '400x400')
elsif twitter_data[:image]
twitter_data[:image] unless twitter_data[:image_user_generated]
end
end
def likes
if twitter_api_credentials_present?
prettify_number(access(:favorite_count).to_i)
else
attr_at_css(".request-favorited-popup", 'data-compact-localized-count')
end
end
def retweets
if twitter_api_credentials_present?
prettify_number(access(:retweet_count).to_i)
else
attr_at_css(".request-retweeted-popup", 'data-compact-localized-count')
end
end
def quoted_full_name
if twitter_api_credentials_present?
access(:quoted_status, :user, :name)
else
raw.css('.QuoteTweet-fullname')[0]&.text
end
end
def quoted_screen_name
if twitter_api_credentials_present?
access(:quoted_status, :user, :screen_name)
else
attr_at_css(".QuoteTweet-innerContainer", "data-screen-name")
end
end
def quoted_tweet
if twitter_api_credentials_present?
access(:quoted_status, :full_text)
else
raw.css('.QuoteTweet-text')[0]&.text
end
end
def quoted_link
if twitter_api_credentials_present?
"https://twitter.com/#{quoted_screen_name}/status/#{access(:quoted_status, :id)}"
else
"https://twitter.com#{attr_at_css(".QuoteTweet-innerContainer", "href")}"
end
end
def prettify_number(count)
count > 0 ? client.prettify_number(count) : nil
end
def attr_at_css(css_property, attribute_name)
raw.at_css(css_property)&.attr(attribute_name)
end
def data
@data ||= {
link: link,

View File

@ -125,7 +125,13 @@ class TwitterApi
def twitter_get(uri)
request = Net::HTTP::Get.new(uri)
request.add_field 'Authorization', "Bearer #{bearer_token}"
http(uri).request(request).body
response = http(uri).request(request)
if response.kind_of?(Net::HTTPTooManyRequests)
Rails.logger.warn("Twitter API rate limit has been reached")
end
response.body
end
def authorization

View File

@ -107,40 +107,28 @@ RSpec.describe Onebox::Engine::TwitterStatusOnebox do
end
end
context "with html" do
context "with a standard tweet" do
let(:tweet_content) { "I'm a sucker for pledges." }
context "without twitter client" do
let(:link) { "https://twitter.com/discourse/status/1428031057186627589" }
let(:html) { described_class.new(link).to_html }
include_context "with standard tweet info"
include_context "with engines"
it_behaves_like "an engine"
it_behaves_like "#to_html"
it "does not match the url" do
onebox = Onebox::Matcher.new(link, { allowed_iframe_regexes: [/.*/] }).oneboxed
expect(onebox).not_to be(described_class)
end
context "with a quoted tweet" do
let(:tweet_content) do
"Thank you to everyone who came out for #MetInParis last night for helping us support @EMMAUSolidarite &amp;"
end
it "logs a warn message if rate limited" do
SiteSetting.twitter_consumer_key = 'twitter_consumer_key'
SiteSetting.twitter_consumer_secret = 'twitter_consumer_secret'
include_context "with quoted tweet info"
include_context "with engines"
stub_request(:post, "https://api.twitter.com/oauth2/token")
.to_return(status: 200, body: "{\"access_token\":\"token\"}", headers: {})
it_behaves_like "an engine"
it_behaves_like '#to_html'
it_behaves_like "includes quoted tweet data"
end
stub_request(:get, "https://api.twitter.com/1.1/statuses/show.json?id=1428031057186627589&tweet_mode=extended")
.to_return(status: 429, body: "{}", headers: {})
context "with a featured image tweet" do
let(:tweet_content) do
"My first text message from my child! A moment that shall live on in infamy!"
end
Rails.logger.expects(:warn).with(regexp_matches(/rate limit/)).at_least_once
include_context "with featured image info"
include_context "with engines"
it_behaves_like "an engine"
it_behaves_like '#to_html'
expect(html).to eq('')
end
end

View File

@ -508,6 +508,29 @@ RSpec.describe Oneboxer do
end
end
describe 'Twitter' do
let(:url) { 'https://twitter.com/discourse/status/1428031057186627589' }
before do
SiteSetting.twitter_consumer_key = 'twitter_consumer_key'
SiteSetting.twitter_consumer_secret = 'twitter_consumer_secret'
end
it 'works with rate limit' do
stub_request(:head, "https://twitter.com/discourse/status/1428031057186627589")
.to_return(status: 200, body: "", headers: {})
stub_request(:get, "https://twitter.com/discourse/status/1428031057186627589")
.to_return(status: 200, body: "", headers: {})
stub_request(:post, "https://api.twitter.com/oauth2/token")
.to_return(status: 200, body: "{access_token: 'token'}", headers: {})
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to eq('')
expect(Oneboxer.onebox(url, invalidate_oneboxes: true)).to eq('')
end
end
describe '#apply' do
it 'generates valid HTML' do
raw = "Before Onebox\nhttps://example.com\nAfter Onebox"