From 6e925fee6ff04aa23cff2f8048be14c6bc3c5a4c Mon Sep 17 00:00:00 2001 From: jbrw Date: Fri, 17 Dec 2021 19:36:54 -0500 Subject: [PATCH] FIX: Use basic meta description if other description tags are missing (#15356) When attempting to Onebox a page, if there is no `meta property="og:description"` tag but there is a `meta name="description"` tag, Onebox should try to use that value. --- lib/onebox/engine/standard_embed.rb | 16 +++++++++++++++- .../onebox/basic_description.response | 17 +++++++++++++++++ .../engine/allowlisted_generic_onebox_spec.rb | 19 +++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 spec/fixtures/onebox/basic_description.response diff --git a/lib/onebox/engine/standard_embed.rb b/lib/onebox/engine/standard_embed.rb index f19f060cb2d..79997ffb818 100644 --- a/lib/onebox/engine/standard_embed.rb +++ b/lib/onebox/engine/standard_embed.rb @@ -58,7 +58,12 @@ module Onebox end favicon = get_favicon - @raw["favicon".to_sym] = favicon unless Onebox::Helpers::blank?(favicon) + @raw[:favicon] = favicon unless Onebox::Helpers::blank?(favicon) + + unless @raw[:description] + description = get_description + @raw[:description] = description unless Onebox::Helpers::blank?(description) + end @raw end @@ -106,6 +111,15 @@ module Onebox Onebox::Helpers::get_absolute_image_url(favicon, url) end + def get_description + return nil unless html_doc + + description = html_doc.at("meta[name='description']").to_h['content'] + description ||= html_doc.at("meta[name='Description']").to_h['content'] + + description + end + def get_json_response oembed_url = get_oembed_url diff --git a/spec/fixtures/onebox/basic_description.response b/spec/fixtures/onebox/basic_description.response new file mode 100644 index 00000000000..b6b7d1f7204 --- /dev/null +++ b/spec/fixtures/onebox/basic_description.response @@ -0,0 +1,17 @@ + + + My Page Title + + + + + + + + + + +

Welcome

+

Body content goes here

+ + diff --git a/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb b/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb index 14cadc152c6..8c625211248 100644 --- a/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb +++ b/spec/lib/onebox/engine/allowlisted_generic_onebox_spec.rb @@ -184,6 +184,25 @@ describe Onebox::Engine::AllowlistedGenericOnebox do expect(onebox.to_html).to include("People are fostering and adopting pets during the pandemic") end end + + context 'uses basic meta description when necessary' do + before do + stub_request(:get, "https://www.reddit.com/r/colors/comments/b4d5xm/literally_nothing_black_edition/") + .to_return(status: 200, body: onebox_response('reddit_image')) + stub_request(:get, "https://www.example.com/content") + .to_return(status: 200, body: onebox_response('basic_description')) + end + + it 'uses opengraph tags when present' do + onebox = described_class.new("https://www.reddit.com/r/colors/comments/b4d5xm/literally_nothing_black_edition/") + expect(onebox.to_html).to include("4 votes and 1 comment so far on Reddit") + end + + it 'fallback to basic meta description if other description tags are missing' do + onebox = described_class.new("https://www.example.com/content") + expect(onebox.to_html).to include("basic meta description") + end + end end describe 'article html hosts' do