FEATURE: Cache embed contents in the database (#25133)

* FEATURE: Cache embed contents in the database

This will be useful for features that rely on the semantic content of topics, such as the many AI features.



Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com>
This commit is contained in:
Rafael dos Santos Silva 2024-01-05 10:09:31 -03:00 committed by GitHub
parent ac4d90b3a6
commit 13735f35fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 64 additions and 10 deletions

View File

@ -3,10 +3,13 @@
class TopicEmbed < ActiveRecord::Base
include Trashable
EMBED_CONTENT_CACHE_MAX_LENGTH = 32_000
belongs_to :topic
belongs_to :post
validates_presence_of :embed_url
validates_uniqueness_of :embed_url
validates :embed_content_cache, length: { maximum: EMBED_CONTENT_CACHE_MAX_LENGTH }
before_validation(on: :create) do
unless (
@ -43,6 +46,7 @@ class TopicEmbed < ActiveRecord::Base
def self.import(user, url, title, contents, category_id: nil, cook_method: nil, tags: nil)
return unless url =~ %r{\Ahttps?\://}
original_contents = contents.dup.truncate(EMBED_CONTENT_CACHE_MAX_LENGTH)
contents = first_paragraph_from(contents) if SiteSetting.embed_truncate && cook_method.nil?
contents ||= ""
contents = contents.dup << imported_from_html(url)
@ -77,6 +81,7 @@ class TopicEmbed < ActiveRecord::Base
}
post = PostCreator.create(user, create_args)
post.topic.topic_embed.update!(embed_content_cache: original_contents)
end
else
absolutize_urls(url, contents)
@ -101,7 +106,7 @@ class TopicEmbed < ActiveRecord::Base
changes[:title] = title if title.present?
post.revise(user, changes, skip_validations: true, bypass_rate_limiter: true)
embed.update!(content_sha1: content_sha1)
embed.update!(content_sha1: content_sha1, embed_content_cache: original_contents)
end
end
end
@ -296,6 +301,11 @@ class TopicEmbed < ActiveRecord::Base
response = TopicEmbed.find_remote(url)
body = response.body
if post&.topic&.topic_embed && body.present?
post.topic.topic_embed.update!(
embed_content_cache: body.truncate(EMBED_CONTENT_CACHE_MAX_LENGTH),
)
end
body << TopicEmbed.imported_from_html(url)
body
end
@ -315,6 +325,7 @@ end
# updated_at :datetime not null
# deleted_at :datetime
# deleted_by_id :integer
# embed_content_cache :text
#
# Indexes
#

View File

@ -0,0 +1,7 @@
# frozen_string_literal: true
# Adds the `embed_content_cache` text column to the `topic_embeds` table.
# No length limit or NOT NULL constraint is declared at the database level.
class AddEmbedContentCacheToTopicEmbed < ActiveRecord::Migration[7.0]
  def change
    add_column(:topic_embeds, :embed_content_cache, :text)
  end
end

View File

@ -73,6 +73,9 @@ RSpec.describe TopicEmbed do
expect(post.cooked).to have_tag("a", with: { href: "http://eviltrout.com/hello" })
expect(post.cooked).to have_tag("img", with: { src: "http://eviltrout.com/images/wat.jpg" })
# It caches the embed content
expect(post.topic.topic_embed.embed_content_cache).to eq(contents)
# It converts relative URLs to absolute when expanded
stub_request(:get, url).to_return(status: 200, body: contents)
expect(TopicEmbed.expanded_for(post)).to have_tag(
@ -107,6 +110,13 @@ RSpec.describe TopicEmbed do
expect(topic_embed.post.topic.user).to eq(new_user)
end
it "Supports updating the embed content cache" do
  # The import must alter the cached content stored on topic_embed
  # (topic_embed, user, url and title come from the enclosing context).
  expect { TopicEmbed.import(user, url, title, "new contents") }.to change {
    topic_embed.reload.embed_content_cache
  }

  # topic_embed was reloaded inside the change block, so it reflects the import.
  expect(topic_embed.embed_content_cache).to eq("new contents")
end
it "Should leave uppercase Feed Entry URL untouched in content" do
cased_url = "http://eviltrout.com/ABCD"
post = TopicEmbed.import(user, cased_url, title, "some random content")
@ -559,4 +569,30 @@ RSpec.describe TopicEmbed do
expect(html).to eq(expected_html)
end
end
# Specs for TopicEmbed.expanded_for: it returns the remote body and
# refreshes the embed content cache stored on the topic embed.
describe ".expanded_for" do
  fab!(:user)
  fab!(:embeddable_host)
  fab!(:category)
  fab!(:tag)

  let(:title) { "How to turn a fish from good to evil in 30 seconds" }
  let(:url) { "http://eviltrout.com/123" }
  let(:contents) { "<p>hello world new post :D</p>" }

  it "returns embed content" do
    stub_request(:get, url).to_return(status: 200, body: contents)

    imported_post = TopicEmbed.import(user, url, title, contents)

    expect(TopicEmbed.expanded_for(imported_post)).to include(contents)
  end

  it "updates the embed content cache" do
    # Chained stub: the first GET answers with the original contents,
    # GETs after that answer with the changed body.
    original_response = { status: 200, body: contents }
    changed_response = { status: 200, body: "contents changed" }
    stub_request(:get, url).to_return(original_response).then.to_return(changed_response)

    imported_post = TopicEmbed.import(user, url, title, contents)
    TopicEmbed.expanded_for(imported_post)

    refreshed_cache = imported_post.topic.topic_embed.reload.embed_content_cache
    expect(refreshed_cache).to include("contents changed")
  end
end
end