From a708d4dfa251cb12a8906cc8cd0fd29928abd4f0 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Silva Date: Thu, 27 Jun 2024 16:24:44 -0300 Subject: [PATCH] FIX: Use base64 encoded images in AI Image Caption via LLaVa (#693) * FIX: Use base64 encoded images in AI Image Caption via LLaVa This fixes a regression introduced in #646 where we started sending schemeless URLs for our LLaVa service, which doesn't handle them well. Moving to base64 encoded images solves: - The service needing to download images Now the service running LLaVa doesn't need internet access - Secure uploads compat Every image is treated the same, less branching for secure uploads - Image Size problems Discourse is now responsible for ensuring a max size for images - Troublesome dev env Prior to this commit you would need a dev env that was internet accessible to use llava image captions --- lib/ai_helper/assistant.rb | 11 ++++-- .../ai_helper/assistant_controller_spec.rb | 39 ++++++++++++++++--- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/lib/ai_helper/assistant.rb b/lib/ai_helper/assistant.rb index 32cbc248..07056f80 100644 --- a/lib/ai_helper/assistant.rb +++ b/lib/ai_helper/assistant.rb @@ -130,11 +130,16 @@ module DiscourseAi def generate_image_caption(upload, user) if SiteSetting.ai_helper_image_caption_model == "llava" - image_url = - upload.secure? ? 
Discourse.store.url_for(upload) : UrlHelper.absolute(upload.url) + image_base64 = + DiscourseAi::Completions::UploadEncoder.encode( + upload_ids: [upload.id], + max_pixels: 1_048_576, + ).first[ + :base64 + ] parameters = { input: { - image: image_url, + image: "data:image/#{upload.extension};base64, #{image_base64}", top_p: 1, max_tokens: 1024, temperature: 0.2, diff --git a/spec/requests/ai_helper/assistant_controller_spec.rb b/spec/requests/ai_helper/assistant_controller_spec.rb index 401c1ee2..f119f75d 100644 --- a/spec/requests/ai_helper/assistant_controller_spec.rb +++ b/spec/requests/ai_helper/assistant_controller_spec.rb @@ -104,7 +104,8 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do end describe "#caption_image" do - fab!(:upload) + let(:image) { plugin_file_from_fixtures("100x100.jpg") } + let(:upload) { UploadCreator.new(image, "image.jpg").create_for(Discourse.system_user.id) } let(:image_url) { "#{Discourse.base_url}#{upload.url}" } let(:caption) { "A picture of a cat sitting on a table" } let(:caption_with_attrs) do @@ -197,15 +198,40 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do context "for secure uploads" do fab!(:group) fab!(:private_category) { Fabricate(:private_category, group: group) } - fab!(:post_in_secure_context) do - Fabricate(:post, topic: Fabricate(:topic, category: private_category)) - end - fab!(:upload) { Fabricate(:secure_upload, access_control_post: post_in_secure_context) } + let(:image) { plugin_file_from_fixtures("100x100.jpg") } + let(:upload) { UploadCreator.new(image, "image.jpg").create_for(Discourse.system_user.id) } let(:image_url) { "#{Discourse.base_url}/secure-uploads/#{upload.url}" } - before { enable_secure_uploads } + before do + Jobs.run_immediately! 
+ + # this is done so the after_save callbacks for site settings that make + # UploadReference records work + @original_provider = SiteSetting.provider + SiteSetting.provider = SiteSettings::DbProvider.new(SiteSetting) + setup_s3 + stub_s3_store + SiteSetting.secure_uploads = true + SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1] + SiteSetting.ai_llava_endpoint = "https://example.com" + Group.find(SiteSetting.ai_helper_allowed_groups_map.first).add(user) + user.reload + + stub_request( + :get, + "http://s3-upload-bucket.s3.dualstack.us-west-1.amazonaws.com/original/1X/#{upload.sha1}.#{upload.extension}", + ).to_return(status: 200, body: "", headers: {}) + end + after { SiteSetting.provider = @original_provider } it "returns a 403 error if the user cannot access the secure upload" do + create_post( + title: "Secure upload post", + raw: "This is a new post ", + category: private_category, + user: Discourse.system_user, + ) + post "/discourse-ai/ai-helper/caption_image", params: { image_url: image_url, @@ -221,6 +247,7 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do image_url: image_url, image_url_type: "long_url", } + expect(response.status).to eq(200) expect(response.parsed_body["caption"]).to eq(caption_with_attrs) end