diff --git a/app/controllers/discourse_ai/admin/ai_llms_controller.rb b/app/controllers/discourse_ai/admin/ai_llms_controller.rb index 9098f305..72cd7b13 100644 --- a/app/controllers/discourse_ai/admin/ai_llms_controller.rb +++ b/app/controllers/discourse_ai/admin/ai_llms_controller.rb @@ -106,6 +106,7 @@ module DiscourseAi :max_prompt_tokens, :api_key, :enabled_chat_bot, + :vision_enabled, ) provider = updating ? updating.provider : permitted[:provider] diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb index 180a7a84..73666dd5 100644 --- a/app/models/llm_model.rb +++ b/app/models/llm_model.rb @@ -124,4 +124,6 @@ end # api_key :string # user_id :integer # enabled_chat_bot :boolean default(FALSE), not null +# provider_params :jsonb +# vision_enabled :boolean default(FALSE), not null # diff --git a/app/serializers/llm_model_serializer.rb b/app/serializers/llm_model_serializer.rb index 268f41b2..71a1c1b9 100644 --- a/app/serializers/llm_model_serializer.rb +++ b/app/serializers/llm_model_serializer.rb @@ -13,7 +13,8 @@ class LlmModelSerializer < ApplicationSerializer :url, :enabled_chat_bot, :shadowed_by_srv, - :provider_params + :provider_params, + :vision_enabled has_one :user, serializer: BasicUserSerializer, embed: :object diff --git a/assets/javascripts/discourse/admin/models/ai-llm.js b/assets/javascripts/discourse/admin/models/ai-llm.js index e81d0d04..8545ee6b 100644 --- a/assets/javascripts/discourse/admin/models/ai-llm.js +++ b/assets/javascripts/discourse/admin/models/ai-llm.js @@ -13,7 +13,8 @@ export default class AiLlm extends RestModel { "url", "api_key", "enabled_chat_bot", - "provider_params" + "provider_params", + "vision_enabled" ); } diff --git a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs index 11a198fc..20ce95db 100644 --- a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs +++ b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs @@ -267,6 +267,14 @@ export default class AiLlmEditorForm extends Component { @content={{I18n.t "discourse_ai.llms.hints.max_prompt_tokens"}} /> +
+ + + +
Discourse.base_url, "Content-Type" => "application/json" } - body = content.to_json - - if SiteSetting.ai_llava_endpoint_srv.present? - service = DiscourseAi::Utils::DnsSrv.lookup(SiteSetting.ai_llava_endpoint_srv) - api_endpoint = "https://#{service.target}:#{service.port}" - else - api_endpoint = SiteSetting.ai_llava_endpoint - end - - headers["X-API-KEY"] = SiteSetting.ai_llava_api_key if SiteSetting.ai_llava_api_key.present? - - response = Faraday.post("#{api_endpoint}/predictions", body, headers) - - raise Net::HTTPBadResponse if ![200].include?(response.status) - - JSON.parse(response.body, symbolize_names: true) - end - - def self.configured? - SiteSetting.ai_llava_endpoint.present? || SiteSetting.ai_llava_endpoint_srv.present? - end - end - end -end diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb index 0c47f0e8..dfecbe1f 100644 --- a/spec/lib/completions/endpoints/anthropic_spec.rb +++ b/spec/lib/completions/endpoints/anthropic_spec.rb @@ -2,7 +2,18 @@ require_relative "endpoint_compliance" RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do - let(:llm) { DiscourseAi::Completions::Llm.proxy("anthropic:claude-3-opus") } + let(:url) { "https://api.anthropic.com/v1/messages" } + fab!(:model) do + Fabricate( + :llm_model, + url: "https://api.anthropic.com/v1/messages", + name: "claude-3-opus", + provider: "anthropic", + api_key: "123", + vision_enabled: true, + ) + end + let(:llm) { DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") } let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") } let(:upload100x100) do UploadCreator.new(image100x100, "image.jpg").create_for(Discourse.system_user.id) @@ -45,8 +56,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do prompt_with_tools end - before { SiteSetting.ai_anthropic_api_key = "123" } - it "does not eat spaces with tool calls" do SiteSetting.ai_anthropic_native_tool_call_models = "claude-3-opus" body = <<~STRING @@ -108,10 +117,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do result = +"" body = body.scan(/.*\n/) EndpointMock.with_chunk_array_support do - stub_request(:post, "https://api.anthropic.com/v1/messages").to_return( - status: 200, - body: body, - ) + stub_request(:post, url).to_return(status: 200, body: body) llm.generate(prompt_with_google_tool, user: Discourse.system_user) do |partial| result << partial @@ -161,7 +167,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do parsed_body = nil - stub_request(:post, "https://api.anthropic.com/v1/messages").with( + stub_request(:post, url).with( body: proc do |req_body| parsed_body = JSON.parse(req_body, symbolize_names: true) @@ -244,7 +250,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do }, }.to_json - stub_request(:post, "https://api.anthropic.com/v1/messages").to_return(body: body) + stub_request(:post, url).to_return(body: body) result = proxy.generate(prompt, user: Discourse.system_user) @@ -314,7 +320,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do STRING requested_body = nil - stub_request(:post, "https://api.anthropic.com/v1/messages").with( + stub_request(:post, url).with( body: proc do |req_body| requested_body = JSON.parse(req_body, symbolize_names: true) @@ -351,7 +357,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do STRING parsed_body = nil - stub_request(:post, "https://api.anthropic.com/v1/messages").with( + stub_request(:post, url).with( body: proc do |req_body| parsed_body = JSON.parse(req_body, symbolize_names: true) diff --git a/spec/lib/completions/endpoints/gemini_spec.rb b/spec/lib/completions/endpoints/gemini_spec.rb index dfa0d7fc..bcd733f6 100644 --- a/spec/lib/completions/endpoints/gemini_spec.rb +++ b/spec/lib/completions/endpoints/gemini_spec.rb @@ -130,6 +130,17 @@ end RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do subject(:endpoint) { described_class.new("gemini-pro", DiscourseAi::Tokenizer::OpenAiTokenizer) } + fab!(:model) do + Fabricate( + :llm_model, + url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest", + name: "gemini-1.5-pro", + provider: "google", + api_key: "ABC", + vision_enabled: true, + ) + end + fab!(:user) let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") } @@ -144,8 +155,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do end it "Supports Vision API" do - SiteSetting.ai_gemini_api_key = "ABC" - prompt = DiscourseAi::Completions::Prompt.new( "You are image bot", @@ -158,9 +167,8 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do req_body = nil - llm = DiscourseAi::Completions::Llm.proxy("google:gemini-1.5-pro") - url = - "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent?key=ABC" + llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + url = "#{model.url}:generateContent?key=ABC" stub_request(:post, url).with( body: @@ -202,8 +210,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do end it "Can correctly handle streamed responses even if they are chunked badly" do - SiteSetting.ai_gemini_api_key = "ABC" - data = +"" data << "da|ta: |" data << gemini_mock.response("Hello").to_json @@ -214,9 +220,8 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do split = data.split("|") - llm = DiscourseAi::Completions::Llm.proxy("google:gemini-1.5-flash") - url = - "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:streamGenerateContent?alt=sse&key=ABC" + llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") + url = "#{model.url}:streamGenerateContent?alt=sse&key=ABC" output = +"" gemini_mock.with_chunk_array_support do diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb index b4d942f2..6703d7ec 100644 --- a/spec/lib/completions/endpoints/open_ai_spec.rb +++ b/spec/lib/completions/endpoints/open_ai_spec.rb @@ -258,7 +258,8 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do describe "image support" do it "can handle images" do - llm = DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4-turbo") + model = Fabricate(:llm_model, provider: "open_ai", vision_enabled: true) + llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") prompt = DiscourseAi::Completions::Prompt.new( "You are image bot", diff --git a/spec/requests/ai_helper/assistant_controller_spec.rb b/spec/requests/ai_helper/assistant_controller_spec.rb index f119f75d..d6c00bb6 100644 --- a/spec/requests/ai_helper/assistant_controller_spec.rb +++ b/spec/requests/ai_helper/assistant_controller_spec.rb @@ -112,43 +112,40 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do "A picture of a cat sitting on a table (#{I18n.t("discourse_ai.ai_helper.image_caption.attribution")})" end + before { assign_fake_provider_to(:ai_helper_image_caption_model) } + + def request_caption(params) + DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do + post "/discourse-ai/ai-helper/caption_image", params: params + + yield(response) + end + end + context "when logged in as an allowed user" do fab!(:user) { Fabricate(:user, refresh_auto_groups: true) } before do sign_in(user) - SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1] - SiteSetting.ai_llava_endpoint = "https://example.com" - stub_request(:post, "https://example.com/predictions").to_return( - status: 200, - body: { output: caption.gsub(" ", " |").split("|") }.to_json, - ) + SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1] end it "returns the suggested caption for the image" do - post "/discourse-ai/ai-helper/caption_image", - params: { - image_url: image_url, - image_url_type: "long_url", - } - - expect(response.status).to eq(200) - expect(response.parsed_body["caption"]).to eq(caption_with_attrs) + request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r| + expect(r.status).to eq(200) + expect(r.parsed_body["caption"]).to eq(caption_with_attrs) + end end context "when the image_url is a short_url" do let(:image_url) { upload.short_url } it "returns the suggested caption for the image" do - post "/discourse-ai/ai-helper/caption_image", - params: { - image_url: image_url, - image_url_type: "short_url", - } - - expect(response.status).to eq(200) - expect(response.parsed_body["caption"]).to eq(caption_with_attrs) + request_caption({ image_url: image_url, image_url_type: "short_url" }) do |r| + expect(r.status).to eq(200) + expect(r.parsed_body["caption"]).to eq(caption_with_attrs) + end end end @@ -156,27 +153,25 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do let(:image_url) { "#{Discourse.base_url}#{upload.short_path}" } it "returns the suggested caption for the image" do - post "/discourse-ai/ai-helper/caption_image", - params: { - image_url: image_url, - image_url_type: "short_path", - } - - expect(response.status).to eq(200) - expect(response.parsed_body["caption"]).to eq(caption_with_attrs) + request_caption({ image_url: image_url, image_url_type: "short_path" }) do |r| + expect(r.status).to eq(200) + expect(r.parsed_body["caption"]).to eq(caption_with_attrs) + end end end it "returns a 502 error when the completion call fails" do - stub_request(:post, "https://example.com/predictions").to_return(status: 502) + DiscourseAi::Completions::Llm.with_prepared_responses( + [DiscourseAi::Completions::Endpoints::Base::CompletionFailed.new], + ) do + post "/discourse-ai/ai-helper/caption_image", + params: { + image_url: image_url, + image_url_type: "long_url", + } - post "/discourse-ai/ai-helper/caption_image", - params: { - image_url: image_url, - image_url_type: "long_url", - } - - expect(response.status).to eq(502) + expect(response.status).to eq(502) + end end it "returns a 400 error when the image_url is blank" do @@ -211,9 +206,10 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do SiteSetting.provider = SiteSettings::DbProvider.new(SiteSetting) setup_s3 stub_s3_store + assign_fake_provider_to(:ai_helper_image_caption_model) SiteSetting.secure_uploads = true SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1] - SiteSetting.ai_llava_endpoint = "https://example.com" + Group.find(SiteSetting.ai_helper_allowed_groups_map.first).add(user) user.reload @@ -242,14 +238,11 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do it "returns a 200 message and caption if user can access the secure upload" do group.add(user) - post "/discourse-ai/ai-helper/caption_image", - params: { - image_url: image_url, - image_url_type: "long_url", - } - expect(response.status).to eq(200) - expect(response.parsed_body["caption"]).to eq(caption_with_attrs) + request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r| + expect(r.status).to eq(200) + expect(r.parsed_body["caption"]).to eq(caption_with_attrs) + end end context "if the input URL is for a secure upload but not on the secure-uploads path" do @@ -257,13 +250,11 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do it "creates a signed URL properly and makes the caption" do group.add(user) - post "/discourse-ai/ai-helper/caption_image", - params: { - image_url: image_url, - image_url_type: "long_url", - } - expect(response.status).to eq(200) - expect(response.parsed_body["caption"]).to eq(caption_with_attrs) + + request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r| + expect(r.status).to eq(200) + expect(r.parsed_body["caption"]).to eq(caption_with_attrs) + end end end end diff --git a/spec/system/ai_helper/ai_image_caption_spec.rb b/spec/system/ai_helper/ai_image_caption_spec.rb index c0f18d5c..82dad656 100644 --- a/spec/system/ai_helper/ai_image_caption_spec.rb +++ b/spec/system/ai_helper/ai_image_caption_spec.rb @@ -21,14 +21,9 @@ RSpec.describe "AI image caption", type: :system, js: true do before do Group.find_by(id: Group::AUTO_GROUPS[:admins]).add(user) assign_fake_provider_to(:ai_helper_model) - SiteSetting.ai_llava_endpoint = "https://example.com" + assign_fake_provider_to(:ai_helper_image_caption_model) SiteSetting.ai_helper_enabled_features = "image_caption" sign_in(user) - - stub_request(:post, "https://example.com/predictions").to_return( - status: 200, - body: { output: caption.gsub(" ", " |").split("|") }.to_json, - ) end shared_examples "shows no image caption button" do @@ -53,35 +48,41 @@ RSpec.describe "AI image caption", type: :system, js: true do context "when triggering caption with AI on desktop" do it "should show an image caption in an input field" do - visit("/latest") - page.find("#create-topic").click - attach_file([file_path]) { composer.click_toolbar_button("upload") } - popup.click_generate_caption - expect(popup.has_caption_popup_value?(caption_with_attrs)).to eq(true) - popup.save_caption - wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs } - expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs) + DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do + visit("/latest") + page.find("#create-topic").click + attach_file([file_path]) { composer.click_toolbar_button("upload") } + popup.click_generate_caption + expect(popup.has_caption_popup_value?(caption_with_attrs)).to eq(true) + popup.save_caption + wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs } + expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs) + end end it "should allow you to cancel a caption request" do - visit("/latest") - page.find("#create-topic").click - attach_file([file_path]) { composer.click_toolbar_button("upload") } - popup.click_generate_caption - popup.cancel_caption - expect(popup).to have_no_disabled_generate_button + DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do + visit("/latest") + page.find("#create-topic").click + attach_file([file_path]) { composer.click_toolbar_button("upload") } + popup.click_generate_caption + popup.cancel_caption + expect(popup).to have_no_disabled_generate_button + end end end context "when triggering caption with AI on mobile", mobile: true do it "should show update the image alt text with the caption" do - visit("/latest") - page.find("#create-topic").click - attach_file([file_path]) { page.find(".mobile-file-upload").click } - page.find(".mobile-preview").click - popup.click_generate_caption - wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs } - expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs) + DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do + visit("/latest") + page.find("#create-topic").click + attach_file([file_path]) { page.find(".mobile-file-upload").click } + page.find(".mobile-preview").click + popup.click_generate_caption + wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs } + expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs) + end end end @@ -125,15 +126,17 @@ RSpec.describe "AI image caption", type: :system, js: true do end it "should auto caption the existing images and update the preference when dialog is accepted" do - visit("/latest") - page.find("#create-topic").click - attach_file([file_path]) { composer.click_toolbar_button("upload") } - wait_for { composer.has_no_in_progress_uploads? } - composer.fill_title("I love using Discourse! It is my favorite forum software") - composer.create - dialog.click_yes - wait_for(timeout: 100) { page.find("#post_1 .cooked img")["alt"] == caption_with_attrs } - expect(page.find("#post_1 .cooked img")["alt"]).to eq(caption_with_attrs) + DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do + visit("/latest") + page.find("#create-topic").click + attach_file([file_path]) { composer.click_toolbar_button("upload") } + wait_for { composer.has_no_in_progress_uploads? } + composer.fill_title("I love using Discourse! It is my favorite forum software") + composer.create + dialog.click_yes + wait_for(timeout: 100) { page.find("#post_1 .cooked img")["alt"] == caption_with_attrs } + expect(page.find("#post_1 .cooked img")["alt"]).to eq(caption_with_attrs) + end end end @@ -142,14 +145,16 @@ RSpec.describe "AI image caption", type: :system, js: true do skip "TODO: Fix auto_image_caption user option not present in testing environment?" do it "should auto caption the image after uploading" do - visit("/latest") - page.find("#create-topic").click - attach_file([Rails.root.join("spec/fixtures/images/logo.jpg")]) do - composer.click_toolbar_button("upload") + DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do + visit("/latest") + page.find("#create-topic").click + attach_file([Rails.root.join("spec/fixtures/images/logo.jpg")]) do + composer.click_toolbar_button("upload") + end + wait_for { composer.has_no_in_progress_uploads? } + wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs } + expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs) end - wait_for { composer.has_no_in_progress_uploads? } - wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs } - expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs) end end end