diff --git a/app/controllers/discourse_ai/admin/ai_llms_controller.rb b/app/controllers/discourse_ai/admin/ai_llms_controller.rb
index 9098f305..72cd7b13 100644
--- a/app/controllers/discourse_ai/admin/ai_llms_controller.rb
+++ b/app/controllers/discourse_ai/admin/ai_llms_controller.rb
@@ -106,6 +106,7 @@ module DiscourseAi
:max_prompt_tokens,
:api_key,
:enabled_chat_bot,
+ :vision_enabled,
)
provider = updating ? updating.provider : permitted[:provider]
diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb
index 180a7a84..73666dd5 100644
--- a/app/models/llm_model.rb
+++ b/app/models/llm_model.rb
@@ -124,4 +124,6 @@ end
# api_key :string
# user_id :integer
# enabled_chat_bot :boolean default(FALSE), not null
+# provider_params :jsonb
+# vision_enabled :boolean default(FALSE), not null
#
diff --git a/app/serializers/llm_model_serializer.rb b/app/serializers/llm_model_serializer.rb
index 268f41b2..71a1c1b9 100644
--- a/app/serializers/llm_model_serializer.rb
+++ b/app/serializers/llm_model_serializer.rb
@@ -13,7 +13,8 @@ class LlmModelSerializer < ApplicationSerializer
:url,
:enabled_chat_bot,
:shadowed_by_srv,
- :provider_params
+ :provider_params,
+ :vision_enabled
has_one :user, serializer: BasicUserSerializer, embed: :object
diff --git a/assets/javascripts/discourse/admin/models/ai-llm.js b/assets/javascripts/discourse/admin/models/ai-llm.js
index e81d0d04..8545ee6b 100644
--- a/assets/javascripts/discourse/admin/models/ai-llm.js
+++ b/assets/javascripts/discourse/admin/models/ai-llm.js
@@ -13,7 +13,8 @@ export default class AiLlm extends RestModel {
"url",
"api_key",
"enabled_chat_bot",
- "provider_params"
+ "provider_params",
+ "vision_enabled"
);
}
diff --git a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
index 11a198fc..20ce95db 100644
--- a/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
+++ b/assets/javascripts/discourse/components/ai-llm-editor-form.gjs
@@ -267,6 +267,14 @@ export default class AiLlmEditorForm extends Component {
@content={{I18n.t "discourse_ai.llms.hints.max_prompt_tokens"}}
/>
+
+
+
+
+
Discourse.base_url, "Content-Type" => "application/json" }
- body = content.to_json
-
- if SiteSetting.ai_llava_endpoint_srv.present?
- service = DiscourseAi::Utils::DnsSrv.lookup(SiteSetting.ai_llava_endpoint_srv)
- api_endpoint = "https://#{service.target}:#{service.port}"
- else
- api_endpoint = SiteSetting.ai_llava_endpoint
- end
-
- headers["X-API-KEY"] = SiteSetting.ai_llava_api_key if SiteSetting.ai_llava_api_key.present?
-
- response = Faraday.post("#{api_endpoint}/predictions", body, headers)
-
- raise Net::HTTPBadResponse if ![200].include?(response.status)
-
- JSON.parse(response.body, symbolize_names: true)
- end
-
- def self.configured?
- SiteSetting.ai_llava_endpoint.present? || SiteSetting.ai_llava_endpoint_srv.present?
- end
- end
- end
-end
diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb
index 0c47f0e8..dfecbe1f 100644
--- a/spec/lib/completions/endpoints/anthropic_spec.rb
+++ b/spec/lib/completions/endpoints/anthropic_spec.rb
@@ -2,7 +2,18 @@
require_relative "endpoint_compliance"
RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
- let(:llm) { DiscourseAi::Completions::Llm.proxy("anthropic:claude-3-opus") }
+ let(:url) { "https://api.anthropic.com/v1/messages" }
+ fab!(:model) do
+ Fabricate(
+ :llm_model,
+ url: "https://api.anthropic.com/v1/messages",
+ name: "claude-3-opus",
+ provider: "anthropic",
+ api_key: "123",
+ vision_enabled: true,
+ )
+ end
+ let(:llm) { DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") }
let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") }
let(:upload100x100) do
UploadCreator.new(image100x100, "image.jpg").create_for(Discourse.system_user.id)
@@ -45,8 +56,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
prompt_with_tools
end
- before { SiteSetting.ai_anthropic_api_key = "123" }
-
it "does not eat spaces with tool calls" do
SiteSetting.ai_anthropic_native_tool_call_models = "claude-3-opus"
body = <<~STRING
@@ -108,10 +117,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
result = +""
body = body.scan(/.*\n/)
EndpointMock.with_chunk_array_support do
- stub_request(:post, "https://api.anthropic.com/v1/messages").to_return(
- status: 200,
- body: body,
- )
+ stub_request(:post, url).to_return(status: 200, body: body)
llm.generate(prompt_with_google_tool, user: Discourse.system_user) do |partial|
result << partial
@@ -161,7 +167,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
parsed_body = nil
- stub_request(:post, "https://api.anthropic.com/v1/messages").with(
+ stub_request(:post, url).with(
body:
proc do |req_body|
parsed_body = JSON.parse(req_body, symbolize_names: true)
@@ -244,7 +250,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
},
}.to_json
- stub_request(:post, "https://api.anthropic.com/v1/messages").to_return(body: body)
+ stub_request(:post, url).to_return(body: body)
result = proxy.generate(prompt, user: Discourse.system_user)
@@ -314,7 +320,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
STRING
requested_body = nil
- stub_request(:post, "https://api.anthropic.com/v1/messages").with(
+ stub_request(:post, url).with(
body:
proc do |req_body|
requested_body = JSON.parse(req_body, symbolize_names: true)
@@ -351,7 +357,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
STRING
parsed_body = nil
- stub_request(:post, "https://api.anthropic.com/v1/messages").with(
+ stub_request(:post, url).with(
body:
proc do |req_body|
parsed_body = JSON.parse(req_body, symbolize_names: true)
diff --git a/spec/lib/completions/endpoints/gemini_spec.rb b/spec/lib/completions/endpoints/gemini_spec.rb
index dfa0d7fc..bcd733f6 100644
--- a/spec/lib/completions/endpoints/gemini_spec.rb
+++ b/spec/lib/completions/endpoints/gemini_spec.rb
@@ -130,6 +130,17 @@ end
RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
subject(:endpoint) { described_class.new("gemini-pro", DiscourseAi::Tokenizer::OpenAiTokenizer) }
+ fab!(:model) do
+ Fabricate(
+ :llm_model,
+ url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest",
+ name: "gemini-1.5-pro",
+ provider: "google",
+ api_key: "ABC",
+ vision_enabled: true,
+ )
+ end
+
fab!(:user)
let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") }
@@ -144,8 +155,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
end
it "Supports Vision API" do
- SiteSetting.ai_gemini_api_key = "ABC"
-
prompt =
DiscourseAi::Completions::Prompt.new(
"You are image bot",
@@ -158,9 +167,8 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
req_body = nil
- llm = DiscourseAi::Completions::Llm.proxy("google:gemini-1.5-pro")
- url =
- "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent?key=ABC"
+ llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+ url = "#{model.url}:generateContent?key=ABC"
stub_request(:post, url).with(
body:
@@ -202,8 +210,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
end
it "Can correctly handle streamed responses even if they are chunked badly" do
- SiteSetting.ai_gemini_api_key = "ABC"
-
data = +""
data << "da|ta: |"
data << gemini_mock.response("Hello").to_json
@@ -214,9 +220,8 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
split = data.split("|")
- llm = DiscourseAi::Completions::Llm.proxy("google:gemini-1.5-flash")
- url =
- "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:streamGenerateContent?alt=sse&key=ABC"
+ llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+ url = "#{model.url}:streamGenerateContent?alt=sse&key=ABC"
output = +""
gemini_mock.with_chunk_array_support do
diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb
index b4d942f2..6703d7ec 100644
--- a/spec/lib/completions/endpoints/open_ai_spec.rb
+++ b/spec/lib/completions/endpoints/open_ai_spec.rb
@@ -258,7 +258,8 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
describe "image support" do
it "can handle images" do
- llm = DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4-turbo")
+ model = Fabricate(:llm_model, provider: "open_ai", vision_enabled: true)
+ llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
prompt =
DiscourseAi::Completions::Prompt.new(
"You are image bot",
diff --git a/spec/requests/ai_helper/assistant_controller_spec.rb b/spec/requests/ai_helper/assistant_controller_spec.rb
index f119f75d..d6c00bb6 100644
--- a/spec/requests/ai_helper/assistant_controller_spec.rb
+++ b/spec/requests/ai_helper/assistant_controller_spec.rb
@@ -112,43 +112,40 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
"A picture of a cat sitting on a table (#{I18n.t("discourse_ai.ai_helper.image_caption.attribution")})"
end
+ before { assign_fake_provider_to(:ai_helper_image_caption_model) }
+
+ def request_caption(params)
+ DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+ post "/discourse-ai/ai-helper/caption_image", params: params
+
+ yield(response)
+ end
+ end
+
context "when logged in as an allowed user" do
fab!(:user) { Fabricate(:user, refresh_auto_groups: true) }
before do
sign_in(user)
- SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1]
- SiteSetting.ai_llava_endpoint = "https://example.com"
- stub_request(:post, "https://example.com/predictions").to_return(
- status: 200,
- body: { output: caption.gsub(" ", " |").split("|") }.to_json,
- )
+ SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1]
end
it "returns the suggested caption for the image" do
- post "/discourse-ai/ai-helper/caption_image",
- params: {
- image_url: image_url,
- image_url_type: "long_url",
- }
-
- expect(response.status).to eq(200)
- expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+ request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r|
+ expect(r.status).to eq(200)
+ expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+ end
end
context "when the image_url is a short_url" do
let(:image_url) { upload.short_url }
it "returns the suggested caption for the image" do
- post "/discourse-ai/ai-helper/caption_image",
- params: {
- image_url: image_url,
- image_url_type: "short_url",
- }
-
- expect(response.status).to eq(200)
- expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+ request_caption({ image_url: image_url, image_url_type: "short_url" }) do |r|
+ expect(r.status).to eq(200)
+ expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+ end
end
end
@@ -156,27 +153,25 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
let(:image_url) { "#{Discourse.base_url}#{upload.short_path}" }
it "returns the suggested caption for the image" do
- post "/discourse-ai/ai-helper/caption_image",
- params: {
- image_url: image_url,
- image_url_type: "short_path",
- }
-
- expect(response.status).to eq(200)
- expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+ request_caption({ image_url: image_url, image_url_type: "short_path" }) do |r|
+ expect(r.status).to eq(200)
+ expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+ end
end
end
it "returns a 502 error when the completion call fails" do
- stub_request(:post, "https://example.com/predictions").to_return(status: 502)
+ DiscourseAi::Completions::Llm.with_prepared_responses(
+ [DiscourseAi::Completions::Endpoints::Base::CompletionFailed.new],
+ ) do
+ post "/discourse-ai/ai-helper/caption_image",
+ params: {
+ image_url: image_url,
+ image_url_type: "long_url",
+ }
- post "/discourse-ai/ai-helper/caption_image",
- params: {
- image_url: image_url,
- image_url_type: "long_url",
- }
-
- expect(response.status).to eq(502)
+ expect(response.status).to eq(502)
+ end
end
it "returns a 400 error when the image_url is blank" do
@@ -211,9 +206,10 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
SiteSetting.provider = SiteSettings::DbProvider.new(SiteSetting)
setup_s3
stub_s3_store
+ assign_fake_provider_to(:ai_helper_image_caption_model)
SiteSetting.secure_uploads = true
SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1]
- SiteSetting.ai_llava_endpoint = "https://example.com"
+
Group.find(SiteSetting.ai_helper_allowed_groups_map.first).add(user)
user.reload
@@ -242,14 +238,11 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
it "returns a 200 message and caption if user can access the secure upload" do
group.add(user)
- post "/discourse-ai/ai-helper/caption_image",
- params: {
- image_url: image_url,
- image_url_type: "long_url",
- }
- expect(response.status).to eq(200)
- expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+ request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r|
+ expect(r.status).to eq(200)
+ expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+ end
end
context "if the input URL is for a secure upload but not on the secure-uploads path" do
@@ -257,13 +250,11 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
it "creates a signed URL properly and makes the caption" do
group.add(user)
- post "/discourse-ai/ai-helper/caption_image",
- params: {
- image_url: image_url,
- image_url_type: "long_url",
- }
- expect(response.status).to eq(200)
- expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+
+ request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r|
+ expect(r.status).to eq(200)
+ expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+ end
end
end
end
diff --git a/spec/system/ai_helper/ai_image_caption_spec.rb b/spec/system/ai_helper/ai_image_caption_spec.rb
index c0f18d5c..82dad656 100644
--- a/spec/system/ai_helper/ai_image_caption_spec.rb
+++ b/spec/system/ai_helper/ai_image_caption_spec.rb
@@ -21,14 +21,9 @@ RSpec.describe "AI image caption", type: :system, js: true do
before do
Group.find_by(id: Group::AUTO_GROUPS[:admins]).add(user)
assign_fake_provider_to(:ai_helper_model)
- SiteSetting.ai_llava_endpoint = "https://example.com"
+ assign_fake_provider_to(:ai_helper_image_caption_model)
SiteSetting.ai_helper_enabled_features = "image_caption"
sign_in(user)
-
- stub_request(:post, "https://example.com/predictions").to_return(
- status: 200,
- body: { output: caption.gsub(" ", " |").split("|") }.to_json,
- )
end
shared_examples "shows no image caption button" do
@@ -53,35 +48,41 @@ RSpec.describe "AI image caption", type: :system, js: true do
context "when triggering caption with AI on desktop" do
it "should show an image caption in an input field" do
- visit("/latest")
- page.find("#create-topic").click
- attach_file([file_path]) { composer.click_toolbar_button("upload") }
- popup.click_generate_caption
- expect(popup.has_caption_popup_value?(caption_with_attrs)).to eq(true)
- popup.save_caption
- wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
- expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+ DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+ visit("/latest")
+ page.find("#create-topic").click
+ attach_file([file_path]) { composer.click_toolbar_button("upload") }
+ popup.click_generate_caption
+ expect(popup.has_caption_popup_value?(caption_with_attrs)).to eq(true)
+ popup.save_caption
+ wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
+ expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+ end
end
it "should allow you to cancel a caption request" do
- visit("/latest")
- page.find("#create-topic").click
- attach_file([file_path]) { composer.click_toolbar_button("upload") }
- popup.click_generate_caption
- popup.cancel_caption
- expect(popup).to have_no_disabled_generate_button
+ DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+ visit("/latest")
+ page.find("#create-topic").click
+ attach_file([file_path]) { composer.click_toolbar_button("upload") }
+ popup.click_generate_caption
+ popup.cancel_caption
+ expect(popup).to have_no_disabled_generate_button
+ end
end
end
context "when triggering caption with AI on mobile", mobile: true do
it "should show update the image alt text with the caption" do
- visit("/latest")
- page.find("#create-topic").click
- attach_file([file_path]) { page.find(".mobile-file-upload").click }
- page.find(".mobile-preview").click
- popup.click_generate_caption
- wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
- expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+ DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+ visit("/latest")
+ page.find("#create-topic").click
+ attach_file([file_path]) { page.find(".mobile-file-upload").click }
+ page.find(".mobile-preview").click
+ popup.click_generate_caption
+ wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
+ expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+ end
end
end
@@ -125,15 +126,17 @@ RSpec.describe "AI image caption", type: :system, js: true do
end
it "should auto caption the existing images and update the preference when dialog is accepted" do
- visit("/latest")
- page.find("#create-topic").click
- attach_file([file_path]) { composer.click_toolbar_button("upload") }
- wait_for { composer.has_no_in_progress_uploads? }
- composer.fill_title("I love using Discourse! It is my favorite forum software")
- composer.create
- dialog.click_yes
- wait_for(timeout: 100) { page.find("#post_1 .cooked img")["alt"] == caption_with_attrs }
- expect(page.find("#post_1 .cooked img")["alt"]).to eq(caption_with_attrs)
+ DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+ visit("/latest")
+ page.find("#create-topic").click
+ attach_file([file_path]) { composer.click_toolbar_button("upload") }
+ wait_for { composer.has_no_in_progress_uploads? }
+ composer.fill_title("I love using Discourse! It is my favorite forum software")
+ composer.create
+ dialog.click_yes
+ wait_for(timeout: 100) { page.find("#post_1 .cooked img")["alt"] == caption_with_attrs }
+ expect(page.find("#post_1 .cooked img")["alt"]).to eq(caption_with_attrs)
+ end
end
end
@@ -142,14 +145,16 @@ RSpec.describe "AI image caption", type: :system, js: true do
skip "TODO: Fix auto_image_caption user option not present in testing environment?" do
it "should auto caption the image after uploading" do
- visit("/latest")
- page.find("#create-topic").click
- attach_file([Rails.root.join("spec/fixtures/images/logo.jpg")]) do
- composer.click_toolbar_button("upload")
+ DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+ visit("/latest")
+ page.find("#create-topic").click
+ attach_file([Rails.root.join("spec/fixtures/images/logo.jpg")]) do
+ composer.click_toolbar_button("upload")
+ end
+ wait_for { composer.has_no_in_progress_uploads? }
+ wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
+ expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
end
- wait_for { composer.has_no_in_progress_uploads? }
- wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
- expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
end
end
end