FEATURE: Track if a model can do vision in the llm_models table (#725)
* FEATURE: Track if a model can do vision in the llm_models table
* Data migration
parent 06e239321b
commit 5c196bca89

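In rough terms: llm_models gains a vision_enabled flag, the admin UI and serializer expose it, a data migration backfills it for known vision models, and every dialect now checks it before inlining image uploads. A minimal sketch of the resulting behaviour, assuming an LlmModel record already exists (the model name here is illustrative):

    # Mark a model as vision-capable; the column is added by the migration below.
    model = LlmModel.find_by(name: "claude-3-opus")
    model&.update!(vision_enabled: true)

    # Dialects guard image handling on the flag (see the dialect hunks below), roughly:
    #   content = inline_images(content, msg) if vision_support?
    # where vision_support? simply delegates to llm_model&.vision_enabled?
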
@@ -106,6 +106,7 @@ module DiscourseAi
           :max_prompt_tokens,
           :api_key,
           :enabled_chat_bot,
+          :vision_enabled,
         )

       provider = updating ? updating.provider : permitted[:provider]

@@ -124,4 +124,6 @@ end
 #  api_key :string
 #  user_id :integer
 #  enabled_chat_bot :boolean default(FALSE), not null
+#  provider_params :jsonb
+#  vision_enabled :boolean default(FALSE), not null
 #

@@ -13,7 +13,8 @@ class LlmModelSerializer < ApplicationSerializer
     :url,
     :enabled_chat_bot,
     :shadowed_by_srv,
-    :provider_params
+    :provider_params,
+    :vision_enabled

   has_one :user, serializer: BasicUserSerializer, embed: :object

@@ -13,7 +13,8 @@ export default class AiLlm extends RestModel {
     "url",
     "api_key",
     "enabled_chat_bot",
-    "provider_params"
+    "provider_params",
+    "vision_enabled"
   );
 }

@@ -267,6 +267,14 @@ export default class AiLlmEditorForm extends Component {
       @content={{I18n.t "discourse_ai.llms.hints.max_prompt_tokens"}}
     />
   </div>
+  <div class="control-group ai-llm-editor__vision-enabled">
+    <Input @type="checkbox" @checked={{@model.vision_enabled}} />
+    <label>{{I18n.t "discourse_ai.llms.vision_enabled"}}</label>
+    <DTooltip
+      @icon="question-circle"
+      @content={{I18n.t "discourse_ai.llms.hints.vision_enabled"}}
+    />
+  </div>
   <div class="control-group">
     <DToggleSwitch
       class="ai-llm-editor__enabled-chat-bot"

@@ -41,4 +41,9 @@
     display: flex;
     align-items: center;
   }
+
+  &__vision-enabled {
+    display: flex;
+    align-items: flex-start;
+  }
 }

@@ -228,6 +228,7 @@ en:
       url: "URL of the service hosting the model"
       api_key: "API Key of the service hosting the model"
       enabled_chat_bot: "Allow AI Bot"
+      vision_enabled: "Vision enabled"
       ai_bot_user: "AI Bot User"
       save: "Save"
       edit: "Edit"

@@ -252,6 +253,7 @@ en:
     hints:
       max_prompt_tokens: "Max numbers of tokens for the prompt. As a rule of thumb, this should be 50% of the model's context window."
       name: "We include this in the API call to specify which model we'll use."
+      vision_enabled: "If enabled, the AI will attempt to understand images. It depends on the model being used supporting vision. Supported by latest models from Anthropic, Google, and OpenAI."

     providers:
       aws_bedrock: "AWS Bedrock"

@@ -189,10 +189,13 @@ discourse_ai:
   ai_vllm_api_key: ""
   ai_llava_endpoint:
     default: ""
+    hidden: true
   ai_llava_endpoint_srv:
     default: ""
     hidden: true
-  ai_llava_api_key: ""
+  ai_llava_api_key:
+    default: ""
+    hidden: true
   ai_strict_token_counting:
     default: false
     hidden: true

@@ -254,7 +257,7 @@ discourse_ai:
       - "context_menu"
       - "image_caption"
   ai_helper_image_caption_model:
-    default: "llava"
+    default: ""
     type: enum
     enum: "DiscourseAi::Configuration::LlmVisionEnumerator"
   ai_auto_image_caption_allowed_groups:

@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+class LlmModelVisionEnabled < ActiveRecord::Migration[7.1]
+  def change
+    add_column :llm_models, :vision_enabled, :boolean, default: false, null: false
+  end
+end

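With the column in place, vision capability is just a boolean on the record; a small usage sketch (the model name is illustrative):

    # Flag a model and list everything that can currently handle images.
    LlmModel.find_by(name: "gpt-4o")&.update!(vision_enabled: true)
    LlmModel.where(vision_enabled: true).pluck(:display_name)
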
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+class MigrateVisionLlms < ActiveRecord::Migration[7.1]
+  def up
+    vision_models = %w[
+      claude-3-sonnet
+      claude-3-opus
+      claude-3-haiku
+      gpt-4-vision-preview
+      gpt-4-turbo
+      gpt-4o
+      gemini-1.5-pro
+      gemini-1.5-flash
+    ]
+
+    DB.exec(<<~SQL, names: vision_models)
+      UPDATE llm_models
+      SET vision_enabled = true
+      WHERE name IN (:names)
+    SQL
+
+    current_value =
+      DB.query_single(
+        "SELECT value FROM site_settings WHERE name = :setting_name",
+        setting_name: "ai_helper_image_caption_model",
+      ).first
+
+    if current_value && current_value != "llava"
+      llm_model =
+        DB.query_single("SELECT id FROM llm_models WHERE name = :model", model: current_value).first
+
+      if llm_model
+        DB.exec(<<~SQL, new: "custom:#{llm_model}") if llm_model
+          UPDATE site_settings
+          SET value = :new
+          WHERE name = 'ai_helper_image_caption_model'
+        SQL
+      end
+    end
+  end
+
+  def down
+    raise ActiveRecord::IrreversibleMigration
+  end
+end

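The second half of the migration rewrites ai_helper_image_caption_model from a bare model name to the custom:<id> form that the completions proxy resolves back to an llm_models row. A rough sketch of that convention (the id is illustrative):

    # "custom:<llm_model_id>" points a site setting at a row in llm_models.
    model_id = DB.query_single("SELECT id FROM llm_models WHERE name = :n", n: "gpt-4o").first
    SiteSetting.ai_helper_image_caption_model = "custom:#{model_id}" if model_id
    llm = DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_image_caption_model)
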
@@ -143,47 +143,26 @@ module DiscourseAi
     end

     def generate_image_caption(upload, user)
-      if SiteSetting.ai_helper_image_caption_model == "llava"
-        image_base64 =
-          DiscourseAi::Completions::UploadEncoder.encode(
-            upload_ids: [upload.id],
-            max_pixels: 1_048_576,
-          ).first[
-            :base64
-          ]
-        parameters = {
-          input: {
-            image: "data:image/#{upload.extension};base64, #{image_base64}",
-            top_p: 1,
-            max_tokens: 1024,
-            temperature: 0.2,
-            prompt: "Please describe this image in a single sentence",
-          },
-        }
-
-        ::DiscourseAi::Inference::Llava.perform!(parameters).dig(:output).join
-      else
-        prompt =
-          DiscourseAi::Completions::Prompt.new(
-            "You are a bot specializing in image captioning.",
-            messages: [
-              {
-                type: :user,
-                content:
-                  "Describe this image in a single sentence#{custom_locale_instructions(user)}",
-                upload_ids: [upload.id],
-              },
-            ],
-            skip_validations: true,
-          )
-
-        DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_image_caption_model).generate(
-          prompt,
-          user: user,
-          max_tokens: 1024,
-          feature_name: "image_caption",
+      prompt =
+        DiscourseAi::Completions::Prompt.new(
+          "You are a bot specializing in image captioning.",
+          messages: [
+            {
+              type: :user,
+              content:
+                "Describe this image in a single sentence#{custom_locale_instructions(user)}",
+              upload_ids: [upload.id],
+            },
+          ],
+          skip_validations: true,
         )
-      end
+
+      DiscourseAi::Completions::Llm.proxy(SiteSetting.ai_helper_image_caption_model).generate(
+        prompt,
+        user: user,
+        max_tokens: 1024,
+        feature_name: "image_caption",
+      )
     end

     private

@@ -78,33 +78,25 @@ module DiscourseAi
           end
         end

-        user_message[:content] = inline_images(user_message[:content], msg)
+        user_message[:content] = inline_images(user_message[:content], msg) if vision_support?
         user_message
       end

       def inline_images(content, message)
-        if model_name.include?("gpt-4-vision") || model_name == "gpt-4-turbo" ||
-             model_name == "gpt-4o"
-          content = message[:content]
-          encoded_uploads = prompt.encoded_uploads(message)
-          if encoded_uploads.present?
-            new_content = []
-            new_content.concat(
-              encoded_uploads.map do |details|
-                {
-                  type: "image_url",
-                  image_url: {
-                    url: "data:#{details[:mime_type]};base64,#{details[:base64]}",
-                  },
-                }
-              end,
-            )
-            new_content << { type: "text", text: content }
-            content = new_content
-          end
-        end
+        encoded_uploads = prompt.encoded_uploads(message)
+        return content if encoded_uploads.blank?

-        content
+        content_w_imgs =
+          encoded_uploads.reduce([]) do |memo, details|
+            memo << {
+              type: "image_url",
+              image_url: {
+                url: "data:#{details[:mime_type]};base64,#{details[:base64]}",
+              },
+            }
+          end
+
+        content_w_imgs << { type: "text", text: message[:content] }
       end

       def per_message_overhead

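For a vision-enabled model, the rewritten inline_images produces OpenAI-style multipart content: the encoded uploads come first and the original text is appended as the last part. Roughly what a translated user message ends up looking like (the values are illustrative):

    {
      role: "user",
      content: [
        { type: "image_url", image_url: { url: "data:image/jpeg;base64,<encoded image>" } },
        { type: "text", text: "Describe this image in a single sentence" },
      ],
    }
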
@@ -109,34 +109,28 @@ module DiscourseAi
         content = +""
         content << "#{msg[:id]}: " if msg[:id]
         content << msg[:content]
-        content = inline_images(content, msg)
+        content = inline_images(content, msg) if vision_support?

         { role: "user", content: content }
       end

       def inline_images(content, message)
-        if model_name.include?("claude-3")
-          encoded_uploads = prompt.encoded_uploads(message)
-          if encoded_uploads.present?
-            new_content = []
-            new_content.concat(
-              encoded_uploads.map do |details|
-                {
-                  source: {
-                    type: "base64",
-                    data: details[:base64],
-                    media_type: details[:mime_type],
-                  },
-                  type: "image",
-                }
-              end,
-            )
-            new_content << { type: "text", text: content }
-            content = new_content
-          end
-        end
+        encoded_uploads = prompt.encoded_uploads(message)
+        return content if encoded_uploads.blank?

-        content
+        content_w_imgs =
+          encoded_uploads.reduce([]) do |memo, details|
+            memo << {
+              source: {
+                type: "base64",
+                data: details[:base64],
+                media_type: details[:mime_type],
+              },
+              type: "image",
+            }
+          end
+
+        content_w_imgs << { type: "text", text: content }
       end
     end
   end

@@ -56,6 +56,10 @@ module DiscourseAi
         false
       end

+      def vision_support?
+        llm_model&.vision_enabled?
+      end
+
       def tools
         @tools ||= tools_dialect.translated_tools
       end

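Because vision_support? lives in the dialect base class and just reads llm_model&.vision_enabled?, each dialect can use the same one-line guard instead of hard-coding model names. A sketch of the pattern (the method name here is illustrative; the guard itself is taken from the dialect hunks in this commit):

    def user_msg(msg)
      content = +""
      content << msg[:content]
      content = inline_images(content, msg) if vision_support?
      { role: "user", content: content }
    end
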
@@ -114,6 +114,8 @@ module DiscourseAi
           if beta_api?
             # support new format with multiple parts
             result = { role: "user", parts: [{ text: msg[:content] }] }
+            return result unless vision_support?
+
             upload_parts = uploaded_parts(msg)
             result[:parts].concat(upload_parts) if upload_parts.present?
             result

@@ -47,7 +47,28 @@ module DiscourseAi
           content << "#{msg[:id]}: " if msg[:id]
           content << msg[:content]

-          { role: "user", content: content }
+          message = { role: "user", content: content }
+
+          message[:content] = inline_images(message[:content], msg) if vision_support?
+
+          message
         end
+
+        def inline_images(content, message)
+          encoded_uploads = prompt.encoded_uploads(message)
+          return content if encoded_uploads.blank?
+
+          content_w_imgs =
+            encoded_uploads.reduce([]) do |memo, details|
+              memo << {
+                type: "image_url",
+                image_url: {
+                  url: "data:#{details[:mime_type]};base64,#{details[:base64]}",
+                },
+              }
+            end
+
+          content_w_imgs << { type: "text", text: message[:content] }
+        end
       end
     end

@@ -35,6 +35,8 @@ module DiscourseAi
               "The number of completions you requested exceed the number of canned responses"
         end

+        raise response if response.is_a?(StandardError)
+
         @completions += 1
         if block_given?
           cancelled = false

@@ -89,15 +89,6 @@ module DiscourseAi
        DiscourseAi::Tokenizer::BasicTokenizer.available_llm_tokenizers.map(&:name)
      end

-     def vision_models_by_provider
-       @vision_models_by_provider ||= {
-         aws_bedrock: %w[claude-3-sonnet claude-3-opus claude-3-haiku],
-         anthropic: %w[claude-3-sonnet claude-3-opus claude-3-haiku],
-         open_ai: %w[gpt-4-vision-preview gpt-4-turbo gpt-4o],
-         google: %w[gemini-1.5-pro gemini-1.5-flash],
-       }
-     end
-
      def models_by_provider
        # ChatGPT models are listed under open_ai but they are actually available through OpenAI and Azure.
        # However, since they use the same URL/key settings, there's no reason to duplicate them.

@@ -10,24 +10,15 @@ module DiscourseAi
     end

     def self.values
-      begin
-        result =
-          DiscourseAi::Completions::Llm.vision_models_by_provider.flat_map do |provider, models|
-            endpoint = DiscourseAi::Completions::Endpoints::Base.endpoint_for(provider.to_s)
+      values = DB.query_hash(<<~SQL).map(&:symbolize_keys)
+        SELECT display_name AS name, id AS value
+        FROM llm_models
+        WHERE vision_enabled
+      SQL

-            models.map do |model_name|
-              { name: endpoint.display_name(model_name), value: "#{provider}:#{model_name}" }
-            end
-          end
+      values.each { |value_h| value_h[:value] = "custom:#{value_h[:value]}" }

-        result << { name: "Llava", value: "llava" }
-
-        result
-        # TODO add support for LlmModel as well
-        # LlmModel.all.each do |model|
-        #   llm_models << { name: model.display_name, value: "custom:#{model.id}" }
-        # end
-      end
+      values
     end
   end
 end

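The enumerator now builds the ai_helper_image_caption_model dropdown straight from vision-enabled llm_models rows, prefixing each id with custom:. Roughly what it returns once such rows exist (display names and ids are illustrative):

    DiscourseAi::Configuration::LlmVisionEnumerator.values
    # => [{ name: "Claude 3 Opus", value: "custom:1" }, { name: "GPT-4o", value: "custom:2" }]
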
@@ -1,31 +0,0 @@
-# frozen_string_literal: true
-
-module ::DiscourseAi
-  module Inference
-    class Llava
-      def self.perform!(content)
-        headers = { "Referer" => Discourse.base_url, "Content-Type" => "application/json" }
-        body = content.to_json
-
-        if SiteSetting.ai_llava_endpoint_srv.present?
-          service = DiscourseAi::Utils::DnsSrv.lookup(SiteSetting.ai_llava_endpoint_srv)
-          api_endpoint = "https://#{service.target}:#{service.port}"
-        else
-          api_endpoint = SiteSetting.ai_llava_endpoint
-        end
-
-        headers["X-API-KEY"] = SiteSetting.ai_llava_api_key if SiteSetting.ai_llava_api_key.present?
-
-        response = Faraday.post("#{api_endpoint}/predictions", body, headers)
-
-        raise Net::HTTPBadResponse if ![200].include?(response.status)
-
-        JSON.parse(response.body, symbolize_names: true)
-      end
-
-      def self.configured?
-        SiteSetting.ai_llava_endpoint.present? || SiteSetting.ai_llava_endpoint_srv.present?
-      end
-    end
-  end
-end

@@ -2,7 +2,18 @@
 require_relative "endpoint_compliance"

 RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
-  let(:llm) { DiscourseAi::Completions::Llm.proxy("anthropic:claude-3-opus") }
+  let(:url) { "https://api.anthropic.com/v1/messages" }
+  fab!(:model) do
+    Fabricate(
+      :llm_model,
+      url: "https://api.anthropic.com/v1/messages",
+      name: "claude-3-opus",
+      provider: "anthropic",
+      api_key: "123",
+      vision_enabled: true,
+    )
+  end
+  let(:llm) { DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") }
   let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") }
   let(:upload100x100) do
     UploadCreator.new(image100x100, "image.jpg").create_for(Discourse.system_user.id)

@@ -45,8 +56,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
     prompt_with_tools
   end

-  before { SiteSetting.ai_anthropic_api_key = "123" }
-
   it "does not eat spaces with tool calls" do
     SiteSetting.ai_anthropic_native_tool_call_models = "claude-3-opus"
     body = <<~STRING

@@ -108,10 +117,7 @@
     result = +""
     body = body.scan(/.*\n/)
     EndpointMock.with_chunk_array_support do
-      stub_request(:post, "https://api.anthropic.com/v1/messages").to_return(
-        status: 200,
-        body: body,
-      )
+      stub_request(:post, url).to_return(status: 200, body: body)

       llm.generate(prompt_with_google_tool, user: Discourse.system_user) do |partial|
         result << partial

@@ -161,7 +167,7 @@

     parsed_body = nil

-    stub_request(:post, "https://api.anthropic.com/v1/messages").with(
+    stub_request(:post, url).with(
       body:
         proc do |req_body|
           parsed_body = JSON.parse(req_body, symbolize_names: true)

@@ -244,7 +250,7 @@
       },
     }.to_json

-    stub_request(:post, "https://api.anthropic.com/v1/messages").to_return(body: body)
+    stub_request(:post, url).to_return(body: body)

     result = proxy.generate(prompt, user: Discourse.system_user)

@@ -314,7 +320,7 @@
     STRING

     requested_body = nil
-    stub_request(:post, "https://api.anthropic.com/v1/messages").with(
+    stub_request(:post, url).with(
       body:
         proc do |req_body|
           requested_body = JSON.parse(req_body, symbolize_names: true)

@@ -351,7 +357,7 @@
     STRING

     parsed_body = nil
-    stub_request(:post, "https://api.anthropic.com/v1/messages").with(
+    stub_request(:post, url).with(
       body:
         proc do |req_body|
           parsed_body = JSON.parse(req_body, symbolize_names: true)

@@ -130,6 +130,17 @@ end
 RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
   subject(:endpoint) { described_class.new("gemini-pro", DiscourseAi::Tokenizer::OpenAiTokenizer) }

+  fab!(:model) do
+    Fabricate(
+      :llm_model,
+      url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest",
+      name: "gemini-1.5-pro",
+      provider: "google",
+      api_key: "ABC",
+      vision_enabled: true,
+    )
+  end
+
   fab!(:user)

   let(:image100x100) { plugin_file_from_fixtures("100x100.jpg") }

@@ -144,8 +155,6 @@
   end

   it "Supports Vision API" do
-    SiteSetting.ai_gemini_api_key = "ABC"
-
     prompt =
       DiscourseAi::Completions::Prompt.new(
         "You are image bot",

@@ -158,9 +167,8 @@

     req_body = nil

-    llm = DiscourseAi::Completions::Llm.proxy("google:gemini-1.5-pro")
-    url =
-      "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro-latest:generateContent?key=ABC"
+    llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+    url = "#{model.url}:generateContent?key=ABC"

     stub_request(:post, url).with(
       body:

@@ -202,8 +210,6 @@
   end

   it "Can correctly handle streamed responses even if they are chunked badly" do
-    SiteSetting.ai_gemini_api_key = "ABC"
-
     data = +""
     data << "da|ta: |"
     data << gemini_mock.response("Hello").to_json

@@ -214,9 +220,8 @@

     split = data.split("|")

-    llm = DiscourseAi::Completions::Llm.proxy("google:gemini-1.5-flash")
-    url =
-      "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:streamGenerateContent?alt=sse&key=ABC"
+    llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+    url = "#{model.url}:streamGenerateContent?alt=sse&key=ABC"

     output = +""
     gemini_mock.with_chunk_array_support do

@@ -258,7 +258,8 @@

   describe "image support" do
     it "can handle images" do
-      llm = DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4-turbo")
+      model = Fabricate(:llm_model, provider: "open_ai", vision_enabled: true)
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
       prompt =
         DiscourseAi::Completions::Prompt.new(
           "You are image bot",

@@ -112,43 +112,40 @@ RSpec.describe DiscourseAi::AiHelper::AssistantController do
       "A picture of a cat sitting on a table (#{I18n.t("discourse_ai.ai_helper.image_caption.attribution")})"
   end

+  before { assign_fake_provider_to(:ai_helper_image_caption_model) }
+
+  def request_caption(params)
+    DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+      post "/discourse-ai/ai-helper/caption_image", params: params
+
+      yield(response)
+    end
+  end
+
   context "when logged in as an allowed user" do
     fab!(:user) { Fabricate(:user, refresh_auto_groups: true) }

     before do
       sign_in(user)
-      SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1]
-      SiteSetting.ai_llava_endpoint = "https://example.com"
-
-      stub_request(:post, "https://example.com/predictions").to_return(
-        status: 200,
-        body: { output: caption.gsub(" ", " |").split("|") }.to_json,
-      )
+      SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1]
     end

     it "returns the suggested caption for the image" do
-      post "/discourse-ai/ai-helper/caption_image",
-           params: {
-             image_url: image_url,
-             image_url_type: "long_url",
-           }
-
-      expect(response.status).to eq(200)
-      expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+      request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r|
+        expect(r.status).to eq(200)
+        expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+      end
     end

     context "when the image_url is a short_url" do
       let(:image_url) { upload.short_url }

       it "returns the suggested caption for the image" do
-        post "/discourse-ai/ai-helper/caption_image",
-             params: {
-               image_url: image_url,
-               image_url_type: "short_url",
-             }
-
-        expect(response.status).to eq(200)
-        expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+        request_caption({ image_url: image_url, image_url_type: "short_url" }) do |r|
+          expect(r.status).to eq(200)
+          expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+        end
       end
     end

@@ -156,27 +153,25 @@
       let(:image_url) { "#{Discourse.base_url}#{upload.short_path}" }

       it "returns the suggested caption for the image" do
-        post "/discourse-ai/ai-helper/caption_image",
-             params: {
-               image_url: image_url,
-               image_url_type: "short_path",
-             }
-
-        expect(response.status).to eq(200)
-        expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+        request_caption({ image_url: image_url, image_url_type: "short_path" }) do |r|
+          expect(r.status).to eq(200)
+          expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+        end
       end
     end

     it "returns a 502 error when the completion call fails" do
-      stub_request(:post, "https://example.com/predictions").to_return(status: 502)
+      DiscourseAi::Completions::Llm.with_prepared_responses(
+        [DiscourseAi::Completions::Endpoints::Base::CompletionFailed.new],
+      ) do
+        post "/discourse-ai/ai-helper/caption_image",
+             params: {
+               image_url: image_url,
+               image_url_type: "long_url",
+             }

-      post "/discourse-ai/ai-helper/caption_image",
-           params: {
-             image_url: image_url,
-             image_url_type: "long_url",
-           }
-
-      expect(response.status).to eq(502)
+        expect(response.status).to eq(502)
+      end
     end

     it "returns a 400 error when the image_url is blank" do

@@ -211,9 +206,10 @@
         SiteSetting.provider = SiteSettings::DbProvider.new(SiteSetting)
         setup_s3
         stub_s3_store
+        assign_fake_provider_to(:ai_helper_image_caption_model)
         SiteSetting.secure_uploads = true
         SiteSetting.ai_helper_allowed_groups = Group::AUTO_GROUPS[:trust_level_1]
-        SiteSetting.ai_llava_endpoint = "https://example.com"

         Group.find(SiteSetting.ai_helper_allowed_groups_map.first).add(user)
         user.reload

@@ -242,14 +238,11 @@

       it "returns a 200 message and caption if user can access the secure upload" do
         group.add(user)
-        post "/discourse-ai/ai-helper/caption_image",
-             params: {
-               image_url: image_url,
-               image_url_type: "long_url",
-             }
-
-        expect(response.status).to eq(200)
-        expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+        request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r|
+          expect(r.status).to eq(200)
+          expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+        end
       end

       context "if the input URL is for a secure upload but not on the secure-uploads path" do

@@ -257,13 +250,11 @@

         it "creates a signed URL properly and makes the caption" do
           group.add(user)
-          post "/discourse-ai/ai-helper/caption_image",
-               params: {
-                 image_url: image_url,
-                 image_url_type: "long_url",
-               }
-          expect(response.status).to eq(200)
-          expect(response.parsed_body["caption"]).to eq(caption_with_attrs)
+
+          request_caption({ image_url: image_url, image_url_type: "long_url" }) do |r|
+            expect(r.status).to eq(200)
+            expect(r.parsed_body["caption"]).to eq(caption_with_attrs)
+          end
         end
       end
     end

@@ -21,14 +21,9 @@ RSpec.describe "AI image caption", type: :system, js: true do
   before do
     Group.find_by(id: Group::AUTO_GROUPS[:admins]).add(user)
     assign_fake_provider_to(:ai_helper_model)
-    SiteSetting.ai_llava_endpoint = "https://example.com"
+    assign_fake_provider_to(:ai_helper_image_caption_model)
    SiteSetting.ai_helper_enabled_features = "image_caption"
    sign_in(user)
-
-    stub_request(:post, "https://example.com/predictions").to_return(
-      status: 200,
-      body: { output: caption.gsub(" ", " |").split("|") }.to_json,
-    )
   end

   shared_examples "shows no image caption button" do

@@ -53,35 +48,41 @@

   context "when triggering caption with AI on desktop" do
     it "should show an image caption in an input field" do
-      visit("/latest")
-      page.find("#create-topic").click
-      attach_file([file_path]) { composer.click_toolbar_button("upload") }
-      popup.click_generate_caption
-      expect(popup.has_caption_popup_value?(caption_with_attrs)).to eq(true)
-      popup.save_caption
-      wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
-      expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+      DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+        visit("/latest")
+        page.find("#create-topic").click
+        attach_file([file_path]) { composer.click_toolbar_button("upload") }
+        popup.click_generate_caption
+        expect(popup.has_caption_popup_value?(caption_with_attrs)).to eq(true)
+        popup.save_caption
+        wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
+        expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+      end
     end

     it "should allow you to cancel a caption request" do
-      visit("/latest")
-      page.find("#create-topic").click
-      attach_file([file_path]) { composer.click_toolbar_button("upload") }
-      popup.click_generate_caption
-      popup.cancel_caption
-      expect(popup).to have_no_disabled_generate_button
+      DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+        visit("/latest")
+        page.find("#create-topic").click
+        attach_file([file_path]) { composer.click_toolbar_button("upload") }
+        popup.click_generate_caption
+        popup.cancel_caption
+        expect(popup).to have_no_disabled_generate_button
+      end
     end
   end

   context "when triggering caption with AI on mobile", mobile: true do
     it "should show update the image alt text with the caption" do
-      visit("/latest")
-      page.find("#create-topic").click
-      attach_file([file_path]) { page.find(".mobile-file-upload").click }
-      page.find(".mobile-preview").click
-      popup.click_generate_caption
-      wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
-      expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+      DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+        visit("/latest")
+        page.find("#create-topic").click
+        attach_file([file_path]) { page.find(".mobile-file-upload").click }
+        page.find(".mobile-preview").click
+        popup.click_generate_caption
+        wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
+        expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
+      end
     end
   end

@@ -125,15 +126,17 @@
     end

     it "should auto caption the existing images and update the preference when dialog is accepted" do
-      visit("/latest")
-      page.find("#create-topic").click
-      attach_file([file_path]) { composer.click_toolbar_button("upload") }
-      wait_for { composer.has_no_in_progress_uploads? }
-      composer.fill_title("I love using Discourse! It is my favorite forum software")
-      composer.create
-      dialog.click_yes
-      wait_for(timeout: 100) { page.find("#post_1 .cooked img")["alt"] == caption_with_attrs }
-      expect(page.find("#post_1 .cooked img")["alt"]).to eq(caption_with_attrs)
+      DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+        visit("/latest")
+        page.find("#create-topic").click
+        attach_file([file_path]) { composer.click_toolbar_button("upload") }
+        wait_for { composer.has_no_in_progress_uploads? }
+        composer.fill_title("I love using Discourse! It is my favorite forum software")
+        composer.create
+        dialog.click_yes
+        wait_for(timeout: 100) { page.find("#post_1 .cooked img")["alt"] == caption_with_attrs }
+        expect(page.find("#post_1 .cooked img")["alt"]).to eq(caption_with_attrs)
+      end
     end
   end

@@ -142,14 +145,16 @@

   skip "TODO: Fix auto_image_caption user option not present in testing environment?" do
     it "should auto caption the image after uploading" do
-      visit("/latest")
-      page.find("#create-topic").click
-      attach_file([Rails.root.join("spec/fixtures/images/logo.jpg")]) do
-        composer.click_toolbar_button("upload")
+      DiscourseAi::Completions::Llm.with_prepared_responses([caption]) do
+        visit("/latest")
+        page.find("#create-topic").click
+        attach_file([Rails.root.join("spec/fixtures/images/logo.jpg")]) do
+          composer.click_toolbar_button("upload")
+        end
+        wait_for { composer.has_no_in_progress_uploads? }
+        wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
+        expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
       end
-      wait_for { composer.has_no_in_progress_uploads? }
-      wait_for { page.find(".image-wrapper img")["alt"] == caption_with_attrs }
-      expect(page.find(".image-wrapper img")["alt"]).to eq(caption_with_attrs)
     end
   end
 end