Mirror of https://github.com/discourse/discourse-ai.git, synced 2025-07-10 08:03:28 +00:00
FIX: implement max_output tokens (anthropic/openai/bedrock/gemini/open router) (#1447)
* FIX: implement max_output tokens (anthropic/openai/bedrock/gemini/open router)

  Previously this feature existed but was not implemented. Also updates a bunch of models in our presets to point to the latest versions.

* implementing in base is safer, simpler and easier to manage

* anthropic 3.5 is getting older, let's use 4.0 here and fix the spec
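The clamping rule this commit adds to the shared endpoint base behaves roughly as below. This is a minimal standalone Ruby sketch of the logic (the real method is `enforce_max_output_tokens` in the base endpoint hunk further down; the `FakeModel` struct here is hypothetical):

FakeModel = Struct.new(:max_output_tokens)

# Mirrors enforce_max_output_tokens: when the model defines a positive
# max_output_tokens, a missing, zero, or over-cap requested value is
# clamped down to the cap; smaller explicit values pass through untouched.
def enforce_max_output_tokens(model, value)
  cap = model.max_output_tokens.to_i
  return value unless cap > 0
  (value.to_i <= 0 || value.to_i > cap) ? cap : value
end

model = FakeModel.new(2000)
enforce_max_output_tokens(model, 2500) # => 2000 (clamped to the model cap)
enforce_max_output_tokens(model, 500)  # => 500  (under the cap, unchanged)
enforce_max_output_tokens(model, nil)  # => 2000 (unset falls back to the cap)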
parent 3e87e92631
commit 37dbd48513
@@ -32,7 +32,7 @@ export default class AiLlmsListEditor extends Component {
       key = `${llm.provider}-${llm.name}`;
     } else {
       // case of preset
-      key = llm.id.replace(/\./g, "-");
+      key = llm.id.replace(/[.:\/]/g, "-");
     }
 
     key = `discourse_ai.llms.model_description.${key}`;
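For context on the widened character class: the OpenRouter presets added below use model names containing `/` and `:` (for example `deepseek/deepseek-r1-0528:free`), so replacing only dots would no longer yield the locale keys this commit adds. A quick Ruby sketch of the same substitution (the preset id strings here are illustrative, built the way the locale keys below suggest):

# Same substitution as the JS replace above: dots, colons, and slashes all become dashes.
["anthropic-claude-opus-4-0", "open_router-deepseek/deepseek-r1-0528:free"].each do |id|
  puts "discourse_ai.llms.model_description.#{id.gsub(%r{[.:/]}, "-")}"
end
# => discourse_ai.llms.model_description.anthropic-claude-opus-4-0
# => discourse_ai.llms.model_description.open_router-deepseek-deepseek-r1-0528-free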
@@ -539,14 +539,16 @@ en:
 
     model_description:
       none: "General settings that work for most language models"
-      anthropic-claude-3-7-sonnet: "Anthropic's most intelligent model"
-      anthropic-claude-3-5-haiku: "Fast and cost-effective"
-      anthropic-claude-3-opus: "Excels at writing and complex tasks"
-      google-gemini-2-5-pro: "Mid-sized multimodal model capable of a wide range of tasks"
-      google-gemini-2-0-flash: "Lightweight, fast, and cost-efficient with multimodal reasoning"
+      anthropic-claude-opus-4-0: "Anthropic's most intelligent model"
+      anthropic-claude-sonnet-4-0: "Optimal balance of speed and cost"
+      anthropic-claude-3-7-sonnet-latest: "Optimal balance of speed and cost (previous generation)"
+      anthropic-claude-3-5-haiku-latest: "Fast and cost-effective"
+      google-gemini-2-5-pro: "Large multimodal model capable of a wide range of tasks"
+      google-gemini-2-0-flash: "Lightweight, fast, and cost-efficient with multimodal reasoning (previous generation)"
+      google-gemini-2-5-flash: "Lightweight, fast, and cost-efficient with multimodal reasoning"
       google-gemini-2-0-flash-lite: "Cost efficient and low latency model"
-      open_ai-o1: "Open AI's most capable reasoning model"
-      open_ai-o3-mini: "Advanced Cost-efficient reasoning model"
+      open_ai-o3: "Open AI's most capable reasoning model"
+      open_ai-o4-mini: "Advanced Cost-efficient reasoning model"
       open_ai-gpt-4-1: "Open AI's flagship model. It is well suited for problem solving across domains"
       open_ai-gpt-4-1-mini: "Provides a balance between intelligence, speed, and cost that makes it an attractive model for many use cases."
       open_ai-gpt-4-1-nano: "Fastest, most cost-effective GPT-4.1 model."
@@ -554,6 +556,9 @@ en:
       samba_nova-Meta-Llama-3-3-70B-Instruct: "Powerful multipurpose model"
       mistral-mistral-large-latest: "Mistral's most powerful model"
       mistral-pixtral-large-latest: "Mistral's most powerful vision capable model"
+      open_router-x-ai-grok-3-beta: "xAI's latest model"
+      open_router-deepseek-deepseek-r1-0528-free: "DeepSeek's latest reasoning model"
+      open_router-meta-llama-3-3-70b-instruct: "Highly capable multilingual model"
 
     preseeded_model_description: "Pre-configured open-source model utilizing %{model}"
 
@@ -31,6 +31,12 @@ module DiscourseAi
             "claude-3-opus-20240229"
           when "claude-3-5-sonnet"
             "claude-3-5-sonnet-latest"
+          when "claude-3-7-sonnet"
+            "claude-3-7-sonnet-latest"
+          when "claude-4-opus"
+            "claude-4-opus-20250514"
+          when "claude-4-sonnet"
+            "claude-4-sonnet-20250514"
           else
             llm_model.name
           end
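Illustratively, the new branches resolve stored names as follows (taken straight from the case expression above; note the Sonnet entries float on `-latest` aliases while the Claude 4 names are pinned to dated snapshots):

# "claude-3-7-sonnet" => "claude-3-7-sonnet-latest"  (floating alias)
# "claude-4-opus"     => "claude-4-opus-20250514"    (pinned snapshot)
# "claude-4-sonnet"   => "claude-4-sonnet-20250514"  (pinned snapshot)
# anything else       => llm_model.name              (pass-through)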
@@ -92,7 +98,6 @@ module DiscourseAi
           default_options(dialect).merge(model_params.except(:response_format)).merge(
             messages: prompt.messages,
           )
 
         payload[:system] = prompt.system_prompt if prompt.system_prompt.present?
         payload[:stream] = true if @streaming_mode
 
@@ -120,6 +120,7 @@ module DiscourseAi
           default_options(dialect).merge(model_params.except(:response_format)).merge(
             messages: prompt.messages,
           )
 
         payload[:system] = prompt.system_prompt if prompt.system_prompt.present?
 
         prefilled_message = +""
@@ -48,6 +48,14 @@ module DiscourseAi
         @llm_model = llm_model
       end
 
+      def enforce_max_output_tokens(value)
+        if @llm_model.max_output_tokens.to_i > 0
+          value = @llm_model.max_output_tokens if (value.to_i > @llm_model.max_output_tokens) ||
+            (value.to_i <= 0)
+        end
+        value
+      end
+
       def use_ssl?
         if model_uri&.scheme.present?
           model_uri.scheme == "https"
@@ -83,6 +91,8 @@ module DiscourseAi
         @partial_tool_calls = partial_tool_calls
         @output_thinking = output_thinking
 
+        max_tokens = enforce_max_output_tokens(model_params[:max_tokens])
+        model_params[:max_tokens] = max_tokens if max_tokens
         model_params = normalize_model_params(model_params)
         orig_blk = blk
 
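Note the ordering in this hunk: the clamp runs before `normalize_model_params`, so any provider-specific renaming of the parameter sees the already-clamped value. A hedged sketch of the assumed flow for a model capped at 2000 tokens (the rename to `max_completion_tokens` for OpenAI reasoning models is inferred from the OpenAI spec change near the end of this diff):

model_params = { max_tokens: 2500 }                                # caller-supplied
max_tokens = enforce_max_output_tokens(model_params[:max_tokens])  # => 2000
model_params[:max_tokens] = max_tokens if max_tokens
model_params = normalize_model_params(model_params)                # may emit max_completion_tokens for OpenAI reasoning models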
@@ -63,6 +63,7 @@ module DiscourseAi
         tools = dialect.tools if @native_tool_support
 
         payload = default_options.merge(contents: prompt[:messages])
 
         payload[:systemInstruction] = {
           role: "system",
           parts: [{ text: prompt[:system_instruction].to_s }],
@@ -27,7 +27,7 @@ module DiscourseAi
         id: "anthropic",
         models: [
           {
-            name: "claude-3-7-sonnet",
+            name: "claude-3-7-sonnet-latest",
             tokens: 200_000,
             display_name: "Claude 3.7 Sonnet",
             input_cost: 3,
@@ -35,7 +35,15 @@ module DiscourseAi
             output_cost: 15,
           },
           {
-            name: "claude-3-5-haiku",
+            name: "claude-sonnet-4-0",
+            tokens: 200_000,
+            display_name: "Claude 4 Sonnet",
+            input_cost: 3,
+            cached_input_cost: 0.30,
+            output_cost: 15,
+          },
+          {
+            name: "claude-3-5-haiku-latest",
             tokens: 200_000,
             display_name: "Claude 3.5 Haiku",
             input_cost: 0.80,
@@ -43,9 +51,9 @@ module DiscourseAi
             output_cost: 4,
           },
           {
-            name: "claude-3-opus",
+            name: "claude-opus-4-0",
             tokens: 200_000,
-            display_name: "Claude 3 Opus",
+            display_name: "Claude 4 Opus",
             input_cost: 15,
             cached_input_cost: 1.50,
             output_cost: 75,
@@ -62,8 +70,19 @@ module DiscourseAi
             name: "gemini-2.5-pro",
             tokens: 800_000,
             endpoint:
-              "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro-preview-03-25",
+              "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro",
             display_name: "Gemini 2.5 Pro",
+            input_cost: 1.25,
+            output_cost: 10.0,
+          },
+          {
+            name: "gemini-2.5-flash",
+            tokens: 800_000,
+            endpoint:
+              "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash",
+            display_name: "Gemini 2.5 Flash",
+            input_cost: 0.30,
+            output_cost: 2.50,
           },
           {
             name: "gemini-2.0-flash",
@@ -71,6 +90,8 @@ module DiscourseAi
             endpoint:
               "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",
             display_name: "Gemini 2.0 Flash",
+            input_cost: 0.10,
+            output_cost: 0.40,
           },
           {
             name: "gemini-2.0-flash-lite",
@@ -89,20 +110,20 @@ module DiscourseAi
         id: "open_ai",
         models: [
           {
-            name: "o3-mini",
+            name: "o4-mini",
             tokens: 200_000,
-            display_name: "o3 Mini",
+            display_name: "o4 Mini",
             input_cost: 1.10,
-            cached_input_cost: 0.55,
+            cached_input_cost: 0.275,
             output_cost: 4.40,
           },
           {
-            name: "o1",
+            name: "o3",
             tokens: 200_000,
-            display_name: "o1",
-            input_cost: 15,
-            cached_input_cost: 7.50,
-            output_cost: 60,
+            display_name: "o3",
+            input_cost: 2,
+            cached_input_cost: 0.5,
+            output_cost: 8,
           },
           {
             name: "gpt-4.1",
@@ -177,14 +198,23 @@ module DiscourseAi
         id: "open_router",
         models: [
           {
-            name: "meta-llama/llama-3.3-70b-instruct",
-            tokens: 128_000,
-            display_name: "Llama 3.3 70B",
+            name: "x-ai/grok-3-beta",
+            tokens: 131_072,
+            display_name: "xAI Grok 3 Beta",
+            input_cost: 3,
+            output_cost: 15,
           },
           {
-            name: "google/gemini-flash-1.5-exp",
-            tokens: 1_000_000,
-            display_name: "Gemini Flash 1.5 Exp",
+            name: "deepseek/deepseek-r1-0528:free",
+            tokens: 163_000,
+            display_name: "DeepSeek R1 0528 - free",
+          },
+          {
+            name: "meta-llama/llama-3.3-70b-instruct",
+            tokens: 131_072,
+            display_name: "Llama 3.3 70B Instruct",
+            input_cost: 0.05,
+            output_cost: 0.25,
           },
         ],
         tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer,
@@ -665,6 +665,51 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
       expect(log.response_tokens).to eq(30)
     end
 
+    describe "max output tokens" do
+      it "respects max output tokens supplied to model unconditionally, even with thinking" do
+        model.update!(
+          provider_params: {
+            enable_reasoning: true,
+            reasoning_tokens: 1000,
+          },
+          max_output_tokens: 2000,
+        )
+
+        parsed_body = nil
+        stub_request(:post, url).with(
+          body:
+            proc do |req_body|
+              parsed_body = JSON.parse(req_body, symbolize_names: true)
+              true
+            end,
+          headers: {
+            "Content-Type" => "application/json",
+            "X-Api-Key" => "123",
+            "Anthropic-Version" => "2023-06-01",
+          },
+        ).to_return(
+          status: 200,
+          body: {
+            id: "msg_123",
+            type: "message",
+            role: "assistant",
+            content: [{ type: "text", text: "test response" }],
+            model: "claude-3-opus-20240229",
+            usage: {
+              input_tokens: 10,
+              output_tokens: 5,
+            },
+          }.to_json,
+        )
+
+        llm.generate(prompt, user: Discourse.system_user, max_tokens: 2500)
+        expect(parsed_body[:max_tokens]).to eq(2000)
+
+        llm.generate(prompt, user: Discourse.system_user)
+        expect(parsed_body[:max_tokens]).to eq(2000)
+      end
+    end
+
     describe "parameter disabling" do
       it "excludes disabled parameters from the request" do
         model.update!(provider_params: { disable_top_p: true, disable_temperature: true })
@@ -179,6 +179,40 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
       expect(parsed.dig(:generationConfig, :thinkingConfig)).to eq({ thinkingBudget: 10_000 })
     end
 
+    it "correctly handles max output tokens" do
+      model.update!(max_output_tokens: 1000)
+
+      response = gemini_mock.response("some response mode").to_json
+
+      req_body = nil
+
+      llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
+      url = "#{model.url}:generateContent?key=123"
+
+      stub_request(:post, url).with(
+        body:
+          proc do |_req_body|
+            req_body = _req_body
+            true
+          end,
+      ).to_return(status: 200, body: response)
+
+      response = llm.generate("Hello", user: user, max_tokens: 10_000)
+      parsed = JSON.parse(req_body, symbolize_names: true)
+
+      expect(parsed.dig(:generationConfig, :maxOutputTokens)).to eq(1000)
+
+      response = llm.generate("Hello", user: user, max_tokens: 50)
+      parsed = JSON.parse(req_body, symbolize_names: true)
+
+      expect(parsed.dig(:generationConfig, :maxOutputTokens)).to eq(50)
+
+      response = llm.generate("Hello", user: user)
+      parsed = JSON.parse(req_body, symbolize_names: true)
+
+      expect(parsed.dig(:generationConfig, :maxOutputTokens)).to eq(1000)
+    end
+
     it "clamps thinking tokens within allowed limits" do
       model.update!(provider_params: { enable_thinking: "true", thinking_tokens: "30000" })
 
@@ -551,7 +585,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
     data: {"candidates": [{"content": {"parts": [{"text": "\\","}],"role": "model"},"finishReason": "STOP"}],"usageMetadata": {"promptTokenCount": 399,"candidatesTokenCount": 191,"totalTokenCount": 590},"modelVersion": "gemini-1.5-pro-002"}
 
     data: {"candidates": [{"content": {"parts": [{"text": "\\""}],"role": "model"}}],"usageMetadata": {"promptTokenCount": 399,"totalTokenCount": 399},"modelVersion": "gemini-1.5-pro-002"}
 
     data: {"candidates": [{"content": {"parts": [{"text": "num"}],"role": "model"},"finishReason": "STOP"}],"usageMetadata": {"promptTokenCount": 399,"candidatesTokenCount": 191,"totalTokenCount": 590},"modelVersion": "gemini-1.5-pro-002"}
 
     data: {"candidates": [{"content": {"parts": [{"text": "\\":"}],"role": "model"},"safetyRatings": [{"category": "HARM_CATEGORY_HATE_SPEECH","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_DANGEROUS_CONTENT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_HARASSMENT","probability": "NEGLIGIBLE"},{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT","probability": "NEGLIGIBLE"}]}],"usageMetadata": {"promptTokenCount": 399,"totalTokenCount": 399},"modelVersion": "gemini-1.5-pro-002"}
@@ -173,7 +173,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
 
   describe "max tokens for reasoning models" do
     it "uses max_completion_tokens for reasoning models" do
-      model.update!(name: "o3-mini")
+      model.update!(name: "o3-mini", max_output_tokens: 999)
       llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}")
       prompt =
         DiscourseAi::Completions::Prompt.new(
@@ -201,7 +201,13 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
       llm.generate(prompt, user: user, max_tokens: 1000) { |chunk| result << chunk }
 
       expect(result).to eq("hello")
-      expect(body_parsed["max_completion_tokens"]).to eq(1000)
+      expect(body_parsed["max_completion_tokens"]).to eq(999)
+
+      llm.generate(prompt, user: user, max_tokens: 100) { |chunk| result << chunk }
+      expect(body_parsed["max_completion_tokens"]).to eq(100)
+
+      llm.generate(prompt, user: user) { |chunk| result << chunk }
+      expect(body_parsed["max_completion_tokens"]).to eq(999)
     end
   end
 
@@ -14,7 +14,7 @@ RSpec.describe "Managing LLM configurations", type: :system, js: true do
   it "correctly sets defaults" do
     visit "/admin/plugins/discourse-ai/ai-llms"
 
-    find("[data-llm-id='anthropic-claude-3-5-haiku'] button").click()
+    find("[data-llm-id='anthropic-claude-opus-4-0'] button").click()
     form.field("api_key").fill_in("abcd")
     form.field("enabled_chat_bot").toggle
     form.submit
@@ -26,9 +26,9 @@ RSpec.describe "Managing LLM configurations", type: :system, js: true do
     expect(llm.api_key).to eq("abcd")
 
     preset = DiscourseAi::Completions::Llm.presets.find { |p| p[:id] == "anthropic" }
-    model_preset = preset[:models].find { |m| m[:name] == "claude-3-5-haiku" }
+    model_preset = preset[:models].find { |m| m[:name] == "claude-opus-4-0" }
 
-    expect(llm.name).to eq("claude-3-5-haiku")
+    expect(llm.name).to eq("claude-opus-4-0")
     expect(llm.url).to eq(preset[:endpoint])
     expect(llm.tokenizer).to eq(preset[:tokenizer].to_s)
     expect(llm.max_prompt_tokens.to_i).to eq(model_preset[:tokens])