Sam a3c827efcc
FEATURE: allow personas to supply top_p and temperature params (#459)
* FEATURE: allow personas to supply top_p and temperature params

Code assistants are generally more focused at a lower temperature.
This amends the SQL Helper persona to run at a temperature of 0.2,
versus the more common LLM default of 1.0.

A reduced temperature leads to more focused, concise, and predictable
answers from the SQL Helper.
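
For illustration, here is a minimal sketch of how a persona might
supply these params. The exact class layout here is an assumption; the
grounded detail is that Bot#reply forwards non-nil `temperature` and
`top_p` readers to the LLM call (see the code below).

module DiscourseAi
  module AiBot
    module Personas
      # Hypothetical override; assumes the Persona base class exposes
      # `temperature` and `top_p` readers picked up by Bot#reply.
      class SqlHelper < Persona
        def temperature
          0.2 # lower temperature => more focused, deterministic answers
        end

        def top_p
          nil # nil is skipped by Bot#reply, so the model default applies
        end
      end
    end
  end
end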

* fix tests

* This is not perfect, but far better than what we do today

Instead of fetching:

1. Draft sequence
2. Draft body

we now skip (2), which means the composer needs "only" one HTTP request
to open. We also want to eliminate (1), but that is a trickier core
change; we may figure out how to pull it off later (by deferring it to
the first draft save).

Value of bot drafts < value of opening bot conversations really fast
2024-02-03 07:09:34 +11:00


# frozen_string_literal: true

module DiscourseAi
module AiBot
class Bot
BOT_NOT_FOUND = Class.new(StandardError)
      MAX_COMPLETIONS = 5

def self.as(bot_user, persona: DiscourseAi::AiBot::Personas::General.new)
new(bot_user, persona)
      end

def initialize(bot_user, persona)
@bot_user = bot_user
@persona = persona
      end

attr_reader :bot_user
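
      # Asks the LLM to suggest a short (7 word) topic title summarizing
      # the conversation context.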
def get_updated_title(conversation_context, post_user)
system_insts = <<~TEXT.strip
You are titlebot. Given a topic, you will figure out a title.
          You will never respond with anything but a 7-word topic title.
TEXT
title_prompt =
DiscourseAi::Completions::Prompt.new(system_insts, messages: conversation_context)
title_prompt.push(
type: :user,
content:
"Based on our previous conversation, suggest a 7 word title without quoting any of it.",
)
DiscourseAi::Completions::Llm
.proxy(model)
.generate(title_prompt, user: post_user)
.strip
.split("\n")
.last
end
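
      # Streams a reply for the given context. Tool calls found in the
      # stream are invoked; a tool may chain additional completions, up
      # to MAX_COMPLETIONS in total.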
def reply(context, &update_blk)
prompt = persona.craft_prompt(context)
total_completions = 0
ongoing_chain = true
low_cost = false
raw_context = []
user = context[:user]
llm_kwargs = { user: user }
llm_kwargs[:temperature] = persona.temperature if persona.temperature
llm_kwargs[:top_p] = persona.top_p if persona.top_p
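
        # Chain loop: each pass is one LLM round trip; tools may request
        # follow-up completions until MAX_COMPLETIONS is reached.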
while total_completions <= MAX_COMPLETIONS && ongoing_chain
current_model = model(prefer_low_cost: low_cost)
llm = DiscourseAi::Completions::Llm.proxy(current_model)
tool_found = false
result =
llm.generate(prompt, **llm_kwargs) do |partial, cancel|
if (tool = persona.find_tool(partial))
tool_found = true
ongoing_chain = tool.chain_next_response?
low_cost = tool.low_cost?
tool_call_id = tool.tool_call_id
invocation_result_json = invoke_tool(tool, llm, cancel, &update_blk).to_json
tool_call_message = {
type: :tool_call,
id: tool_call_id,
content: { name: tool.name, arguments: tool.parameters }.to_json,
}
tool_message = { type: :tool, id: tool_call_id, content: invocation_result_json }
if tool.standalone?
standalone_context =
context.dup.merge(
conversation_context: [
context[:conversation_context].last,
tool_call_message,
tool_message,
],
)
prompt = persona.craft_prompt(standalone_context)
else
prompt.push(**tool_call_message)
prompt.push(**tool_message)
end
raw_context << [tool_call_message[:content], tool_call_id, "tool_call"]
raw_context << [invocation_result_json, tool_call_id, "tool"]
else
update_blk.call(partial, cancel, nil)
end
end
if !tool_found
ongoing_chain = false
raw_context << [result, bot_user.username]
end
total_completions += 1
# do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS)
prompt.tools = [] if total_completions == MAX_COMPLETIONS
end
raw_context
      end

      attr_reader :persona

      private
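
      # Invokes a tool, streaming progress placeholders through update_blk,
      # then appends the tool's details to the reply.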
def invoke_tool(tool, llm, cancel, &update_blk)
update_blk.call("", cancel, build_placeholder(tool.summary, ""))
result =
tool.invoke(bot_user, llm) do |progress|
placeholder = build_placeholder(tool.summary, progress)
update_blk.call("", cancel, placeholder)
end
tool_details = build_placeholder(tool.summary, tool.details, custom_raw: tool.custom_raw)
update_blk.call(tool_details, cancel, nil)
result
      end

def model(prefer_low_cost: false)
# HACK(roman): We'll do this until we define how we represent different providers in the bot settings
default_model =
case bot_user.id
when DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID
if DiscourseAi::Completions::Endpoints::AwsBedrock.correctly_configured?("claude-2")
"aws_bedrock:claude-2"
else
"anthropic:claude-2"
end
when DiscourseAi::AiBot::EntryPoint::GPT4_ID
"open_ai:gpt-4"
when DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
"open_ai:gpt-4-turbo"
when DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
"open_ai:gpt-3.5-turbo-16k"
when DiscourseAi::AiBot::EntryPoint::MIXTRAL_ID
if DiscourseAi::Completions::Endpoints::Vllm.correctly_configured?(
"mistralai/Mixtral-8x7B-Instruct-v0.1",
)
"vllm:mistralai/Mixtral-8x7B-Instruct-v0.1"
else
"hugging_face:mistralai/Mixtral-8x7B-Instruct-v0.1"
end
when DiscourseAi::AiBot::EntryPoint::GEMINI_ID
"google:gemini-pro"
when DiscourseAi::AiBot::EntryPoint::FAKE_ID
"fake:fake"
else
nil
end
if %w[open_ai:gpt-4 open_ai:gpt-4-turbo].include?(default_model) && prefer_low_cost
return "open_ai:gpt-3.5-turbo-16k"
end
default_model
end
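
      # True when a streamed partial contains an <invoke> element, the
      # XML-style tool-call marker.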
def tool_invocation?(partial)
Nokogiri::HTML5.fragment(partial).at("invoke").present?
end
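
      # Builds the collapsible <details> HTML placeholder rendered while a
      # tool runs (and its final summary once the tool completes).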
def build_placeholder(summary, details, custom_raw: nil)
placeholder = +(<<~HTML)
<details>
<summary>#{summary}</summary>
<p>#{details}</p>
</details>
HTML
if custom_raw
placeholder << "\n"
placeholder << custom_raw
else
# we need this for cursor placeholder to work
# doing this in CSS is very hard
          # if changing this, test with a custom tool such as search
placeholder << "<span></span>\n\n"
end
placeholder
end
end
end
end