mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-07-02 04:22:40 +00:00
* DEV: refactor bot internals

  This introduces a proper object for bot context, making it simpler to improve context management as we go, because we have a nice object to work with. Starts refactoring to allow a single message to have multiple uploads throughout.

* transplant method to message builder
* chipping away at inline uploads
* image support is improved but not fully fixed yet; partially working in Anthropic, still quite a few dialects to go
* OpenAI and Claude are now working
* Gemini is now working as well
* fix Nova
* more dialects...
* fix Ollama
* fix specs
* update artifact fixed
* more tests
* spam scanner
* pass more specs
* bunch of specs improved
* more bug fixes
* all the rest of the tests are working
* improve test coverage and ensure custom tools are aware of the new context object
* tests are working, but we need more tests
* resolve merge conflict
* new preamble and expanded specs on AI tool
* remove the concept of "standalone tools" — this is no longer needed; we can set custom raw, and tool details are injected into tool calls
266 lines
7.6 KiB
Ruby
266 lines
7.6 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module Completions
|
|
module Dialects
|
|
class Dialect
|
|
class << self
|
|
# Whether this dialect can translate prompts for the given LlmModel.
# Abstract: every concrete dialect must override this.
#
# @param llm_model [LlmModel]
# @return [Boolean]
def can_translate?(llm_model)
  # Fixed: was `raise NotImplemented`, which raised NameError (Ruby defines
  # no NotImplemented constant); NotImplementedError is the intended class.
  raise NotImplementedError
end
|
|
|
|
# Registry of all concrete dialect implementations, in priority order:
# dialect_for picks the first entry whose can_translate? accepts the model,
# so more specific dialects must appear before OpenAiCompatible (the
# catch-all at the end).
def all_dialects
  [
    DiscourseAi::Completions::Dialects::ChatGpt,
    DiscourseAi::Completions::Dialects::Gemini,
    DiscourseAi::Completions::Dialects::Claude,
    DiscourseAi::Completions::Dialects::Command,
    DiscourseAi::Completions::Dialects::Ollama,
    DiscourseAi::Completions::Dialects::Mistral,
    DiscourseAi::Completions::Dialects::Nova,
    DiscourseAi::Completions::Dialects::OpenAiCompatible,
  ]
end
|
|
|
|
# Resolve the dialect class able to translate prompts for the given model.
# In test/development the Fake dialect is consulted first so specs can
# short-circuit real provider translation.
#
# @param llm_model [LlmModel]
# @return [Class] a Dialect subclass
# @raise [DiscourseAi::Completions::Llm::UNKNOWN_MODEL] when nothing matches
def dialect_for(llm_model)
  candidates =
    if Rails.env.test? || Rails.env.development?
      [DiscourseAi::Completions::Dialects::Fake]
    else
      []
    end

  candidates += all_dialects

  matched = candidates.find { |dialect| dialect.can_translate?(llm_model) }
  raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL unless matched

  matched
end
|
|
end
|
|
|
|
# Build a dialect for one translation pass.
#
# @param generic_prompt [Object] the backend-agnostic prompt to translate
#   (exposes messages, tools, tool_choice, encode_upload)
# @param llm_model [LlmModel] model whose provider format we target
# @param opts [Hash] dialect-specific options
def initialize(generic_prompt, llm_model, opts: {})
  @llm_model = llm_model
  @prompt = generic_prompt
  @opts = opts
end
|
|
|
|
# Identifiers (e.g. tool/parameter names) must be strictly alphanumeric or
# underscore. NOTE(review): not referenced within this file — presumably
# used by subclasses or tool serializers; confirm before removing.
VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/
|
|
|
|
# Whether the provider API has native tool/function calling.
# Defaults to false, which routes tool traffic through the XML tools
# serializer (see tools_dialect) and enables the inject_done path in
# translate.
def native_tool_support?
  false
end
|
|
|
|
# Whether image uploads may be inlined for this model, per the model's
# vision flag.
def vision_support?
  llm_model.vision_enabled?
end
|
|
|
|
# Tool definitions translated into this dialect's wire format.
# Memoized per dialect instance (one instance == one translation pass).
def tools
  @tools ||= tools_dialect.translated_tools
end
|
|
|
|
# Caller-requested tool policy from the generic prompt (e.g. :none, as
# checked by translate to stop further tool calls).
def tool_choice
  prompt.tool_choice
end
|
|
|
|
# Assistant-side prefill text used to force a final answer once tool use
# has been disabled (tool_choice :none).
def self.no_more_tool_calls_text
  # note, Anthropic must never prefill with an ending whitespace
  "I WILL NOT USE TOOLS IN THIS REPLY, user expressed they wanted to stop using tool calls.\nHere is the best, complete, answer I can come up with given the information I have."
end
|
|
|
|
# User-side instruction counterpart to no_more_tool_calls_text, addressed
# to the model instead of prefilled as the model's reply.
def self.no_more_tool_calls_text_user
  "DO NOT USE TOOLS IN YOUR REPLY. Return the best answer you can given the information I supplied you."
end
|
|
|
|
# Instance-level convenience delegating to the class-level prefill text.
def no_more_tool_calls_text
  self.class.no_more_tool_calls_text
end
|
|
|
|
# Instance-level convenience delegating to the class-level user text.
def no_more_tool_calls_text_user
  self.class.no_more_tool_calls_text_user
end
|
|
|
|
# Translate the generic prompt into this dialect's message format.
#
# Messages are first trimmed to the token budget, then each one is routed
# to the matching per-type serializer. When the model lacks native tool
# support and the caller set tool_choice :none, the final :tool result is
# wrapped with tools_dialect.inject_done so the transcript signals that no
# further tool calls should follow.
#
# @return [Array] dialect-specific messages (nil entries removed)
# @raise [ArgumentError] on an unrecognized message type
def translate
  msgs = trim_messages(prompt.messages)
  last = msgs.last

  mark_last_tool_done =
    !native_tool_support? && last && last[:type].to_sym == :tool && prompt.tool_choice == :none

  msgs
    .map do |msg|
      case msg[:type].to_sym
      when :system
        system_msg(msg)
      when :user
        user_msg(msg)
      when :model
        model_msg(msg)
      when :tool
        if mark_last_tool_done && msg == last
          tools_dialect.inject_done { tool_msg(msg) }
        else
          tool_msg(msg)
        end
      when :tool_call
        tool_call_msg(msg)
      else
        raise ArgumentError, "Unknown message type: #{msg[:type]}"
      end
    end
    .compact
end
|
|
|
|
# Abstract: subclasses return the translated conversation context.
# Fixed: was `raise NotImplemented`, which raised NameError (no such Ruby
# constant); NotImplementedError is the conventional abstract-method error.
def conversation_context
  raise NotImplementedError
end
|
|
|
|
# Abstract: subclasses return the total prompt token budget used by
# trim_messages. Fixed: was `raise NotImplemented` (NameError, since Ruby
# defines no NotImplemented constant); use NotImplementedError.
def max_prompt_tokens
  raise NotImplementedError
end
|
|
|
|
# The generic prompt object being translated (set in initialize).
attr_reader :prompt
|
|
|
|
private

# Internal collaborators: construction options hash and the LlmModel.
attr_reader :opts, :llm_model
|
|
|
|
# Trim `messages` so their combined size fits within max_prompt_tokens.
#
# Approach:
#   - A leading :system message is always kept, truncated down to at most
#     60% of the budget when oversized. NOTE(review): the truncation writes
#     into the original message hash (messages[0] is not duped), mutating
#     the caller's prompt — confirm this is intended.
#   - The remaining messages are scanned newest-first and kept while the
#     token budget lasts; oversized contents are shortened iteratively,
#     each pass slicing off roughly prompt_limit / 25 trailing characters.
#   - :tool_call messages are never truncated: kept whole or dropped.
#   - A trailing :tool result whose :tool_call was dropped is popped so the
#     transcript stays well-formed.
#
# NOTE(review): `tokenizer` is not defined in this file — presumably a
# helper (here or in subclasses) returning the model tokenizer; verify.
def trim_messages(messages)
  prompt_limit = max_prompt_tokens
  current_token_count = 0
  # Negative index used to slice content from the end: content[0..-k].
  message_step_size = (prompt_limit / 25).to_i * -1

  trimmed_messages = []

  range = (0..-1)
  if messages.dig(0, :type) == :system
    max_system_tokens = prompt_limit * 0.6
    system_message = messages[0]
    system_size = calculate_message_token(system_message)

    if system_size > max_system_tokens
      system_message[:content] = tokenizer.truncate(
        system_message[:content],
        max_system_tokens,
      )
    end

    trimmed_messages << system_message
    current_token_count += calculate_message_token(system_message)
    # System message consumed; walk the rest of the array below.
    range = (1..-1)
  end

  reversed_trimmed_msgs = []

  # Newest messages are most relevant, so iterate in reverse and stop once
  # the budget is exhausted.
  messages[range].reverse.each do |msg|
    break if current_token_count >= prompt_limit

    message_tokens = calculate_message_token(msg)

    dupped_msg = msg.dup

    # Don't trim tool call metadata.
    if msg[:type] == :tool_call
      break if current_token_count + message_tokens + per_message_overhead > prompt_limit

      current_token_count += message_tokens + per_message_overhead
      reversed_trimmed_msgs << dupped_msg
      next
    end

    # Trimming content to make sure we respect token limit.
    while dupped_msg[:content].present? &&
            message_tokens + current_token_count + per_message_overhead > prompt_limit
      dupped_msg[:content] = dupped_msg[:content][0..message_step_size] || ""
      message_tokens = calculate_message_token(dupped_msg)
    end

    # Fully trimmed away — skip the message entirely.
    next if dupped_msg[:content].blank?

    current_token_count += message_tokens + per_message_overhead

    reversed_trimmed_msgs << dupped_msg
  end

  # An orphaned tool result (its :tool_call got dropped) would confuse the
  # provider, so remove it.
  reversed_trimmed_msgs.pop if reversed_trimmed_msgs.last&.dig(:type) == :tool

  trimmed_messages.concat(reversed_trimmed_msgs.reverse)
end
|
|
|
|
# Fixed token overhead charged per message when budgeting in trim_messages.
# Zero here; dialects with per-message envelope costs can override.
def per_message_overhead
  0
end
|
|
|
|
# Token count of a message's textual content using the model's tokenizer.
# Counts only :content (coerced to String), not type/name metadata.
def calculate_message_token(msg)
  llm_model.tokenizer_class.size(msg[:content].to_s)
end
|
|
|
|
# XML-based tool serializer used when the model lacks native tool support
# (see native_tool_support?). Memoized per instance.
# NOTE(review): dialects with native tools presumably override this —
# confirm against subclasses.
def tools_dialect
  @tools_dialect ||= DiscourseAi::Completions::Dialects::XmlTools.new(prompt.tools)
end
|
|
|
|
# Abstract: serialize a :system message into this dialect's format.
# Fixed: was `raise NotImplemented` (NameError); NotImplementedError is
# the conventional abstract-method error.
def system_msg(msg)
  raise NotImplementedError
end
|
|
|
|
# Abstract: serialize a :model (assistant) message into this dialect's
# format. Fixed: was `raise NotImplemented` (NameError); use
# NotImplementedError.
def model_msg(msg)
  raise NotImplementedError
end
|
|
|
|
# Abstract: serialize a :user message into this dialect's format.
# Fixed: was `raise NotImplemented` (NameError); use NotImplementedError.
def user_msg(msg)
  raise NotImplementedError
end
|
|
|
|
# Serialize a :tool_call message: the tool invocation is rendered by the
# tools dialect, then emitted as a model-authored message.
def tool_call_msg(msg)
  model_msg(msg.merge(content: tools_dialect.from_raw_tool_call(msg)))
end
|
|
|
|
# Serialize a :tool (tool result) message: the result is rendered by the
# tools dialect, then emitted as a user-authored message.
def tool_msg(msg)
  user_msg(msg.merge(content: tools_dialect.from_raw_tool(msg)))
end
|
|
|
|
# Encode mixed message content (strings interleaved with upload hashes and
# other parts) into an array of provider-specific parts.
#
# Adjacent strings are coalesced into a single text part. An upload hash
# ({upload_id: ...}) becomes an image part only when allow_vision is true
# and the upload can be encoded; otherwise it falls through to
# other_encoder (when given), as does any other element. Encoders that
# return nil drop the part.
#
# @param content [String, Hash, Array] raw content, scalar or array
# @param image_encoder [#call] wraps an encoded upload
# @param text_encoder [#call] wraps a run of text
# @param other_encoder [#call, nil] fallback for unrecognized parts
# @param allow_vision [Boolean] whether image parts are permitted
# @return [Array] encoded parts in original order
def to_encoded_content_array(
  content:,
  image_encoder:,
  text_encoder:,
  other_encoder: nil,
  allow_vision:
)
  parts = content.is_a?(Array) ? content : [content]

  encoded = []
  pending_text = +""

  parts.each do |part|
    if part.is_a?(String)
      pending_text << part
    elsif part.is_a?(Hash) && part.key?(:upload_id) && allow_vision
      # Flush any buffered text before the image so ordering is preserved.
      unless pending_text.empty?
        encoded << text_encoder.call(pending_text)
        pending_text = +""
      end

      payload = prompt.encode_upload(part[:upload_id])
      encoded << image_encoder.call(payload) if payload
    elsif other_encoder
      extra = other_encoder.call(part)
      encoded << extra if extra
    end
  end

  encoded << text_encoder.call(pending_text) unless pending_text.empty?
  encoded
end
|
|
end
|
|
end
|
|
end
|
|
end
|