discourse-ai/lib/completions/dialects/dialect.rb

196 lines
5.3 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
module DiscourseAi
module Completions
module Dialects
class Dialect
class << self
def can_translate?(llm_model)
raise NotImplemented
end
def all_dialects
[
DiscourseAi::Completions::Dialects::ChatGpt,
DiscourseAi::Completions::Dialects::Gemini,
DiscourseAi::Completions::Dialects::Claude,
DiscourseAi::Completions::Dialects::Command,
DiscourseAi::Completions::Dialects::Ollama,
DiscourseAi::Completions::Dialects::Mistral,
DiscourseAi::Completions::Dialects::Nova,
DiscourseAi::Completions::Dialects::OpenAiCompatible,
]
end
def dialect_for(llm_model)
dialects = []
if Rails.env.test? || Rails.env.development?
dialects = [DiscourseAi::Completions::Dialects::Fake]
end
dialects = dialects.concat(all_dialects)
dialect = dialects.find { |d| d.can_translate?(llm_model) }
raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect
dialect
end
end
def initialize(generic_prompt, llm_model, opts: {})
@prompt = generic_prompt
@opts = opts
@llm_model = llm_model
end
VALID_ID_REGEX = /\A[a-zA-Z0-9_]+\z/
def can_end_with_assistant_msg?
false
end
def native_tool_support?
false
end
def vision_support?
llm_model.vision_enabled?
end
def tools
@tools ||= tools_dialect.translated_tools
end
def tool_choice
prompt.tool_choice
end
def translate
messages = prompt.messages
# Some models use an assistant msg to improve long-context responses.
if messages.last[:type] == :model && can_end_with_assistant_msg?
messages = messages.dup
messages.pop
end
trim_messages(messages).map { |msg| send("#{msg[:type]}_msg", msg) }.compact
end
def conversation_context
raise NotImplemented
end
def max_prompt_tokens
raise NotImplemented
end
attr_reader :prompt
private
attr_reader :opts, :llm_model
def trim_messages(messages)
prompt_limit = max_prompt_tokens
current_token_count = 0
message_step_size = (prompt_limit / 25).to_i * -1
trimmed_messages = []
range = (0..-1)
if messages.dig(0, :type) == :system
max_system_tokens = prompt_limit * 0.6
system_message = messages[0]
system_size = calculate_message_token(system_message)
if system_size > max_system_tokens
system_message[:content] = tokenizer.truncate(
system_message[:content],
max_system_tokens,
)
end
trimmed_messages << system_message
current_token_count += calculate_message_token(system_message)
range = (1..-1)
end
reversed_trimmed_msgs = []
messages[range].reverse.each do |msg|
break if current_token_count >= prompt_limit
message_tokens = calculate_message_token(msg)
dupped_msg = msg.dup
# Don't trim tool call metadata.
if msg[:type] == :tool_call
break if current_token_count + message_tokens + per_message_overhead > prompt_limit
current_token_count += message_tokens + per_message_overhead
reversed_trimmed_msgs << dupped_msg
next
end
# Trimming content to make sure we respect token limit.
while dupped_msg[:content].present? &&
message_tokens + current_token_count + per_message_overhead > prompt_limit
dupped_msg[:content] = dupped_msg[:content][0..message_step_size] || ""
message_tokens = calculate_message_token(dupped_msg)
end
next if dupped_msg[:content].blank?
current_token_count += message_tokens + per_message_overhead
reversed_trimmed_msgs << dupped_msg
end
reversed_trimmed_msgs.pop if reversed_trimmed_msgs.last&.dig(:type) == :tool
trimmed_messages.concat(reversed_trimmed_msgs.reverse)
end
def per_message_overhead
0
end
def calculate_message_token(msg)
llm_model.tokenizer_class.size(msg[:content].to_s)
end
def tools_dialect
@tools_dialect ||= DiscourseAi::Completions::Dialects::XmlTools.new(prompt.tools)
end
def system_msg(msg)
raise NotImplemented
end
FEATURE: AI artifacts (#898) This is a significant PR that introduces AI Artifacts functionality to the discourse-ai plugin along with several other improvements. Here are the key changes: 1. AI Artifacts System: - Adds a new `AiArtifact` model and database migration - Allows creation of web artifacts with HTML, CSS, and JavaScript content - Introduces security settings (`strict`, `lax`, `disabled`) for controlling artifact execution - Implements artifact rendering in iframes with sandbox protection - New `CreateArtifact` tool for AI to generate interactive content 2. Tool System Improvements: - Adds support for partial tool calls, allowing incremental updates during generation - Better handling of tool call states and progress tracking - Improved XML tool processing with CDATA support - Fixes for tool parameter handling and duplicate invocations 3. LLM Provider Updates: - Updates for Anthropic Claude models with correct token limits - Adds support for native/XML tool modes in Gemini integration - Adds new model configurations including Llama 3.1 models - Improvements to streaming response handling 4. UI Enhancements: - New artifact viewer component with expand/collapse functionality - Security controls for artifact execution (click-to-run in strict mode) - Improved dialog and response handling - Better error management for tool execution 5. Security Improvements: - Sandbox controls for artifact execution - Public/private artifact sharing controls - Security settings to control artifact behavior - CSP and frame-options handling for artifacts 6. Technical Improvements: - Better post streaming implementation - Improved error handling in completions - Better memory management for partial tool calls - Enhanced testing coverage 7. Configuration: - New site settings for artifact security - Extended LLM model configurations - Additional tool configuration options This PR significantly enhances the plugin's capabilities for generating and displaying interactive content while maintaining security and providing flexible configuration options for administrators.
2024-11-18 17:22:39 -05:00
def model_msg(msg)
raise NotImplemented
end
def user_msg(msg)
raise NotImplemented
end
def tool_call_msg(msg)
FEATURE: AI artifacts (#898) This is a significant PR that introduces AI Artifacts functionality to the discourse-ai plugin along with several other improvements. Here are the key changes: 1. AI Artifacts System: - Adds a new `AiArtifact` model and database migration - Allows creation of web artifacts with HTML, CSS, and JavaScript content - Introduces security settings (`strict`, `lax`, `disabled`) for controlling artifact execution - Implements artifact rendering in iframes with sandbox protection - New `CreateArtifact` tool for AI to generate interactive content 2. Tool System Improvements: - Adds support for partial tool calls, allowing incremental updates during generation - Better handling of tool call states and progress tracking - Improved XML tool processing with CDATA support - Fixes for tool parameter handling and duplicate invocations 3. LLM Provider Updates: - Updates for Anthropic Claude models with correct token limits - Adds support for native/XML tool modes in Gemini integration - Adds new model configurations including Llama 3.1 models - Improvements to streaming response handling 4. UI Enhancements: - New artifact viewer component with expand/collapse functionality - Security controls for artifact execution (click-to-run in strict mode) - Improved dialog and response handling - Better error management for tool execution 5. Security Improvements: - Sandbox controls for artifact execution - Public/private artifact sharing controls - Security settings to control artifact behavior - CSP and frame-options handling for artifacts 6. Technical Improvements: - Better post streaming implementation - Improved error handling in completions - Better memory management for partial tool calls - Enhanced testing coverage 7. Configuration: - New site settings for artifact security - Extended LLM model configurations - Additional tool configuration options This PR significantly enhances the plugin's capabilities for generating and displaying interactive content while maintaining security and providing flexible configuration options for administrators.
2024-11-18 17:22:39 -05:00
new_content = tools_dialect.from_raw_tool_call(msg)
msg = msg.merge(content: new_content)
model_msg(msg)
end
def tool_msg(msg)
FEATURE: AI artifacts (#898) This is a significant PR that introduces AI Artifacts functionality to the discourse-ai plugin along with several other improvements. Here are the key changes: 1. AI Artifacts System: - Adds a new `AiArtifact` model and database migration - Allows creation of web artifacts with HTML, CSS, and JavaScript content - Introduces security settings (`strict`, `lax`, `disabled`) for controlling artifact execution - Implements artifact rendering in iframes with sandbox protection - New `CreateArtifact` tool for AI to generate interactive content 2. Tool System Improvements: - Adds support for partial tool calls, allowing incremental updates during generation - Better handling of tool call states and progress tracking - Improved XML tool processing with CDATA support - Fixes for tool parameter handling and duplicate invocations 3. LLM Provider Updates: - Updates for Anthropic Claude models with correct token limits - Adds support for native/XML tool modes in Gemini integration - Adds new model configurations including Llama 3.1 models - Improvements to streaming response handling 4. UI Enhancements: - New artifact viewer component with expand/collapse functionality - Security controls for artifact execution (click-to-run in strict mode) - Improved dialog and response handling - Better error management for tool execution 5. Security Improvements: - Sandbox controls for artifact execution - Public/private artifact sharing controls - Security settings to control artifact behavior - CSP and frame-options handling for artifacts 6. Technical Improvements: - Better post streaming implementation - Improved error handling in completions - Better memory management for partial tool calls - Enhanced testing coverage 7. Configuration: - New site settings for artifact security - Extended LLM model configurations - Additional tool configuration options This PR significantly enhances the plugin's capabilities for generating and displaying interactive content while maintaining security and providing flexible configuration options for administrators.
2024-11-18 17:22:39 -05:00
new_content = tools_dialect.from_raw_tool(msg)
msg = msg.merge(content: new_content)
user_msg(msg)
end
end
end
end
end