- #{I18n.t("discourse_ai.ai_bot.command_summary.#{self.class.name}")}
-
- #{localized_description}
-
-
-
- HTML
-
- raw << custom_raw if custom_raw.present?
-
- raw = @post.raw.sub(@placeholder, raw)
-
- @post.revise(bot_user, { raw: raw }, skip_validations: true, skip_revision: true)
-
- if chain_next_response
- # somewhat annoying but whitespace was stripped in revise
- # so we need to save again
- @post.raw = raw
- @post.save!(validate: false)
- end
-
- [chain_next_response, @post]
- end
-
- def format_results(rows, column_names = nil, args: nil)
- rows = rows&.map { |row| yield row } if block_given?
-
- if !column_names
- index = -1
- column_indexes = {}
-
- rows =
- rows&.map do |data|
- new_row = []
- data.each do |key, value|
- found_index = column_indexes[key.to_s] ||= (index += 1)
- new_row[found_index] = value
- end
- new_row
- end
- column_names = column_indexes.keys
- end
-
- # this is not the most efficient format
- # however this is needed cause GPT 3.5 / 4 was steered using JSON
- result = { column_names: column_names, rows: rows }
- result[:args] = args if args
- result
- end
-
- protected
-
- attr_reader :bot_user, :args
- end
- end
- end
-end
diff --git a/lib/ai_bot/commands/dall_e_command.rb b/lib/ai_bot/commands/dall_e_command.rb
deleted file mode 100644
index 8d92398a..00000000
--- a/lib/ai_bot/commands/dall_e_command.rb
+++ /dev/null
@@ -1,122 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class DallECommand < Command
- class << self
- def name
- "dall_e"
- end
-
- def desc
- "Renders images from supplied descriptions"
- end
-
- def parameters
- [
- Parameter.new(
- name: "prompts",
- description:
- "The prompts used to generate or create or draw the image (5000 chars or less, be creative) up to 4 prompts",
- type: "array",
- item_type: "string",
- required: true,
- ),
- ]
- end
- end
-
- def result_name
- "results"
- end
-
- def description_args
- { prompt: @last_prompt }
- end
-
- def chain_next_response
- false
- end
-
- def custom_raw
- @custom_raw
- end
-
- def process(prompts:)
- # max 4 prompts
- prompts = prompts.take(4)
-
- @last_prompt = prompts[0]
-
- show_progress(localized_description)
-
- results = nil
-
- # this ensures multisite safety since background threads
- # generate the images
- api_key = SiteSetting.ai_openai_api_key
- api_url = SiteSetting.ai_openai_dall_e_3_url
-
- threads = []
- prompts.each_with_index do |prompt, index|
- threads << Thread.new(prompt) do |inner_prompt|
- attempts = 0
- begin
- DiscourseAi::Inference::OpenAiImageGenerator.perform!(
- inner_prompt,
- api_key: api_key,
- api_url: api_url,
- )
- rescue => e
- attempts += 1
- sleep 2
- retry if attempts < 3
- Discourse.warn_exception(e, message: "Failed to generate image for prompt #{prompt}")
- nil
- end
- end
- end
-
- while true
- show_progress(".", progress_caret: true)
- break if threads.all? { |t| t.join(2) }
- end
-
- results = threads.filter_map(&:value)
-
- if results.blank?
- return { prompts: prompts, error: "Something went wrong, could not generate image" }
- end
-
- uploads = []
-
- results.each_with_index do |result, index|
- result[:data].each do |image|
- Tempfile.create("v1_txt2img_#{index}.png") do |file|
- file.binmode
- file.write(Base64.decode64(image[:b64_json]))
- file.rewind
- uploads << {
- prompt: image[:revised_prompt],
- upload: UploadCreator.new(file, "image.png").create_for(bot_user.id),
- }
- end
- end
- end
-
- @custom_raw = <<~RAW
-
- [grid]
- #{
- uploads
- .map do |item|
- "![#{item[:prompt].gsub(/\|\'\"/, "")}|512x512, 50%](#{item[:upload].short_url})"
- end
- .join(" ")
- }
- [/grid]
- RAW
-
- { prompts: uploads.map { |item| item[:prompt] } }
- end
- end
-end
diff --git a/lib/ai_bot/commands/db_schema_command.rb b/lib/ai_bot/commands/db_schema_command.rb
deleted file mode 100644
index 96831ae8..00000000
--- a/lib/ai_bot/commands/db_schema_command.rb
+++ /dev/null
@@ -1,54 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class DbSchemaCommand < Command
- class << self
- def name
- "schema"
- end
-
- def desc
- "Will load schema information for specific tables in the database"
- end
-
- def parameters
- [
- Parameter.new(
- name: "tables",
- description:
- "list of tables to load schema information for, comma seperated list eg: (users,posts))",
- type: "string",
- required: true,
- ),
- ]
- end
- end
-
- def result_name
- "results"
- end
-
- def description_args
- { tables: @tables.join(", ") }
- end
-
- def process(tables:)
- @tables = tables.split(",").map(&:strip)
-
- table_info = {}
- DB
- .query(<<~SQL, @tables)
- select table_name, column_name, data_type from information_schema.columns
- where table_schema = 'public'
- and table_name in (?)
- order by table_name
- SQL
- .each { |row| (table_info[row.table_name] ||= []) << "#{row.column_name} #{row.data_type}" }
-
- schema_info =
- table_info.map { |table_name, columns| "#{table_name}(#{columns.join(",")})" }.join("\n")
-
- { schema_info: schema_info, tables: tables }
- end
- end
-end
diff --git a/lib/ai_bot/commands/google_command.rb b/lib/ai_bot/commands/google_command.rb
deleted file mode 100644
index 7c829034..00000000
--- a/lib/ai_bot/commands/google_command.rb
+++ /dev/null
@@ -1,82 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class GoogleCommand < Command
- class << self
- def name
- "google"
- end
-
- def desc
- "Will search using Google - global internet search (supports all Google search operators)"
- end
-
- def parameters
- [
- Parameter.new(
- name: "query",
- description: "The search query",
- type: "string",
- required: true,
- ),
- ]
- end
-
- def custom_system_message
- "You were trained on OLD data, lean on search to get up to date information from the web"
- end
- end
-
- def result_name
- "results"
- end
-
- def description_args
- {
- count: @last_num_results || 0,
- query: @last_query || "",
- url: "https://google.com/search?q=#{CGI.escape(@last_query || "")}",
- }
- end
-
- def process(query:)
- @last_query = query
-
- show_progress(localized_description)
-
- api_key = SiteSetting.ai_google_custom_search_api_key
- cx = SiteSetting.ai_google_custom_search_cx
- query = CGI.escape(query)
- uri =
- URI("https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{query}&num=10")
- body = Net::HTTP.get(uri)
-
- parse_search_json(body, query)
- end
-
- def minimize_field(result, field, max_tokens: 100)
- data = result[field]
- return "" if data.blank?
-
- data = ::DiscourseAi::Tokenizer::BertTokenizer.truncate(data, max_tokens).squish
- data
- end
-
- def parse_search_json(json_data, query)
- parsed = JSON.parse(json_data)
- results = parsed["items"]
-
- @last_num_results = parsed.dig("searchInformation", "totalResults").to_i
-
- format_results(results, args: query) do |result|
- {
- title: minimize_field(result, "title"),
- link: minimize_field(result, "link"),
- snippet: minimize_field(result, "snippet", max_tokens: 120),
- displayLink: minimize_field(result, "displayLink"),
- formattedUrl: minimize_field(result, "formattedUrl"),
- }
- end
- end
- end
-end
diff --git a/lib/ai_bot/commands/image_command.rb b/lib/ai_bot/commands/image_command.rb
deleted file mode 100644
index cedf9208..00000000
--- a/lib/ai_bot/commands/image_command.rb
+++ /dev/null
@@ -1,135 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class ImageCommand < Command
- class << self
- def name
- "image"
- end
-
- def desc
- "Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images! (when user asks to draw, paint or other synonyms try this)"
- end
-
- def parameters
- [
- Parameter.new(
- name: "prompts",
- description:
- "The prompts used to generate or create or draw the image (40 words or less, be creative) up to 4 prompts",
- type: "array",
- item_type: "string",
- required: true,
- ),
- Parameter.new(
- name: "seeds",
- description:
- "The seed used to generate the image (optional) - can be used to retain image style on amended prompts",
- type: "array",
- item_type: "integer",
- ),
- ]
- end
- end
-
- def result_name
- "results"
- end
-
- def description_args
- { prompt: @last_prompt }
- end
-
- def chain_next_response
- false
- end
-
- def custom_raw
- @custom_raw
- end
-
- def process(prompts:, seeds: nil)
- # max 4 prompts
- prompts = prompts[0..3]
- seeds = seeds[0..3] if seeds
-
- @last_prompt = prompts[0]
-
- show_progress(localized_description)
-
- results = nil
-
- # this ensures multisite safety since background threads
- # generate the images
- api_key = SiteSetting.ai_stability_api_key
- engine = SiteSetting.ai_stability_engine
- api_url = SiteSetting.ai_stability_api_url
-
- threads = []
- prompts.each_with_index do |prompt, index|
- seed = seeds ? seeds[index] : nil
- threads << Thread.new(seed, prompt) do |inner_seed, inner_prompt|
- attempts = 0
- begin
- DiscourseAi::Inference::StabilityGenerator.perform!(
- inner_prompt,
- engine: engine,
- api_key: api_key,
- api_url: api_url,
- image_count: 1,
- seed: inner_seed,
- )
- rescue => e
- attempts += 1
- retry if attempts < 3
- Rails.logger.warn("Failed to generate image for prompt #{prompt}: #{e}")
- nil
- end
- end
- end
-
- while true
- show_progress(".", progress_caret: true)
- break if threads.all? { |t| t.join(2) }
- end
-
- results = threads.map(&:value).compact
-
- if !results.present?
- return { prompts: prompts, error: "Something went wrong, could not generate image" }
- end
-
- uploads = []
-
- results.each_with_index do |result, index|
- result[:artifacts].each do |image|
- Tempfile.create("v1_txt2img_#{index}.png") do |file|
- file.binmode
- file.write(Base64.decode64(image[:base64]))
- file.rewind
- uploads << {
- prompt: prompts[index],
- upload: UploadCreator.new(file, "image.png").create_for(bot_user.id),
- seed: image[:seed],
- }
- end
- end
- end
-
- @custom_raw = <<~RAW
-
- [grid]
- #{
- uploads
- .map do |item|
- "![#{item[:prompt].gsub(/\|\'\"/, "")}|512x512, 50%](#{item[:upload].short_url})"
- end
- .join(" ")
- }
- [/grid]
- RAW
-
- { prompts: uploads.map { |item| item[:prompt] }, seeds: uploads.map { |item| item[:seed] } }
- end
- end
-end
diff --git a/lib/ai_bot/commands/option.rb b/lib/ai_bot/commands/option.rb
deleted file mode 100644
index 4705dcba..00000000
--- a/lib/ai_bot/commands/option.rb
+++ /dev/null
@@ -1,23 +0,0 @@
-# frozen_string_literal: true
-module DiscourseAi
- module AiBot
- module Commands
- class Option
- attr_reader :command, :name, :type
- def initialize(command:, name:, type:)
- @command = command
- @name = name.to_s
- @type = type
- end
-
- def localized_name
- I18n.t("discourse_ai.ai_bot.command_options.#{command.name}.#{name}.name")
- end
-
- def localized_description
- I18n.t("discourse_ai.ai_bot.command_options.#{command.name}.#{name}.description")
- end
- end
- end
- end
-end
diff --git a/lib/ai_bot/commands/parameter.rb b/lib/ai_bot/commands/parameter.rb
deleted file mode 100644
index b4a22a55..00000000
--- a/lib/ai_bot/commands/parameter.rb
+++ /dev/null
@@ -1,18 +0,0 @@
-# frozen_string_literal: true
-module DiscourseAi
- module AiBot
- module Commands
- class Parameter
- attr_reader :item_type, :name, :description, :type, :enum, :required
- def initialize(name:, description:, type:, enum: nil, required: false, item_type: nil)
- @name = name
- @description = description
- @type = type
- @enum = enum
- @required = required
- @item_type = item_type
- end
- end
- end
- end
-end
diff --git a/lib/ai_bot/commands/read_command.rb b/lib/ai_bot/commands/read_command.rb
deleted file mode 100644
index b8025b51..00000000
--- a/lib/ai_bot/commands/read_command.rb
+++ /dev/null
@@ -1,77 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class ReadCommand < Command
- class << self
- def name
- "read"
- end
-
- def desc
- "Will read a topic or a post on this Discourse instance"
- end
-
- def parameters
- [
- Parameter.new(
- name: "topic_id",
- description: "the id of the topic to read",
- type: "integer",
- required: true,
- ),
- Parameter.new(
- name: "post_number",
- description: "the post number to read",
- type: "integer",
- required: false,
- ),
- ]
- end
- end
-
- def description_args
- { title: @title, url: @url }
- end
-
- def process(topic_id:, post_number: nil)
- not_found = { topic_id: topic_id, description: "Topic not found" }
-
- @title = ""
-
- topic_id = topic_id.to_i
-
- topic = Topic.find_by(id: topic_id)
- return not_found if !topic || !Guardian.new.can_see?(topic)
-
- @title = topic.title
-
- posts = Post.secured(Guardian.new).where(topic_id: topic_id).order(:post_number).limit(40)
- @url = topic.relative_url(post_number)
-
- posts = posts.where("post_number = ?", post_number) if post_number
-
- content = +<<~TEXT.strip
- title: #{topic.title}
- TEXT
-
- category_names = [topic.category&.parent_category&.name, topic.category&.name].compact.join(
- " ",
- )
- content << "\ncategories: #{category_names}" if category_names.present?
-
- if topic.tags.length > 0
- tags = DiscourseTagging.filter_visible(topic.tags, Guardian.new)
- content << "\ntags: #{tags.map(&:name).join(", ")}\n\n" if tags.length > 0
- end
-
- posts.each { |post| content << "\n\n#{post.username} said:\n\n#{post.raw}" }
-
- # TODO: 16k or 100k models can handle a lot more tokens
- content = tokenizer.truncate(content, 1500).squish
-
- result = { topic_id: topic_id, content: content, complete: true }
- result[:post_number] = post_number if post_number
- result
- end
- end
-end
diff --git a/lib/ai_bot/commands/search_command.rb b/lib/ai_bot/commands/search_command.rb
deleted file mode 100644
index f2bfd8af..00000000
--- a/lib/ai_bot/commands/search_command.rb
+++ /dev/null
@@ -1,232 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class SearchCommand < Command
- class << self
- def name
- "search"
- end
-
- def desc
- "Will search topics in the current discourse instance, when rendering always prefer to link to the topics you find"
- end
-
- def options
- [option(:base_query, type: :string), option(:max_results, type: :integer)]
- end
-
- def parameters
- [
- Parameter.new(
- name: "search_query",
- description:
- "Specific keywords to search for, space seperated (correct bad spelling, remove connector words)",
- type: "string",
- ),
- Parameter.new(
- name: "user",
- description:
- "Filter search results to this username (only include if user explicitly asks to filter by user)",
- type: "string",
- ),
- Parameter.new(
- name: "order",
- description: "search result order",
- type: "string",
- enum: %w[latest latest_topic oldest views likes],
- ),
- Parameter.new(
- name: "limit",
- description:
- "Number of results to return. Defaults to maximum number of results. Only set if absolutely necessary",
- type: "integer",
- ),
- Parameter.new(
- name: "max_posts",
- description:
- "maximum number of posts on the topics (topics where lots of people posted)",
- type: "integer",
- ),
- Parameter.new(
- name: "tags",
- description:
- "list of tags to search for. Use + to join with OR, use , to join with AND",
- type: "string",
- ),
- Parameter.new(
- name: "category",
- description: "category name to filter to",
- type: "string",
- ),
- Parameter.new(
- name: "before",
- description: "only topics created before a specific date YYYY-MM-DD",
- type: "string",
- ),
- Parameter.new(
- name: "after",
- description: "only topics created after a specific date YYYY-MM-DD",
- type: "string",
- ),
- Parameter.new(
- name: "status",
- description: "search for topics in a particular state",
- type: "string",
- enum: %w[open closed archived noreplies single_user],
- ),
- ]
- end
-
- def custom_system_message
- <<~TEXT
- You were trained on OLD data, lean on search to get up to date information about this forum
- When searching try to SIMPLIFY search terms
- Discourse search joins all terms with AND. Reduce and simplify terms to find more results.
- TEXT
- end
- end
-
- def result_name
- "results"
- end
-
- def description_args
- {
- count: @last_num_results || 0,
- query: @last_query || "",
- url: "#{Discourse.base_path}/search?q=#{CGI.escape(@last_query || "")}",
- }
- end
-
- MIN_SEMANTIC_RESULTS = 5
-
- def max_semantic_results
- max_results / 4
- end
-
- def max_results
- return 20 if !bot
-
- max_results = persona_options[:max_results].to_i
- return [max_results, 100].min if max_results > 0
-
- if bot.prompt_limit(allow_commands: false) > 30_000
- 60
- elsif bot.prompt_limit(allow_commands: false) > 10_000
- 40
- else
- 20
- end
- end
-
- def process(**search_args)
- limit = nil
-
- search_string =
- search_args
- .map do |key, value|
- if key == :search_query
- value
- elsif key == :limit
- limit = value.to_i
- nil
- else
- "#{key}:#{value}"
- end
- end
- .compact
- .join(" ")
-
- @last_query = search_string
-
- show_progress(I18n.t("discourse_ai.ai_bot.searching", query: search_string))
-
- if persona_options[:base_query].present?
- search_string = "#{search_string} #{persona_options[:base_query]}"
- end
-
- results =
- Search.execute(
- search_string.to_s + " status:public",
- search_type: :full_page,
- guardian: Guardian.new(),
- )
-
- # let's be frugal with tokens, 50 results is too much and stuff gets cut off
- limit ||= max_results
- limit = max_results if limit > max_results
-
- should_try_semantic_search = SiteSetting.ai_embeddings_semantic_search_enabled
- should_try_semantic_search &&= (limit == max_results)
- should_try_semantic_search &&= (search_args[:search_query].present?)
-
- limit = limit - max_semantic_results if should_try_semantic_search
-
- posts = results&.posts || []
- posts = posts[0..limit - 1]
-
- if should_try_semantic_search
- semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(Guardian.new())
- topic_ids = Set.new(posts.map(&:topic_id))
-
- search = Search.new(search_string, guardian: Guardian.new)
-
- results = nil
- begin
- results = semantic_search.search_for_topics(search.term)
- rescue => e
- Discourse.warn_exception(e, message: "Semantic search failed")
- end
-
- if results
- results = search.apply_filters(results)
-
- results.each do |post|
- next if topic_ids.include?(post.topic_id)
-
- topic_ids << post.topic_id
- posts << post
-
- break if posts.length >= max_results
- end
- end
- end
-
- @last_num_results = posts.length
- # this is the general pattern from core
- # if there are millions of hidden tags it may fail
- hidden_tags = nil
-
- if posts.blank?
- { args: search_args, rows: [], instruction: "nothing was found, expand your search" }
- else
- format_results(posts, args: search_args) do |post|
- category_names = [
- post.topic.category&.parent_category&.name,
- post.topic.category&.name,
- ].compact.join(" > ")
- row = {
- title: post.topic.title,
- url: Discourse.base_path + post.url,
- username: post.user&.username,
- excerpt: post.excerpt,
- created: post.created_at,
- category: category_names,
- likes: post.like_count,
- topic_views: post.topic.views,
- topic_likes: post.topic.like_count,
- topic_replies: post.topic.posts_count - 1,
- }
-
- if SiteSetting.tagging_enabled
- hidden_tags ||= DiscourseTagging.hidden_tag_names
- # using map over pluck to avoid n+1 (assuming caller preloading)
- tags = post.topic.tags.map(&:name) - hidden_tags
- row[:tags] = tags.join(", ") if tags.present?
- end
- row
- end
- end
- end
- end
-end
diff --git a/lib/ai_bot/commands/search_settings_command.rb b/lib/ai_bot/commands/search_settings_command.rb
deleted file mode 100644
index b1c86337..00000000
--- a/lib/ai_bot/commands/search_settings_command.rb
+++ /dev/null
@@ -1,85 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class SearchSettingsCommand < Command
- class << self
- def name
- "search_settings"
- end
-
- def desc
- "Will search through site settings and return top 20 results"
- end
-
- def parameters
- [
- Parameter.new(
- name: "query",
- description:
- "comma delimited list of settings to search for (e.g. 'setting_1,setting_2')",
- type: "string",
- required: true,
- ),
- ]
- end
- end
-
- def result_name
- "results"
- end
-
- def description_args
- { count: @last_num_results || 0, query: @last_query || "" }
- end
-
- INCLUDE_DESCRIPTIONS_MAX_LENGTH = 10
- MAX_RESULTS = 200
-
- def process(query:)
- @last_query = query
- @last_num_results = 0
-
- terms = query.split(",").map(&:strip).map(&:downcase).reject(&:blank?)
-
- found =
- SiteSetting.all_settings.filter do |setting|
- name = setting[:setting].to_s.downcase
- description = setting[:description].to_s.downcase
- plugin = setting[:plugin].to_s.downcase
-
- search_string = "#{name} #{description} #{plugin}"
-
- terms.any? { |term| search_string.include?(term) }
- end
-
- if found.blank?
- {
- args: {
- query: query,
- },
- rows: [],
- instruction: "no settings matched #{query}, expand your search",
- }
- else
- include_descriptions = false
-
- if found.length > MAX_RESULTS
- found = found[0..MAX_RESULTS]
- elsif found.length < INCLUDE_DESCRIPTIONS_MAX_LENGTH
- include_descriptions = true
- end
-
- @last_num_results = found.length
-
- format_results(found, args: { query: query }) do |setting|
- result = { name: setting[:setting] }
- if include_descriptions
- result[:description] = setting[:description]
- result[:plugin] = setting[:plugin]
- end
- result
- end
- end
- end
- end
-end
diff --git a/lib/ai_bot/commands/setting_context_command.rb b/lib/ai_bot/commands/setting_context_command.rb
deleted file mode 100644
index 65dd1cca..00000000
--- a/lib/ai_bot/commands/setting_context_command.rb
+++ /dev/null
@@ -1,154 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- MAX_CONTEXT_TOKENS = 2000
-
- class SettingContextCommand < Command
- def self.rg_installed?
- if defined?(@rg_installed)
- @rg_installed
- else
- @rg_installed =
- begin
- Discourse::Utils.execute_command("which", "rg")
- true
- rescue Discourse::Utils::CommandError
- false
- end
- end
- end
-
- class << self
- def name
- "setting_context"
- end
-
- def desc
- "Will provide you with full context regarding a particular site setting in Discourse"
- end
-
- def parameters
- [
- Parameter.new(
- name: "setting_name",
- description: "The name of the site setting we need context for",
- type: "string",
- required: true,
- ),
- ]
- end
- end
-
- def result_name
- "context"
- end
-
- def description_args
- { setting_name: @setting_name }
- end
-
- CODE_FILE_EXTENSIONS = "rb,js,gjs,hbs"
-
- def process(setting_name:)
- if !self.class.rg_installed?
- return(
- {
- setting_name: setting_name,
- context: "This command requires the rg command line tool to be installed on the server",
- }
- )
- end
-
- @setting_name = setting_name
- if !SiteSetting.has_setting?(setting_name)
- { setting_name: setting_name, context: "This setting does not exist" }
- else
- description = SiteSetting.description(setting_name)
- result = +"# #{setting_name}\n#{description}\n\n"
-
- setting_info =
- find_setting_info(setting_name, [Rails.root.join("config", "site_settings.yml").to_s])
- if !setting_info
- setting_info =
- find_setting_info(setting_name, Dir[Rails.root.join("plugins/**/settings.yml")])
- end
-
- result << setting_info
- result << "\n\n"
-
- %w[lib app plugins].each do |dir|
- path = Rails.root.join(dir).to_s
- result << Discourse::Utils.execute_command(
- "rg",
- setting_name,
- path,
- "-g",
- "!**/spec/**",
- "-g",
- "!**/dist/**",
- "-g",
- "*.{#{CODE_FILE_EXTENSIONS}}",
- "-C",
- "10",
- "--color",
- "never",
- "--heading",
- "--no-ignore",
- chdir: path,
- success_status_codes: [0, 1],
- )
- end
-
- result.gsub!(/^#{Regexp.escape(Rails.root.to_s)}/, "")
-
- result = tokenizer.truncate(result, MAX_CONTEXT_TOKENS)
-
- { setting_name: setting_name, context: result }
- end
- end
-
- def find_setting_info(name, paths)
- path, result = nil
-
- paths.each do |search_path|
- result =
- Discourse::Utils.execute_command(
- "rg",
- name,
- search_path,
- "-g",
- "*.{#{CODE_FILE_EXTENSIONS}}",
- "-A",
- "10",
- "--color",
- "never",
- "--heading",
- success_status_codes: [0, 1],
- )
- if !result.blank?
- path = search_path
- break
- end
- end
-
- if result.blank?
- nil
- else
- rows = result.split("\n")
- leading_spaces = rows[0].match(/^\s*/)[0].length
-
- filtered = []
-
- rows.each do |row|
- if !filtered.blank?
- break if row.match(/^\s*/)[0].length <= leading_spaces
- end
- filtered << row
- end
-
- filtered.unshift("#{path}")
- filtered.join("\n")
- end
- end
- end
-end
diff --git a/lib/ai_bot/commands/summarize_command.rb b/lib/ai_bot/commands/summarize_command.rb
deleted file mode 100644
index b8fcd13b..00000000
--- a/lib/ai_bot/commands/summarize_command.rb
+++ /dev/null
@@ -1,184 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class SummarizeCommand < Command
- class << self
- def name
- "summarize"
- end
-
- def desc
- "Will summarize a topic attempting to answer question in guidance"
- end
-
- def parameters
- [
- Parameter.new(
- name: "topic_id",
- description: "The discourse topic id to summarize",
- type: "integer",
- required: true,
- ),
- Parameter.new(
- name: "guidance",
- description: "Special guidance on how to summarize the topic",
- type: "string",
- ),
- ]
- end
- end
-
- def result_name
- "summary"
- end
-
- def standalone?
- true
- end
-
- def low_cost?
- true
- end
-
- def description_args
- { url: "#{Discourse.base_path}/t/-/#{@last_topic_id}", title: @last_topic_title || "" }
- end
-
- def process(topic_id:, guidance: nil)
- @last_topic_id = topic_id
-
- topic_id = topic_id.to_i
- topic = nil
- if topic_id > 0
- topic = Topic.find_by(id: topic_id)
- topic = nil if !topic || !Guardian.new.can_see?(topic)
- end
-
- @last_summary = nil
-
- if topic
- @last_topic_title = topic.title
-
- posts =
- Post
- .where(topic_id: topic.id)
- .where("post_type in (?)", [Post.types[:regular], Post.types[:small_action]])
- .where("not hidden")
- .order(:post_number)
-
- columns = ["posts.id", :post_number, :raw, :username]
-
- current_post_numbers = posts.limit(5).pluck(:post_number)
- current_post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
- current_post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
-
- data =
- Post
- .where(topic_id: topic.id)
- .joins(:user)
- .where("post_number in (?)", current_post_numbers)
- .order(:post_number)
- .pluck(*columns)
-
- @last_summary = summarize(data, guidance, topic)
- end
-
- if !@last_summary
- "Say: No topic found!"
- else
- "Topic summarized"
- end
- end
-
- def custom_raw
- @last_summary || I18n.t("discourse_ai.ai_bot.topic_not_found")
- end
-
- def chain_next_response
- false
- end
-
- def summarize(data, guidance, topic)
- text = +""
- data.each do |id, post_number, raw, username|
- text << "(#{post_number} #{username} said: #{raw}"
- end
-
- summaries = []
- current_section = +""
- split = []
-
- text
- .split(/\s+/)
- .each_slice(20) do |slice|
- current_section << " "
- current_section << slice.join(" ")
-
- # somehow any more will get closer to limits
- if bot.tokenize(current_section).length > 2500
- split << current_section
- current_section = +""
- end
- end
-
- split << current_section if current_section.present?
-
- split = split[0..3] + split[-3..-1] if split.length > 5
-
- split.each do |section|
- # TODO progress meter
- summary =
- generate_gpt_summary(
- section,
- topic: topic,
- context: "Guidance: #{guidance}\nYou are summarizing the topic: #{topic.title}",
- )
- summaries << summary
- end
-
- if summaries.length > 1
- messages = []
- messages << { role: "system", content: "You are a helpful bot" }
- messages << {
- role: "user",
- content:
- "concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}}",
- }
- bot.submit_prompt(messages, temperature: 0.6, max_tokens: 500, prefer_low_cost: true).dig(
- :choices,
- 0,
- :message,
- :content,
- )
- else
- summaries.first
- end
- end
-
- def generate_gpt_summary(text, topic:, context: nil, length: nil)
- length ||= 400
-
- prompt = <<~TEXT
- #{context}
- Summarize the following in #{length} words:
-
- #{text}
- TEXT
-
- system_prompt = <<~TEXT
- You are a summarization bot.
- You effectively summarise any text.
- You condense it into a shorter version.
- You understand and generate Discourse forum markdown.
- Try generating links as well the format is #{topic.url}/POST_NUMBER. eg: [ref](#{topic.url}/77)
- TEXT
-
- messages = [{ role: "system", content: system_prompt }]
- messages << { role: "user", content: prompt }
-
- result =
- bot.submit_prompt(messages, temperature: 0.6, max_tokens: length, prefer_low_cost: true)
- result.dig(:choices, 0, :message, :content)
- end
- end
-end
diff --git a/lib/ai_bot/commands/tags_command.rb b/lib/ai_bot/commands/tags_command.rb
deleted file mode 100644
index fa8381a4..00000000
--- a/lib/ai_bot/commands/tags_command.rb
+++ /dev/null
@@ -1,42 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class TagsCommand < Command
- class << self
- def name
- "tags"
- end
-
- def desc
- "Will list the 100 most popular tags on the current discourse instance"
- end
-
- def parameters
- []
- end
- end
-
- def result_name
- "results"
- end
-
- def description_args
- { count: @last_count || 0 }
- end
-
- def process
- column_names = { name: "Name", public_topic_count: "Topic Count" }
-
- tags =
- Tag
- .where("public_topic_count > 0")
- .order(public_topic_count: :desc)
- .limit(100)
- .pluck(*column_names.keys)
-
- @last_count = tags.length
-
- format_results(tags, column_names.values)
- end
- end
-end
diff --git a/lib/ai_bot/commands/time_command.rb b/lib/ai_bot/commands/time_command.rb
deleted file mode 100644
index 39e2f46e..00000000
--- a/lib/ai_bot/commands/time_command.rb
+++ /dev/null
@@ -1,49 +0,0 @@
-#frozen_string_literal: true
-
-module DiscourseAi::AiBot::Commands
- class TimeCommand < Command
- class << self
- def name
- "time"
- end
-
- def desc
- "Will generate the time in a timezone"
- end
-
- def parameters
- [
- Parameter.new(
- name: "timezone",
- description: "ALWAYS supply a Ruby compatible timezone",
- type: "string",
- required: true,
- ),
- ]
- end
- end
-
- def result_name
- "time"
- end
-
- def description_args
- { timezone: @last_timezone, time: @last_time }
- end
-
- def process(timezone:)
- time =
- begin
- Time.now.in_time_zone(timezone)
- rescue StandardError
- nil
- end
- time = Time.now if !time
-
- @last_timezone = timezone
- @last_time = time.to_s
-
- { args: { timezone: timezone }, time: time.to_s }
- end
- end
-end
diff --git a/lib/ai_bot/entry_point.rb b/lib/ai_bot/entry_point.rb
index c231927c..0567490e 100644
--- a/lib/ai_bot/entry_point.rb
+++ b/lib/ai_bot/entry_point.rb
@@ -50,7 +50,7 @@ module DiscourseAi
scope.user.in_any_groups?(SiteSetting.ai_bot_allowed_groups_map)
end,
) do
- DiscourseAi::AiBot::Personas
+ DiscourseAi::AiBot::Personas::Persona
.all(user: scope.user)
.map do |persona|
{ id: persona.id, name: persona.name, description: persona.description }
@@ -92,32 +92,19 @@ module DiscourseAi
include_condition: -> { SiteSetting.ai_bot_enabled && object.topic.private_message? },
) do
id = topic.custom_fields["ai_persona_id"]
- name = DiscourseAi::AiBot::Personas.find_by(user: scope.user, id: id.to_i)&.name if id
+ name =
+ DiscourseAi::AiBot::Personas::Persona.find_by(user: scope.user, id: id.to_i)&.name if id
name || topic.custom_fields["ai_persona"]
end
plugin.on(:post_created) do |post|
bot_ids = BOTS.map(&:first)
- if post.post_type == Post.types[:regular] && post.topic.private_message? &&
- !bot_ids.include?(post.user_id)
- if (SiteSetting.ai_bot_allowed_groups_map & post.user.group_ids).present?
- bot_id = post.topic.topic_allowed_users.where(user_id: bot_ids).first&.user_id
-
- if bot_id
- if post.post_number == 1
- post.topic.custom_fields[REQUIRE_TITLE_UPDATE] = true
- post.topic.save_custom_fields
- end
- ::Jobs.enqueue(:create_ai_reply, post_id: post.id, bot_user_id: bot_id)
- ::Jobs.enqueue_in(
- 5.minutes,
- :update_ai_bot_pm_title,
- post_id: post.id,
- bot_user_id: bot_id,
- )
- end
- end
+ # Don't schedule a reply for a bot reply.
+ if !bot_ids.include?(post.user_id)
+ bot_user = post.topic.topic_allowed_users.where(user_id: bot_ids).first&.user
+ bot = DiscourseAi::AiBot::Bot.as(bot_user)
+ DiscourseAi::AiBot::Playground.new(bot).update_playground_with(post)
end
end
diff --git a/lib/ai_bot/open_ai_bot.rb b/lib/ai_bot/open_ai_bot.rb
deleted file mode 100644
index 39f4b0b2..00000000
--- a/lib/ai_bot/open_ai_bot.rb
+++ /dev/null
@@ -1,157 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module AiBot
- class OpenAiBot < Bot
- def self.can_reply_as?(bot_user)
- open_ai_bot_ids = [
- DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID,
- DiscourseAi::AiBot::EntryPoint::GPT4_ID,
- DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID,
- ]
-
- open_ai_bot_ids.include?(bot_user.id)
- end
-
- def prompt_limit(allow_commands:)
- # provide a buffer of 120 tokens - our function counting is not
- # 100% accurate and getting numbers to align exactly is very hard
- buffer = reply_params[:max_tokens] + 50
-
- if allow_commands
- # note this is about 100 tokens over, OpenAI have a more optimal representation
- @function_size ||= tokenize(available_functions.to_json.to_s).length
- buffer += @function_size
- end
-
- if bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_TURBO_ID
- 150_000 - buffer
- elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
- 8192 - buffer
- else
- 16_384 - buffer
- end
- end
-
- def reply_params
- # technically we could allow GPT-3.5 16k more tokens
- # but lets just keep it here for now
- { temperature: 0.4, top_p: 0.9, max_tokens: 2500 }
- end
-
- def extra_tokens_per_message
- # open ai defines about 4 tokens per message of overhead
- 4
- end
-
- def submit_prompt(
- prompt,
- prefer_low_cost: false,
- post: nil,
- temperature: nil,
- top_p: nil,
- max_tokens: nil,
- &blk
- )
- params =
- reply_params.merge(
- temperature: temperature,
- top_p: top_p,
- max_tokens: max_tokens,
- ) { |key, old_value, new_value| new_value.nil? ? old_value : new_value }
-
- model = model_for(low_cost: prefer_low_cost)
-
- params[:functions] = available_functions if available_functions.present?
-
- DiscourseAi::Inference::OpenAiCompletions.perform!(
- prompt,
- model,
- **params,
- post: post,
- &blk
- )
- end
-
- def tokenizer
- DiscourseAi::Tokenizer::OpenAiTokenizer
- end
-
- def model_for(low_cost: false)
- if low_cost || bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID
- "gpt-3.5-turbo-16k"
- elsif bot_user.id == DiscourseAi::AiBot::EntryPoint::GPT4_ID
- "gpt-4"
- else
- # not quite released yet, once released we should replace with
- # gpt-4-turbo
- "gpt-4-1106-preview"
- end
- end
-
- def clean_username(username)
- if username.match?(/\0[a-zA-Z0-9_-]{1,64}\z/)
- username
- else
- # not the best in the world, but this is what we have to work with
- # if sites enable unicode usernames this can get messy
- username.gsub(/[^a-zA-Z0-9_-]/, "_")[0..63]
- end
- end
-
- def include_function_instructions_in_system_prompt?
- # open ai uses a bespoke system for function calls
- false
- end
-
- private
-
- def populate_functions(partial:, reply:, functions:, done:, current_delta:)
- return if !partial
- fn = partial.dig(:choices, 0, :delta, :function_call)
- if fn
- functions.add_function(fn[:name]) if fn[:name].present?
- functions.add_argument_fragment(fn[:arguments]) if !fn[:arguments].nil?
- functions.custom = true
- end
- end
-
- def build_message(poster_username, content, function: false, system: false)
- is_bot = poster_username == bot_user.username
-
- if function
- role = "function"
- elsif system
- role = "system"
- else
- role = is_bot ? "assistant" : "user"
- end
-
- result = { role: role, content: content }
-
- if function
- result[:name] = poster_username
- elsif !system && poster_username != bot_user.username && poster_username.present?
- # Open AI restrict name to 64 chars and only A-Za-z._ (work around)
- result[:name] = clean_username(poster_username)
- end
-
- result
- end
-
- def get_delta(partial, _context)
- partial.dig(:choices, 0, :delta, :content).to_s
- end
-
- def get_updated_title(prompt)
- DiscourseAi::Inference::OpenAiCompletions.perform!(
- prompt,
- model_for,
- temperature: 0.7,
- top_p: 0.9,
- max_tokens: 40,
- ).dig(:choices, 0, :message, :content)
- end
- end
- end
-end
diff --git a/lib/ai_bot/personas.rb b/lib/ai_bot/personas.rb
deleted file mode 100644
index ec105986..00000000
--- a/lib/ai_bot/personas.rb
+++ /dev/null
@@ -1,46 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
- module AiBot
- module Personas
- def self.system_personas
- @system_personas ||= {
- Personas::General => -1,
- Personas::SqlHelper => -2,
- Personas::Artist => -3,
- Personas::SettingsExplorer => -4,
- Personas::Researcher => -5,
- Personas::Creative => -6,
- Personas::DallE3 => -7,
- }
- end
-
- def self.system_personas_by_id
- @system_personas_by_id ||= system_personas.invert
- end
-
- def self.all(user:)
- # this needs to be dynamic cause site settings may change
- all_available_commands = Persona.all_available_commands
-
- AiPersona.all_personas.filter do |persona|
- next false if !user.in_any_groups?(persona.allowed_group_ids)
-
- if persona.system
- instance = persona.new
- (
- instance.required_commands == [] ||
- (instance.required_commands - all_available_commands).empty?
- )
- else
- true
- end
- end
- end
-
- def self.find_by(id: nil, name: nil, user:)
- all(user: user).find { |persona| persona.id == id || persona.name == name }
- end
- end
- end
-end
diff --git a/lib/ai_bot/personas/artist.rb b/lib/ai_bot/personas/artist.rb
index a6b0e034..d16d74e4 100644
--- a/lib/ai_bot/personas/artist.rb
+++ b/lib/ai_bot/personas/artist.rb
@@ -4,12 +4,12 @@ module DiscourseAi
module AiBot
module Personas
class Artist < Persona
- def commands
- [Commands::ImageCommand]
+ def tools
+ [Tools::Image]
end
- def required_commands
- [Commands::ImageCommand]
+ def required_tools
+ [Tools::Image]
end
def system_prompt
diff --git a/lib/ai_bot/personas/creative.rb b/lib/ai_bot/personas/creative.rb
index 8a0e9ea6..338577b5 100644
--- a/lib/ai_bot/personas/creative.rb
+++ b/lib/ai_bot/personas/creative.rb
@@ -4,7 +4,7 @@ module DiscourseAi
module AiBot
module Personas
class Creative < Persona
- def commands
+ def tools
[]
end
diff --git a/lib/ai_bot/personas/dall_e_3.rb b/lib/ai_bot/personas/dall_e_3.rb
index 75fb035f..65666284 100644
--- a/lib/ai_bot/personas/dall_e_3.rb
+++ b/lib/ai_bot/personas/dall_e_3.rb
@@ -4,12 +4,12 @@ module DiscourseAi
module AiBot
module Personas
class DallE3 < Persona
- def commands
- [Commands::DallECommand]
+ def tools
+ [Tools::DallE]
end
- def required_commands
- [Commands::DallECommand]
+ def required_tools
+ [Tools::DallE]
end
def system_prompt
diff --git a/lib/ai_bot/personas/general.rb b/lib/ai_bot/personas/general.rb
index 0d31e6aa..dfb79ad6 100644
--- a/lib/ai_bot/personas/general.rb
+++ b/lib/ai_bot/personas/general.rb
@@ -4,15 +4,15 @@ module DiscourseAi
module AiBot
module Personas
class General < Persona
- def commands
+ def tools
[
- Commands::SearchCommand,
- Commands::GoogleCommand,
- Commands::ImageCommand,
- Commands::ReadCommand,
- Commands::ImageCommand,
- Commands::CategoriesCommand,
- Commands::TagsCommand,
+ Tools::Search,
+ Tools::Google,
+ Tools::Image,
+ Tools::Read,
+ Tools::Image,
+ Tools::ListCategories,
+ Tools::ListTags,
]
end
diff --git a/lib/ai_bot/personas/persona.rb b/lib/ai_bot/personas/persona.rb
index 3caf2a24..481d0ddc 100644
--- a/lib/ai_bot/personas/persona.rb
+++ b/lib/ai_bot/personas/persona.rb
@@ -4,19 +4,84 @@ module DiscourseAi
module AiBot
module Personas
class Persona
- def self.name
- I18n.t("discourse_ai.ai_bot.personas.#{to_s.demodulize.underscore}.name")
+ class << self
+ def system_personas
+ @system_personas ||= {
+ Personas::General => -1,
+ Personas::SqlHelper => -2,
+ Personas::Artist => -3,
+ Personas::SettingsExplorer => -4,
+ Personas::Researcher => -5,
+ Personas::Creative => -6,
+ Personas::DallE3 => -7,
+ }
+ end
+
+ def system_personas_by_id
+ @system_personas_by_id ||= system_personas.invert
+ end
+
+ def all(user:)
+ # listing tools has to be dynamic cause site settings may change
+
+ AiPersona.all_personas.filter do |persona|
+ next false if !user.in_any_groups?(persona.allowed_group_ids)
+
+ if persona.system
+ instance = persona.new
+ (
+ instance.required_tools == [] ||
+ (instance.required_tools - all_available_tools).empty?
+ )
+ else
+ true
+ end
+ end
+ end
+
+ def find_by(id: nil, name: nil, user:)
+ all(user: user).find { |persona| persona.id == id || persona.name == name }
+ end
+
+ def name
+ I18n.t("discourse_ai.ai_bot.personas.#{to_s.demodulize.underscore}.name")
+ end
+
+ def description
+ I18n.t("discourse_ai.ai_bot.personas.#{to_s.demodulize.underscore}.description")
+ end
+
+ def all_available_tools
+ tools = [
+ Tools::ListCategories,
+ Tools::Time,
+ Tools::Search,
+ Tools::Summarize,
+ Tools::Read,
+ Tools::DbSchema,
+ Tools::SearchSettings,
+ Tools::Summarize,
+ Tools::SettingContext,
+ ]
+
+ tools << Tools::ListTags if SiteSetting.tagging_enabled
+ tools << Tools::Image if SiteSetting.ai_stability_api_key.present?
+
+ tools << Tools::DallE if SiteSetting.ai_openai_api_key.present?
+ if SiteSetting.ai_google_custom_search_api_key.present? &&
+ SiteSetting.ai_google_custom_search_cx.present?
+ tools << Tools::Google
+ end
+
+ tools
+ end
end
- def self.description
- I18n.t("discourse_ai.ai_bot.personas.#{to_s.demodulize.underscore}.description")
- end
-
- def commands
+ def tools
[]
end
- def required_commands
+ def required_tools
[]
end
@@ -24,104 +89,55 @@ module DiscourseAi
{}
end
- def render_commands(render_function_instructions:)
- return +"" if available_commands.empty?
-
- result = +""
- if render_function_instructions
- result << "\n"
- result << function_list.system_prompt
- result << "\n"
- end
- result << available_commands.map(&:custom_system_message).compact.join("\n")
- result
+ def available_tools
+ self.class.all_available_tools.filter { |tool| tools.include?(tool) }
end
- def render_system_prompt(
- topic: nil,
- render_function_instructions: true,
- allow_commands: true
- )
- substitutions = {
- site_url: Discourse.base_url,
- site_title: SiteSetting.title,
- site_description: SiteSetting.site_description,
- time: Time.zone.now,
- }
-
- substitutions[:participants] = topic.allowed_users.map(&:username).join(", ") if topic
-
- prompt =
+ def craft_prompt(context)
+ system_insts =
system_prompt.gsub(/\{(\w+)\}/) do |match|
- found = substitutions[match[1..-2].to_sym]
+ found = context[match[1..-2].to_sym]
found.nil? ? match : found.to_s
end
- if allow_commands
- prompt += render_commands(render_function_instructions: render_function_instructions)
+ insts = <<~TEXT
+ #{system_insts}
+ #{available_tools.map(&:custom_system_message).compact_blank.join("\n")}
+ TEXT
+
+ { insts: insts }.tap do |prompt|
+ prompt[:tools] = available_tools.map(&:signature) if available_tools
+ prompt[:conversation_context] = context[:conversation_context] if context[
+ :conversation_context
+ ]
end
-
- prompt
end
- def available_commands
- return @available_commands if @available_commands
- @available_commands = all_available_commands.filter { |cmd| commands.include?(cmd) }
- end
+ def find_tool(partial)
+ parsed_function = Nokogiri::HTML5.fragment(partial)
+ function_id = parsed_function.at("tool_id")&.text
+ function_name = parsed_function.at("tool_name")&.text
+ return false if function_name.nil?
- def available_functions
- # note if defined? can be a problem in test
- # this can never be nil so it is safe
- return @available_functions if @available_functions
+ tool_klass = available_tools.find { |c| c.signature.dig(:name) == function_name }
+ return false if tool_klass.nil?
- functions = []
+ arguments =
+ tool_klass.signature[:parameters]
+ .to_a
+ .reduce({}) do |memo, p|
+ argument = parsed_function.at(p[:name])&.text
+ next(memo) unless argument
- functions =
- available_commands.map do |command|
- function =
- DiscourseAi::Inference::Function.new(name: command.name, description: command.desc)
- command.parameters.each { |parameter| function.add_parameter(parameter) }
- function
- end
+ memo[p[:name].to_sym] = argument
+ memo
+ end
- @available_functions = functions
- end
-
- def function_list
- return @function_list if @function_list
-
- @function_list = DiscourseAi::Inference::FunctionList.new
- available_functions.each { |function| @function_list << function }
- @function_list
- end
-
- def self.all_available_commands
- all_commands = [
- Commands::CategoriesCommand,
- Commands::TimeCommand,
- Commands::SearchCommand,
- Commands::SummarizeCommand,
- Commands::ReadCommand,
- Commands::DbSchemaCommand,
- Commands::SearchSettingsCommand,
- Commands::SummarizeCommand,
- Commands::SettingContextCommand,
- ]
-
- all_commands << Commands::TagsCommand if SiteSetting.tagging_enabled
- all_commands << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
-
- all_commands << Commands::DallECommand if SiteSetting.ai_openai_api_key.present?
- if SiteSetting.ai_google_custom_search_api_key.present? &&
- SiteSetting.ai_google_custom_search_cx.present?
- all_commands << Commands::GoogleCommand
- end
-
- all_commands
- end
-
- def all_available_commands
- @cmds ||= self.class.all_available_commands
+ tool_klass.new(
+ arguments,
+ tool_call_id: function_id,
+ persona_options: options[tool_klass].to_h,
+ )
end
end
end
diff --git a/lib/ai_bot/personas/researcher.rb b/lib/ai_bot/personas/researcher.rb
index c565bce5..5c7f8998 100644
--- a/lib/ai_bot/personas/researcher.rb
+++ b/lib/ai_bot/personas/researcher.rb
@@ -4,12 +4,12 @@ module DiscourseAi
module AiBot
module Personas
class Researcher < Persona
- def commands
- [Commands::GoogleCommand]
+ def tools
+ [Tools::Google]
end
- def required_commands
- [Commands::GoogleCommand]
+ def required_tools
+ [Tools::Google]
end
def system_prompt
diff --git a/lib/ai_bot/personas/settings_explorer.rb b/lib/ai_bot/personas/settings_explorer.rb
index 4ba07aba..018df156 100644
--- a/lib/ai_bot/personas/settings_explorer.rb
+++ b/lib/ai_bot/personas/settings_explorer.rb
@@ -4,15 +4,8 @@ module DiscourseAi
module AiBot
module Personas
class SettingsExplorer < Persona
- def commands
- all_available_commands
- end
-
- def all_available_commands
- [
- DiscourseAi::AiBot::Commands::SettingContextCommand,
- DiscourseAi::AiBot::Commands::SearchSettingsCommand,
- ]
+ def tools
+ [Tools::SettingContext, Tools::SearchSettings]
end
def system_prompt
diff --git a/lib/ai_bot/personas/sql_helper.rb b/lib/ai_bot/personas/sql_helper.rb
index 650e099e..b048d7e3 100644
--- a/lib/ai_bot/personas/sql_helper.rb
+++ b/lib/ai_bot/personas/sql_helper.rb
@@ -27,12 +27,8 @@ module DiscourseAi
@schema = schema
end
- def commands
- all_available_commands
- end
-
- def all_available_commands
- [DiscourseAi::AiBot::Commands::DbSchemaCommand]
+ def tools
+ [Tools::DbSchema]
end
def system_prompt
diff --git a/lib/ai_bot/playground.rb b/lib/ai_bot/playground.rb
new file mode 100644
index 00000000..7869cfda
--- /dev/null
+++ b/lib/ai_bot/playground.rb
@@ -0,0 +1,228 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ class Playground
+ # An abstraction to manage the bot and topic interactions.
+ # The bot will take care of completions while this class updates the topic title
+ # and stream replies.
+
+ REQUIRE_TITLE_UPDATE = "discourse-ai-title-update"
+
+ def initialize(bot)
+ @bot = bot
+ end
+
+ def update_playground_with(post)
+ if can_attach?(post) && bot.bot_user
+ schedule_playground_titling(post, bot.bot_user)
+ schedule_bot_reply(post, bot.bot_user)
+ end
+ end
+
+ def conversation_context(post)
+ # Pay attention to the `post_number <= ?` here.
+ # We want to inject the last post as context because they are translated differently.
+ context =
+ post
+ .topic
+ .posts
+ .includes(:user)
+ .joins("LEFT JOIN post_custom_prompts ON post_custom_prompts.post_id = posts.id")
+ .where("post_number <= ?", post.post_number)
+ .order("post_number desc")
+ .where("post_type = ?", Post.types[:regular])
+ .limit(50)
+ .pluck(:raw, :username, "post_custom_prompts.custom_prompt")
+
+ result = []
+
+ first = true
+ context.each do |raw, username, custom_prompt|
+ custom_prompt_translation =
+ Proc.new do |message|
+ # We can't keep backwards-compatibility for stored functions.
+ # Tool syntax requires a tool_call_id which we don't have.
+ if message[2] != "function"
+ custom_context = {
+ content: message[0],
+ type: message[2].present? ? message[2] : "assistant",
+ }
+
+ custom_context[:name] = message[1] if custom_context[:type] != "assistant"
+
+ result << custom_context
+ end
+ end
+
+ if custom_prompt.present?
+ if first
+ custom_prompt.reverse_each(&custom_prompt_translation)
+ first = false
+ else
+ tool_call_and_tool = custom_prompt.first(2)
+ tool_call_and_tool.reverse_each(&custom_prompt_translation)
+ end
+ else
+ post_context = { # named apart from `context`, the collection being iterated
+ content: raw,
+ type: (available_bot_usernames.include?(username) ? "assistant" : "user"),
+ }
+
+ post_context[:name] = username if post_context[:type] == "user"
+
+ result << post_context
+ end
+ end
+
+ result
+ end
+
+ def title_playground(post)
+ context = conversation_context(post)
+
+ bot
+ .get_updated_title(context, post.user)
+ .tap do |new_title|
+ PostRevisor.new(post.topic.first_post, post.topic).revise!(
+ bot.bot_user,
+ title: new_title.sub(/\A"/, "").sub(/"\Z/, ""),
+ )
+ post.topic.custom_fields.delete(REQUIRE_TITLE_UPDATE) # same key schedule_playground_titling sets
+ post.topic.save_custom_fields
+ end
+ end
+
+ def reply_to(post)
+ reply = +""
+ start = Time.now
+
+ context = {
+ site_url: Discourse.base_url,
+ site_title: SiteSetting.title,
+ site_description: SiteSetting.site_description,
+ time: Time.zone.now,
+ participants: post.topic.allowed_users.map(&:username).join(", "),
+ conversation_context: conversation_context(post),
+ user: post.user,
+ }
+
+ reply_post =
+ PostCreator.create!(
+ bot.bot_user,
+ topic_id: post.topic_id,
+ raw: I18n.t("discourse_ai.ai_bot.placeholder_reply"),
+ skip_validations: true,
+ )
+
+ redis_stream_key = "gpt_cancel:#{reply_post.id}"
+ Discourse.redis.setex(redis_stream_key, 60, 1)
+
+ new_custom_prompts =
+ bot.reply(context) do |partial, cancel, placeholder|
+ reply << partial
+ raw = reply.dup
+ raw << "\n\n" << placeholder if placeholder.present?
+
+ if !Discourse.redis.get(redis_stream_key)
+ cancel&.call
+
+ reply_post.update!(raw: reply, cooked: PrettyText.cook(reply))
+ end
+
+ # Minor hack to skip the delay during tests.
+ if placeholder.blank?
+ next if (Time.now - start < 0.5) && !Rails.env.test?
+ start = Time.now
+ end
+
+ Discourse.redis.expire(redis_stream_key, 60)
+
+ publish_update(reply_post, raw: raw)
+ end
+
+ return if reply.blank?
+
+ reply_post.tap do |bot_reply|
+ publish_update(bot_reply, done: true)
+
+ bot_reply.revise(
+ bot.bot_user,
+ { raw: reply },
+ skip_validations: true,
+ skip_revision: true,
+ )
+
+ bot_reply.post_custom_prompt ||= bot_reply.build_post_custom_prompt(custom_prompt: [])
+ prompt = bot_reply.post_custom_prompt.custom_prompt || []
+ prompt.concat(new_custom_prompts)
+ prompt << [reply, bot.bot_user.username]
+ bot_reply.post_custom_prompt.update!(custom_prompt: prompt)
+ end
+ end
+
+ private
+
+ attr_reader :bot
+
+ def can_attach?(post)
+ return false if bot.bot_user.nil?
+ return false if post.post_type != Post.types[:regular]
+ return false unless post.topic.private_message?
+ return false if (SiteSetting.ai_bot_allowed_groups_map & post.user.group_ids).blank?
+
+ true
+ end
+
+ def schedule_playground_titling(post, bot_user)
+ if post.post_number == 1
+ post.topic.custom_fields[REQUIRE_TITLE_UPDATE] = true
+ post.topic.save_custom_fields
+ end
+
+ ::Jobs.enqueue_in(
+ 5.minutes,
+ :update_ai_bot_pm_title,
+ post_id: post.id,
+ bot_user_id: bot_user.id,
+ )
+ end
+
+ def schedule_bot_reply(post, bot_user)
+ ::Jobs.enqueue(:create_ai_reply, post_id: post.id, bot_user_id: bot_user.id)
+ end
+
+ def context(topic)
+ {
+ site_url: Discourse.base_url,
+ site_title: SiteSetting.title,
+ site_description: SiteSetting.site_description,
+ time: Time.zone.now,
+ participants: topic.allowed_users.map(&:username).join(", "),
+ }
+ end
+
+ def publish_update(bot_reply_post, payload)
+ MessageBus.publish(
+ "discourse-ai/ai-bot/topic/#{bot_reply_post.topic_id}",
+ payload.merge(post_id: bot_reply_post.id, post_number: bot_reply_post.post_number),
+ user_ids: bot_reply_post.topic.allowed_user_ids,
+ )
+ end
+
+ def available_bot_usernames
+ @bot_usernames ||= DiscourseAi::AiBot::EntryPoint::BOTS.map(&:second)
+ end
+
+ def clean_username(username)
+ if username.match?(/\A[a-zA-Z0-9_-]{1,64}\z/) # whole-string match, \A to \z
+ username
+ else
+ # not the best in the world, but this is what we have to work with
+ # if sites enable unicode usernames this can get messy
+ username.gsub(/[^a-zA-Z0-9_-]/, "_")[0..63]
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/dall_e.rb b/lib/ai_bot/tools/dall_e.rb
new file mode 100644
index 00000000..1e9532c3
--- /dev/null
+++ b/lib/ai_bot/tools/dall_e.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class DallE < Tool
+ def self.signature
+ {
+ name: name,
+ description: "Renders images from supplied descriptions",
+ parameters: [
+ {
+ name: "prompts",
+ description:
+ "The prompts used to generate or create or draw the image (5000 chars or less, be creative) up to 4 prompts",
+ type: "array",
+ item_type: "string",
+ required: true,
+ },
+ ],
+ }
+ end
+
+ def self.name
+ "dall_e"
+ end
+
+ def prompts
+ parameters[:prompts]
+ end
+
+ def chain_next_response?
+ false
+ end
+
+ def invoke(bot_user, _llm)
+ # Cap the request at 4 prompts; each one is rendered on its own thread below.
+ max_prompts = prompts.take(4)
+ progress = +""
+
+ yield(progress)
+
+ results = nil
+
+ # this ensures multisite safety since background threads
+ # generate the images
+ api_key = SiteSetting.ai_openai_api_key
+ api_url = SiteSetting.ai_openai_dall_e_3_url
+
+ threads = []
+ max_prompts.each_with_index do |prompt, index|
+ threads << Thread.new(prompt) do |inner_prompt|
+ attempts = 0
+ begin
+ DiscourseAi::Inference::OpenAiImageGenerator.perform!(
+ inner_prompt,
+ api_key: api_key,
+ api_url: api_url,
+ )
+ rescue => e
+ attempts += 1
+ sleep 2
+ retry if attempts < 3
+ Discourse.warn_exception(
+ e,
+ message: "Failed to generate image for prompt #{prompt}",
+ )
+ nil
+ end
+ end
+ end
+
+ while true
+ progress << "."
+ yield(progress)
+ break if threads.all? { |t| t.join(2) }
+ end
+
+ results = threads.filter_map(&:value)
+
+ if results.blank?
+ return { prompts: max_prompts, error: "Something went wrong, could not generate image" }
+ end
+
+ uploads = []
+
+ results.each_with_index do |result, index|
+ result[:data].each do |image|
+ Tempfile.create("v1_txt2img_#{index}.png") do |file|
+ file.binmode
+ file.write(Base64.decode64(image[:b64_json]))
+ file.rewind
+ uploads << {
+ prompt: image[:revised_prompt],
+ upload: UploadCreator.new(file, "image.png").create_for(bot_user.id),
+ }
+ end
+ end
+ end
+
+ self.custom_raw = <<~RAW
+
+ [grid]
+ #{
+ uploads
+ .map do |item|
+ "![#{item[:prompt].gsub(/[|'"]/, "")}|512x512, 50%](#{item[:upload].short_url})"
+ end
+ .join(" ")
+ }
+ [/grid]
+ RAW
+
+ { prompts: uploads.map { |item| item[:prompt] } }
+ end
+
+ protected
+
+ def description_args
+ { prompt: prompts.first }
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/db_schema.rb b/lib/ai_bot/tools/db_schema.rb
new file mode 100644
index 00000000..7359260f
--- /dev/null
+++ b/lib/ai_bot/tools/db_schema.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class DbSchema < Tool
+ def self.signature
+ {
+ name: name,
+ description: "Will load schema information for specific tables in the database",
+ parameters: [
+ {
+ name: "tables",
+ description:
+ "list of tables to load schema information for, comma seperated list eg: (users,posts))",
+ type: "string",
+ required: true,
+ },
+ ],
+ }
+ end
+
+ def self.name
+ "schema"
+ end
+
+ def tables
+ parameters[:tables]
+ end
+
+ def invoke(_bot_user, _llm)
+ tables_arr = tables.split(",").map(&:strip)
+
+ table_info = {}
+ DB
+ .query(<<~SQL, tables_arr)
+ select table_name, column_name, data_type from information_schema.columns
+ where table_schema = 'public'
+ and table_name in (?)
+ order by table_name
+ SQL
+ .each do |row|
+ (table_info[row.table_name] ||= []) << "#{row.column_name} #{row.data_type}"
+ end
+
+ schema_info =
+ table_info
+ .map { |table_name, columns| "#{table_name}(#{columns.join(",")})" }
+ .join("\n")
+
+ { schema_info: schema_info, tables: tables }
+ end
+
+ protected
+
+ def description_args
+ { tables: tables.split(",").map(&:strip).join(", ") } # tables is a comma separated String
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/google.rb b/lib/ai_bot/tools/google.rb
new file mode 100644
index 00000000..64930ebe
--- /dev/null
+++ b/lib/ai_bot/tools/google.rb
@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class Google < Tool
+ def self.signature
+ {
+ name: name,
+ description:
+ "Will search using Google - global internet search (supports all Google search operators)",
+ parameters: [
+ { name: "query", description: "The search query", type: "string", required: true },
+ ],
+ }
+ end
+
+ def self.custom_system_message
+ "You were trained on OLD data, lean on search to get up to date information from the web"
+ end
+
+ def self.name
+ "google"
+ end
+
+ def query
+ parameters[:query].to_s
+ end
+
+ def invoke(bot_user, llm)
+ yield("") # Triggers placeholder update
+
+ api_key = SiteSetting.ai_google_custom_search_api_key
+ cx = SiteSetting.ai_google_custom_search_cx
+ escaped_query = CGI.escape(query)
+ uri =
+ URI(
+ "https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{escaped_query}&num=10",
+ )
+ body = Net::HTTP.get(uri)
+
+ parse_search_json(body, escaped_query, llm)
+ end
+
+ attr_reader :results_count
+
+ protected
+
+ def description_args
+ {
+ count: results_count || 0,
+ query: query,
+ url: "https://google.com/search?q=#{CGI.escape(query)}",
+ }
+ end
+
+ private
+
+ def minimize_field(result, field, llm, max_tokens: 100)
+ data = result[field]
+ return "" if data.blank?
+
+ llm.tokenizer.truncate(data, max_tokens).squish
+ end
+
+ def parse_search_json(json_data, escaped_query, llm)
+ parsed = JSON.parse(json_data)
+ results = parsed["items"]
+
+ @results_count = parsed.dig("searchInformation", "totalResults").to_i
+
+ format_results(results, args: escaped_query) do |result|
+ {
+ title: minimize_field(result, "title", llm),
+ link: minimize_field(result, "link", llm),
+ snippet: minimize_field(result, "snippet", llm, max_tokens: 120),
+ displayLink: minimize_field(result, "displayLink", llm),
+ formattedUrl: minimize_field(result, "formattedUrl", llm),
+ }
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/image.rb b/lib/ai_bot/tools/image.rb
new file mode 100644
index 00000000..e9d47fa3
--- /dev/null
+++ b/lib/ai_bot/tools/image.rb
@@ -0,0 +1,144 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class Image < Tool
+ def self.signature
+ {
+ name: name,
+ description:
+ "Renders an image from the description (remove all connector words, keep it to 40 words or less). Despite being a text based bot you can generate images! (when user asks to draw, paint or other synonyms try this)",
+ parameters: [
+ {
+ name: "prompts",
+ description:
+ "The prompts used to generate or create or draw the image (40 words or less, be creative) up to 4 prompts",
+ type: "array",
+ item_type: "string",
+ required: true,
+ },
+ {
+ name: "seeds",
+ description:
+ "The seed used to generate the image (optional) - can be used to retain image style on amended prompts",
+ type: "array",
+ item_type: "integer",
+ required: true,
+ },
+ ],
+ }
+ end
+
+ def self.name
+ "image"
+ end
+
+ def prompts
+ JSON.parse(parameters[:prompts].to_s)
+ end
+
+ def seeds
+ parameters[:seeds]
+ end
+
+ def chain_next_response?
+ false
+ end
+
+ def invoke(bot_user, _llm)
+ # max 4 prompts; seeds get a distinct local name so the #seeds reader is not shadowed
+ selected_prompts = prompts.take(4)
+ selected_seeds = seeds.is_a?(Array) ? seeds.take(4) : nil
+
+ progress = +""
+ yield(progress)
+
+ results = nil
+
+ # this ensures multisite safety since background threads
+ # generate the images
+ api_key = SiteSetting.ai_stability_api_key
+ engine = SiteSetting.ai_stability_engine
+ api_url = SiteSetting.ai_stability_api_url
+
+ threads = []
+ selected_prompts.each_with_index do |prompt, index|
+ seed = selected_seeds ? selected_seeds[index] : nil
+ threads << Thread.new(seed, prompt) do |inner_seed, inner_prompt|
+ attempts = 0
+ begin
+ DiscourseAi::Inference::StabilityGenerator.perform!(
+ inner_prompt,
+ engine: engine,
+ api_key: api_key,
+ api_url: api_url,
+ image_count: 1,
+ seed: inner_seed,
+ )
+ rescue => e
+ attempts += 1
+ retry if attempts < 3
+ Rails.logger.warn("Failed to generate image for prompt #{prompt}: #{e}")
+ nil
+ end
+ end
+ end
+
+ while true
+ progress << "."
+ yield(progress)
+ break if threads.all? { |t| t.join(2) }
+ end
+
+ results = threads.map(&:value).compact
+
+ if !results.present?
+ return { prompts: selected_prompts, error: "Something went wrong, could not generate image" }
+ end
+
+ uploads = []
+
+ results.each_with_index do |result, index|
+ result[:artifacts].each do |image|
+ Tempfile.create("v1_txt2img_#{index}.png") do |file|
+ file.binmode
+ file.write(Base64.decode64(image[:base64]))
+ file.rewind
+ uploads << {
+ prompt: selected_prompts[index],
+ upload: UploadCreator.new(file, "image.png").create_for(bot_user.id),
+ seed: image[:seed],
+ }
+ end
+ end
+ end
+
+ @custom_raw = <<~RAW
+
+ [grid]
+ #{
+ uploads
+ .map do |item|
+ "![#{item[:prompt].gsub(/[|'"]/, "")}|512x512, 50%](#{item[:upload].short_url})"
+ end
+ .join(" ")
+ }
+ [/grid]
+ RAW
+
+ {
+ prompts: uploads.map { |item| item[:prompt] },
+ seeds: uploads.map { |item| item[:seed] },
+ }
+ end
+
+ protected
+
+ def description_args
+ { prompt: prompts.first }
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/list_categories.rb b/lib/ai_bot/tools/list_categories.rb
new file mode 100644
index 00000000..52bf0cb6
--- /dev/null
+++ b/lib/ai_bot/tools/list_categories.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class ListCategories < Tool
+ def self.signature
+ {
+ name: name,
+ description:
+ "Will list the categories on the current discourse instance, prefer to format with # in front of the category name",
+ }
+ end
+
+ def self.name
+ "categories"
+ end
+
+ def invoke(_bot_user, _llm)
+ columns = {
+ name: "Name",
+ slug: "Slug",
+ description: "Description",
+ posts_year: "Posts Year",
+ posts_month: "Posts Month",
+ posts_week: "Posts Week",
+ id: "id",
+ parent_category_id: "parent_category_id",
+ }
+
+ rows = Category.where(read_restricted: false).limit(100).pluck(*columns.keys)
+
+ @last_count = rows.length
+
+ { rows: rows, column_names: columns.values }
+ end
+
+ private
+
+ def description_args
+ { count: @last_count || 0 }
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/list_tags.rb b/lib/ai_bot/tools/list_tags.rb
new file mode 100644
index 00000000..e12c2491
--- /dev/null
+++ b/lib/ai_bot/tools/list_tags.rb
@@ -0,0 +1,41 @@
+#frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class ListTags < Tool
+ def self.signature
+ {
+ name: name,
+ description: "Will list the 100 most popular tags on the current discourse instance",
+ }
+ end
+
+ def self.name
+ "tags"
+ end
+
+ def invoke(_bot_user, _llm)
+ column_names = { name: "Name", public_topic_count: "Topic Count" }
+
+ tags =
+ Tag
+ .where("public_topic_count > 0")
+ .order(public_topic_count: :desc)
+ .limit(100)
+ .pluck(*column_names.keys)
+
+ @last_count = tags.length
+
+ format_results(tags, column_names.values)
+ end
+
+ private
+
+ def description_args
+ { count: @last_count || 0 }
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/option.rb b/lib/ai_bot/tools/option.rb
new file mode 100644
index 00000000..1723e7d1
--- /dev/null
+++ b/lib/ai_bot/tools/option.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class Option
+ attr_reader :tool, :name, :type
+
+ def initialize(tool:, name:, type:)
+ @tool = tool
+ @name = name.to_s
+ @type = type
+ end
+
+ def localized_name
+ I18n.t("discourse_ai.ai_bot.command_options.#{tool.signature[:name]}.#{name}.name")
+ end
+
+ def localized_description
+ I18n.t("discourse_ai.ai_bot.command_options.#{tool.signature[:name]}.#{name}.description")
+ end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/read.rb b/lib/ai_bot/tools/read.rb
new file mode 100644
index 00000000..12a522e6
--- /dev/null
+++ b/lib/ai_bot/tools/read.rb
@@ -0,0 +1,90 @@
+#frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+      class Read < Tool # Tool that returns the text of a topic, or of a single post in it.
+        def self.signature # Name, description and parameter list describing this tool.
+          {
+            name: name,
+            description: "Will read a topic or a post on this Discourse instance",
+            parameters: [
+              {
+                name: "topic_id",
+                description: "the id of the topic to read",
+                type: "integer",
+                required: true,
+              },
+              {
+                name: "post_number",
+                description: "the post number to read",
+                type: "integer",
+                required: true, # NOTE(review): invoke also copes with a missing post_number
+              },
+            ],
+          }
+        end
+
+        def self.name
+          "read"
+        end
+
+        attr_reader :title, :url # populated by invoke, consumed by description_args
+
+        def topic_id
+          parameters[:topic_id]
+        end
+
+        def post_number
+          parameters[:post_number]
+        end
+
+        def invoke(_bot_user, llm) # Returns a Hash: either the not_found payload or the content.
+          not_found = { topic_id: topic_id, description: "Topic not found" }
+
+          @title = ""
+
+          topic = Topic.find_by(id: topic_id.to_i)
+          return not_found if !topic || !Guardian.new.can_see?(topic) # Guardian built with no user
+
+          @title = topic.title
+
+          posts = Post.secured(Guardian.new).where(topic_id: topic_id).order(:post_number).limit(40)
+          @url = topic.relative_url(post_number)
+
+          posts = posts.where("post_number = ?", post_number) if post_number # narrow to one post
+
+          content = +<<~TEXT.strip
+          title: #{topic.title}
+        TEXT
+
+          category_names = [
+            topic.category&.parent_category&.name,
+            topic.category&.name,
+          ].compact.join(" ")
+          content << "\ncategories: #{category_names}" if category_names.present?
+
+          if topic.tags.length > 0
+            tags = DiscourseTagging.filter_visible(topic.tags, Guardian.new)
+            content << "\ntags: #{tags.map(&:name).join(", ")}\n\n" if tags.length > 0
+          end
+
+          posts.each { |post| content << "\n\n#{post.username} said:\n\n#{post.raw}" }
+
+          # TODO: 16k or 100k models can handle a lot more tokens
+          content = llm.tokenizer.truncate(content, 1500).squish # cap at 1500, then squish
+
+          result = { topic_id: topic_id, content: content, complete: true }
+          result[:post_number] = post_number if post_number
+          result
+        end
+
+        protected
+
+        def description_args # Interpolation values used by Tool#details.
+          { title: title, url: url }
+        end
+      end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/search.rb b/lib/ai_bot/tools/search.rb
new file mode 100644
index 00000000..33b4f6d0
--- /dev/null
+++ b/lib/ai_bot/tools/search.rb
@@ -0,0 +1,223 @@
+#frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class Search < Tool
+ MIN_SEMANTIC_RESULTS = 5
+
+        class << self # Class-level description of the search tool.
+          def signature # Name, description and the optional search filters this tool accepts.
+            {
+              name: name,
+              description:
+                "Will search topics in the current discourse instance, when rendering always prefer to link to the topics you find",
+              parameters: [
+                {
+                  name: "search_query",
+                  description:
+                    "Specific keywords to search for, space seperated (correct bad spelling, remove connector words)",
+                  type: "string",
+                },
+                {
+                  name: "user",
+                  description:
+                    "Filter search results to this username (only include if user explicitly asks to filter by user)",
+                  type: "string",
+                },
+                {
+                  name: "order",
+                  description: "search result order",
+                  type: "string",
+                  enum: %w[latest latest_topic oldest views likes],
+                },
+                {
+                  name: "limit",
+                  description:
+                    "limit number of results returned (generally prefer to just keep to default)",
+                  type: "integer",
+                },
+                {
+                  name: "max_posts",
+                  description:
+                    "maximum number of posts on the topics (topics where lots of people posted)",
+                  type: "integer",
+                },
+                {
+                  name: "tags",
+                  description:
+                    "list of tags to search for. Use + to join with OR, use , to join with AND",
+                  type: "string",
+                },
+                { name: "category", description: "category name to filter to", type: "string" },
+                {
+                  name: "before",
+                  description: "only topics created before a specific date YYYY-MM-DD",
+                  type: "string",
+                },
+                {
+                  name: "after",
+                  description: "only topics created after a specific date YYYY-MM-DD",
+                  type: "string",
+                },
+                {
+                  name: "status",
+                  description: "search for topics in a particular state",
+                  type: "string",
+                  enum: %w[open closed archived noreplies single_user],
+                },
+              ],
+            }
+          end
+
+          def name # Identifier used in the signature and in the i18n keys built by Tool.
+            "search"
+          end
+
+          def custom_system_message # Overrides Tool.custom_system_message (which returns nil).
+            <<~TEXT
+          You were trained on OLD data, lean on search to get up to date information about this forum
+          When searching try to SIMPLIFY search terms
+          Discourse search joins all terms with AND. Reduce and simplify terms to find more results.
+          TEXT
+          end
+
+          def accepted_options # Options read back through Tool#options (base_query, max_results).
+            [option(:base_query, type: :string), option(:max_results, type: :integer)]
+          end
+        end
+
+        def search_args # Subset of parameters that invoke appends to the query as "key:value".
+          parameters.slice(:user, :order, :max_posts, :tags, :before, :after, :status)
+        end
+
+        def invoke(bot_user, llm) # Runs the search; yields a progress string; returns a Hash.
+          search_string =
+            search_args.reduce(+parameters[:search_query].to_s) do |memo, (key, value)|
+              next(memo) if value.blank? # `return` here would exit invoke before any search ran
+              memo << " " << "#{key}:#{value}"
+            end
+
+          @last_query = search_string
+
+          yield(I18n.t("discourse_ai.ai_bot.searching", query: search_string))
+
+          if options[:base_query].present?
+            search_string = "#{search_string} #{options[:base_query]}"
+          end
+
+          results =
+            ::Search.execute(
+              search_string.to_s + " status:public",
+              search_type: :full_page,
+              guardian: Guardian.new(),
+            )
+
+          # let's be frugal with tokens, 50 results is too much and stuff gets cut off
+          max_results = calculate_max_results(llm)
+          results_limit = parameters[:limit] || max_results
+          results_limit = max_results if parameters[:limit].to_i > max_results
+
+          should_try_semantic_search =
+            SiteSetting.ai_embeddings_semantic_search_enabled && results_limit == max_results &&
+              parameters[:search_query].present?
+
+          max_semantic_results = max_results / 4 # share of the budget kept for semantic hits
+          results_limit = results_limit - max_semantic_results if should_try_semantic_search
+
+          posts = results&.posts || []
+          posts = posts[0..results_limit.to_i - 1]
+
+          if should_try_semantic_search
+            semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(Guardian.new())
+            topic_ids = Set.new(posts.map(&:topic_id)) # used to skip topics already listed
+
+            search = ::Search.new(search_string, guardian: Guardian.new)
+
+            results = nil
+            begin
+              results = semantic_search.search_for_topics(search.term)
+            rescue => e
+              Discourse.warn_exception(e, message: "Semantic search failed") # best effort only
+            end
+
+            if results
+              results = search.apply_filters(results)
+
+              results.each do |post|
+                next if topic_ids.include?(post.topic_id)
+
+                topic_ids << post.topic_id
+                posts << post
+
+                break if posts.length >= max_results
+              end
+            end
+          end
+
+          @last_num_results = posts.length
+          # this is the general pattern from core
+          # if there are millions of hidden tags it may fail
+          hidden_tags = nil
+
+          if posts.blank?
+            { args: parameters, rows: [], instruction: "nothing was found, expand your search" }
+          else
+            format_results(posts, args: parameters) do |post|
+              category_names = [
+                post.topic.category&.parent_category&.name,
+                post.topic.category&.name,
+              ].compact.join(" > ")
+              row = {
+                title: post.topic.title,
+                url: Discourse.base_path + post.url,
+                username: post.user&.username,
+                excerpt: post.excerpt,
+                created: post.created_at,
+                category: category_names,
+                likes: post.like_count,
+                topic_views: post.topic.views,
+                topic_likes: post.topic.like_count,
+                topic_replies: post.topic.posts_count - 1,
+              }
+
+              if SiteSetting.tagging_enabled
+                hidden_tags ||= DiscourseTagging.hidden_tag_names
+                # using map over pluck to avoid n+1 (assuming caller preloading)
+                tags = post.topic.tags.map(&:name) - hidden_tags
+                row[:tags] = tags.join(", ") if tags.present?
+              end
+
+              row
+            end
+          end
+        end
+
+ protected
+
+        def description_args # Interpolation values used by Tool#details; safe before invoke ran.
+          {
+            count: @last_num_results || 0,
+            query: @last_query || "",
+            url: "#{Discourse.base_path}/search?q=#{CGI.escape(@last_query || "")}",
+          }
+        end
+
+ private
+
+        def calculate_max_results(llm) # Result budget: the max_results option, else by prompt size.
+          max_results = options[:max_results].to_i
+          return [max_results, 100].min if max_results > 0 # configured value, capped at 100
+
+          if llm.max_prompt_tokens > 30_000
+            60
+          elsif llm.max_prompt_tokens > 10_000
+            40
+          else
+            20
+          end
+        end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/search_settings.rb b/lib/ai_bot/tools/search_settings.rb
new file mode 100644
index 00000000..504b7f0d
--- /dev/null
+++ b/lib/ai_bot/tools/search_settings.rb
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class SearchSettings < Tool
+ INCLUDE_DESCRIPTIONS_MAX_LENGTH = 10
+ MAX_RESULTS = 200
+
+        def self.signature # Name, description and the single required "query" parameter.
+          {
+            name: name,
+            description: "Will search through site settings and return top 20 results",
+            parameters: [
+              {
+                name: "query",
+                description:
+                  "comma delimited list of settings to search for (e.g. 'setting_1,setting_2')",
+                type: "string",
+                required: true,
+              },
+            ],
+          }
+        end # NOTE(review): description says "top 20" but invoke caps at MAX_RESULTS (200); confirm
+
+        def self.name
+          "search_settings"
+        end
+
+        def query # Raw query parameter, always as a String.
+          parameters[:query].to_s
+        end
+
+        def invoke(_bot_user, _llm) # Returns a Hash of matching settings; arguments are unused.
+          @last_num_results = 0
+
+          terms = query.split(",").map(&:strip).map(&:downcase).reject(&:blank?)
+
+          found =
+            SiteSetting.all_settings.filter do |setting|
+              name = setting[:setting].to_s.downcase
+              description = setting[:description].to_s.downcase
+              plugin = setting[:plugin].to_s.downcase
+
+              search_string = "#{name} #{description} #{plugin}"
+
+              terms.any? { |term| search_string.include?(term) } # case-insensitive substring match
+            end
+
+          if found.blank?
+            {
+              args: {
+                query: query,
+              },
+              rows: [],
+              instruction: "no settings matched #{query}, expand your search",
+            }
+          else
+            include_descriptions = false
+
+            if found.length > MAX_RESULTS
+              found = found.first(MAX_RESULTS) # was found[0..MAX_RESULTS], which kept one too many
+            elsif found.length < INCLUDE_DESCRIPTIONS_MAX_LENGTH
+              include_descriptions = true # only a few hits: descriptions fit in the response
+            end
+
+            @last_num_results = found.length
+
+            format_results(found, args: { query: query }) do |setting|
+              result = { name: setting[:setting] }
+              if include_descriptions
+                result[:description] = setting[:description]
+                result[:plugin] = setting[:plugin]
+              end
+              result
+            end
+          end
+        end
+
+ protected
+
+        def description_args # Interpolation values used by Tool#details.
+          { count: @last_num_results || 0, query: parameters[:query].to_s }
+        end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/setting_context.rb b/lib/ai_bot/tools/setting_context.rb
new file mode 100644
index 00000000..ef08d323
--- /dev/null
+++ b/lib/ai_bot/tools/setting_context.rb
@@ -0,0 +1,160 @@
+#frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class SettingContext < Tool
+ MAX_CONTEXT_TOKENS = 2000
+ CODE_FILE_EXTENSIONS = "rb,js,gjs,hbs"
+
+        class << self # Class-level description of the tool plus the ripgrep availability check.
+          def rg_installed? # True when `which rg` succeeds; the answer is cached in @rg_installed.
+            if defined?(@rg_installed)
+              @rg_installed
+            else
+              @rg_installed =
+                begin
+                  Discourse::Utils.execute_command("which", "rg")
+                  true
+                rescue Discourse::Utils::CommandError
+                  false
+                end
+            end
+          end
+
+          def signature # Name, description and the single required "setting_name" parameter.
+            {
+              name: name,
+              description:
+                "Will provide you with full context regarding a particular site setting in Discourse",
+              parameters: [
+                {
+                  name: "setting_name",
+                  description: "The name of the site setting we need context for",
+                  type: "string",
+                  required: true,
+                },
+              ],
+            }
+          end
+
+          def name # Identifier used in the signature and in the i18n keys built by Tool.
+            "setting_context"
+          end
+        end
+
+        def setting_name # The setting_name parameter exactly as supplied.
+          parameters[:setting_name]
+        end
+
+        def invoke(_bot_user, llm) # Returns { setting_name:, context: }; context is text for the LLM.
+          if !self.class.rg_installed?
+            return(
+              {
+                setting_name: setting_name,
+                context:
+                  "This command requires the rg command line tool to be installed on the server",
+              }
+            )
+          end
+
+          if !SiteSetting.has_setting?(setting_name)
+            { setting_name: setting_name, context: "This setting does not exist" }
+          else
+            description = SiteSetting.description(setting_name)
+            result = +"# #{setting_name}\n#{description}\n\n"
+
+            setting_info =
+              find_setting_info(setting_name, [Rails.root.join("config", "site_settings.yml").to_s])
+            if !setting_info # not in the core file: fall back to plugin settings files
+              setting_info =
+                find_setting_info(setting_name, Dir[Rails.root.join("plugins/**/settings.yml")])
+            end
+
+            result << setting_info.to_s # still nil if no file matched; `<< nil` raises TypeError
+            result << "\n\n"
+
+            %w[lib app plugins].each do |dir|
+              path = Rails.root.join(dir).to_s
+              result << Discourse::Utils.execute_command(
+                "rg",
+                setting_name,
+                path,
+                "-g",
+                "!**/spec/**",
+                "-g",
+                "!**/dist/**",
+                "-g",
+                "*.{#{CODE_FILE_EXTENSIONS}}",
+                "-C",
+                "10",
+                "--color",
+                "never",
+                "--heading",
+                "--no-ignore",
+                chdir: path,
+                success_status_codes: [0, 1], # status 1 is accepted as success alongside 0
+              )
+            end
+
+            result.gsub!(/^#{Regexp.escape(Rails.root.to_s)}/, "") # drop the Rails.root prefix
+
+            result = llm.tokenizer.truncate(result, MAX_CONTEXT_TOKENS)
+
+            { setting_name: setting_name, context: result }
+          end
+        end
+
+ private
+
+        def find_setting_info(name, paths) # Returns "path\n<matched block>" or nil if no path matches.
+          path, result = nil # both start out as nil
+
+          paths.each do |search_path|
+            result =
+              Discourse::Utils.execute_command(
+                "rg",
+                name,
+                search_path,
+                "-g",
+                "*.{#{CODE_FILE_EXTENSIONS}}",
+                "-A",
+                "10",
+                "--color",
+                "never",
+                "--heading",
+                success_status_codes: [0, 1],
+              )
+            if !result.blank? # first path that produces output wins
+              path = search_path
+              break
+            end
+          end
+
+          if result.blank?
+            nil
+          else
+            rows = result.split("\n")
+            leading_spaces = rows[0].match(/^\s*/)[0].length # indentation of the first output row
+
+            filtered = []
+
+            rows.each do |row|
+              if !filtered.blank? # the first row is always kept
+                break if row.match(/^\s*/)[0].length <= leading_spaces # stop once not deeper
+              end
+              filtered << row
+            end
+
+            filtered.unshift("#{path}")
+            filtered.join("\n")
+          end
+        end
+
+        def description_args # Interpolation values used by Tool#details.
+          parameters.slice(:setting_name)
+        end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/summarize.rb b/lib/ai_bot/tools/summarize.rb
new file mode 100644
index 00000000..2108184f
--- /dev/null
+++ b/lib/ai_bot/tools/summarize.rb
@@ -0,0 +1,183 @@
+#frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class Summarize < Tool
+        def self.signature # Name, description and parameters (topic_id required, guidance optional).
+          {
+            name: name,
+            description: "Will summarize a topic attempting to answer question in guidance",
+            parameters: [
+              {
+                name: "topic_id",
+                description: "The discourse topic id to summarize",
+                type: "integer",
+                required: true,
+              },
+              {
+                name: "guidance",
+                description: "Special guidance on how to summarize the topic",
+                type: "string",
+              },
+            ],
+          }
+        end
+
+        def self.name # Identifier used in the signature and in the i18n keys built by Tool.
+          "summary"
+        end
+
+        def topic_id # topic_id parameter coerced with to_i (so nil becomes 0).
+          parameters[:topic_id].to_i
+        end
+
+        def guidance
+          parameters[:guidance]
+        end
+
+        def chain_next_response? # Overrides the Tool default (true).
+          false
+        end
+
+        def standalone? # Overrides the Tool default (false).
+          true
+        end
+
+        def low_cost? # Overrides the Tool default (false).
+          true
+        end
+
+        def custom_raw # The last summary produced by invoke, or a localized "not found" message.
+          @last_summary || I18n.t("discourse_ai.ai_bot.topic_not_found")
+        end
+
+        def invoke(bot_user, llm, &progress_blk) # Returns a short status String; summary in ivar.
+          topic = nil
+          if topic_id > 0
+            topic = Topic.find_by(id: topic_id)
+            topic = nil if !topic || !Guardian.new.can_see?(topic) # Guardian built with no user
+          end
+
+          @last_summary = nil
+
+          if topic
+            @last_topic_title = topic.title
+
+            posts =
+              Post
+                .where(topic_id: topic.id)
+                .where("post_type in (?)", [Post.types[:regular], Post.types[:small_action]])
+                .where("not hidden")
+                .order(:post_number)
+
+            columns = ["posts.id", :post_number, :raw, :username]
+
+            current_post_numbers = posts.limit(5).pluck(:post_number) # first 5 posts
+            current_post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
+            current_post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
+
+            data =
+              Post
+                .where(topic_id: topic.id)
+                .joins(:user)
+                .where("post_number in (?)", current_post_numbers)
+                .order(:post_number)
+                .pluck(*columns)
+
+            @last_summary = summarize(data, topic, guidance, bot_user, llm, &progress_blk)
+          end
+
+          if !@last_summary
+            "Say: No topic found!"
+          else
+            "Topic summarized"
+          end
+        end
+
+ protected
+
+        def description_args # Uses topic_id: @last_topic_id was never assigned, so the id was blank.
+          { url: "#{Discourse.base_path}/t/-/#{topic_id}", title: @last_topic_title || "" }
+        end
+
+ private
+
+        def summarize(data, topic, guidance, bot_user, llm, &progress_blk) # Returns llm.generate output.
+          text = +""
+          data.each do |_id, post_number, raw, username|
+            text << "(#{post_number} #{username} said: #{raw}"
+          end
+
+          summaries = []
+          current_section = +""
+          split = []
+
+          text
+            .split(/\s+/)
+            .each_slice(20) do |slice|
+              current_section << " "
+              current_section << slice.join(" ")
+
+              # somehow any more will get closer to limits
+              if llm.tokenizer.tokenize(current_section).length > 2500
+                split << current_section
+                current_section = +""
+              end
+            end
+
+          split << current_section if current_section.present?
+
+          split = split[0..3] + split[-3..-1] if split.length > 7 # `> 5` duplicated one of 6 sections
+
+          progress = +I18n.t("discourse_ai.ai_bot.summarizing")
+          progress_blk.call(progress)
+
+          split.each do |section|
+            progress << "." # one dot per LLM call, re-sent through the progress block
+            progress_blk.call(progress)
+
+            prompt = section_prompt(topic, section, guidance)
+
+            summary = llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
+
+            summaries << summary
+          end
+
+          if summaries.length > 1 # several partial summaries: ask the LLM to merge them
+            progress << "."
+            progress_blk.call(progress)
+
+            concatenation_prompt = {
+              insts: "You are a helpful bot",
+              input:
+                "concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
+            }
+
+            llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
+          else
+            summaries.first
+          end
+        end
+
+        def section_prompt(topic, text, guidance) # Builds the { insts:, input: } prompt for one section.
+          insts = <<~TEXT
+          You are a summarization bot.
+          You effectively summarise any text.
+          You condense it into a shorter version.
+          You understand and generate Discourse forum markdown.
+          Try generating links as well the format is #{topic.url}/POST_NUMBER. eg: [ref](#{topic.url}/77)
+        TEXT
+
+          { insts: insts, input: <<~TEXT }
+          Guidance: #{guidance}
+          You are summarizing the topic: #{topic.title}
+          Summarize the following in 400 words:
+
+          #{text}
+        TEXT
+        end
+ end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/time.rb b/lib/ai_bot/tools/time.rb
new file mode 100644
index 00000000..563f2ceb
--- /dev/null
+++ b/lib/ai_bot/tools/time.rb
@@ -0,0 +1,52 @@
+#frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+      class Time < Tool # Tool that reports the current time in a requested timezone.
+        def self.signature # Name, description and the single required "timezone" parameter.
+          {
+            name: name,
+            description: "Will generate the time in a timezone",
+            parameters: [
+              {
+                name: "timezone",
+                description: "ALWAYS supply a Ruby compatible timezone",
+                type: "string",
+                required: true,
+              },
+            ],
+          }
+        end
+
+        def self.name
+          "time"
+        end
+
+        def timezone # timezone parameter, always as a String.
+          parameters[:timezone].to_s
+        end
+
+        def invoke(_bot_user, _llm) # Returns { args:, time: }; neither argument is used.
+          time =
+            begin
+              ::Time.now.in_time_zone(timezone) # ::Time: this class shadows Time in this namespace
+            rescue StandardError
+              nil
+            end
+          time = ::Time.now if !time # fall back to ::Time.now when the conversion failed
+
+          @last_time = time.to_s # remembered for description_args
+
+          { args: { timezone: timezone }, time: time.to_s }
+        end
+
+        private
+
+        def description_args # Interpolation values used by Tool#details.
+          { timezone: timezone, time: @last_time }
+        end
+      end
+ end
+ end
+end
diff --git a/lib/ai_bot/tools/tool.rb b/lib/ai_bot/tools/tool.rb
new file mode 100644
index 00000000..03a32417
--- /dev/null
+++ b/lib/ai_bot/tools/tool.rb
@@ -0,0 +1,124 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+ module AiBot
+ module Tools
+ class Tool
+        class << self # Class-level contract that every concrete tool fills in.
+          def signature # Subclasses return a Hash with :name, :description and optional :parameters.
+            raise NotImplementedError # `NotImplemented` is not a Ruby constant and raised NameError
+          end
+
+          def name # Subclasses return the tool's String identifier.
+            raise NotImplementedError
+          end
+
+          def accepted_options # Options this tool accepts; none by default.
+            []
+          end
+
+          def option(name, type:) # Builds an Option bound to this tool class.
+            Option.new(tool: self, name: name, type: type)
+          end
+
+          def help # Localized help text keyed by the signature name.
+            I18n.t("discourse_ai.ai_bot.command_help.#{signature[:name]}")
+          end
+
+          def custom_system_message # Subclasses may return a String; nil by default.
+            nil
+          end
+        end
+
+        attr_accessor :custom_raw
+
+        def initialize(parameters, tool_call_id: "", persona_options: {})
+          @parameters = parameters # subclasses read this with Symbol keys, e.g. parameters[:query]
+          @tool_call_id = tool_call_id
+          @persona_options = persona_options # looked up by option name in #options
+        end
+
+        attr_reader :parameters, :tool_call_id
+
+        def name # Delegates to the class-level name.
+          self.class.name
+        end
+
+        def summary # Localized one-line summary keyed by the tool name.
+          I18n.t("discourse_ai.ai_bot.command_summary.#{name}")
+        end
+
+        def details # Localized description, interpolated with the subclass's description_args.
+          I18n.t("discourse_ai.ai_bot.command_description.#{name}", description_args)
+        end
+
+        def help # Localized help text keyed by the tool name.
+          I18n.t("discourse_ai.ai_bot.command_help.#{name}")
+        end
+
+        def options # Accepted options that have a truthy value in @persona_options.
+          self
+            .class
+            .accepted_options
+            .reduce(HashWithIndifferentAccess.new) do |memo, option|
+              val = @persona_options[option.name]
+              memo[option.name] = val if val
+              memo
+            end
+        end
+
+        def chain_next_response? # Default is true; Summarize overrides this to false.
+          true
+        end
+
+        def standalone? # Default is false; Summarize overrides this to true.
+          false
+        end
+
+        def low_cost? # Default is false; Summarize overrides this to true.
+          false
+        end
+
+ protected
+
+        def accepted_options # NOTE(review): instance-level twin; #options uses the class-level one
+          []
+        end
+
+        def option(name, type:) # NOTE(review): passes an instance as tool; Option calls tool.signature
+          Option.new(tool: self, name: name, type: type)
+        end
+
+        def description_args # Interpolation values for #details; subclasses override this.
+          {}
+        end
+
+        def format_results(rows, column_names = nil, args: nil) # => { column_names:, rows:, args: }
+          rows = rows&.map { |row| yield row } if block_given? # optional per-row transform
+
+          if !column_names # rows are Hash-like here: derive the columns from their keys
+            index = -1
+            column_indexes = {}
+
+            rows =
+              rows&.map do |data|
+                new_row = []
+                data.each do |key, value|
+                  found_index = column_indexes[key.to_s] ||= (index += 1) # first-seen key order
+                  new_row[found_index] = value
+                end
+                new_row
+              end
+            column_names = column_indexes.keys
+          end
+
+          # this is not the most efficient format
+          # however this is needed cause GPT 3.5 / 4 was steered using JSON
+          result = { column_names: column_names, rows: rows }
+          result[:args] = args if args # args is only included when given
+          result
+        end
+ end
+ end
+ end
+end
diff --git a/lib/completions/dialects/chat_gpt.rb b/lib/completions/dialects/chat_gpt.rb
index 033f29ab..0335f53f 100644
--- a/lib/completions/dialects/chat_gpt.rb
+++ b/lib/completions/dialects/chat_gpt.rb
@@ -46,10 +46,9 @@ module DiscourseAi
prompt[:tools].map do |t|
tool = t.dup
- if tool[:parameters]
- tool[:parameters] = t[:parameters].reduce(
- { type: "object", properties: {}, required: [] },
- ) do |memo, p|
+ tool[:parameters] = t[:parameters]
+ .to_a
+ .reduce({ type: "object", properties: {}, required: [] }) do |memo, p|
name = p[:name]
memo[:required] << name if p[:required]
@@ -58,7 +57,6 @@ module DiscourseAi
memo[:properties][name][:items] = { type: p[:item_type] } if p[:item_type]
memo
end
- end
{ type: "function", function: tool }
end
@@ -71,9 +69,12 @@ module DiscourseAi
trimmed_context.reverse.map do |context|
if context[:type] == "tool_call"
+ function = JSON.parse(context[:content], symbolize_names: true)
+ function[:arguments] = function[:arguments].to_json
+
{
role: "assistant",
- tool_calls: [{ type: "function", function: context[:content], id: context[:name] }],
+ tool_calls: [{ type: "function", function: function, id: context[:name] }],
}
else
translated = context.slice(:content)
diff --git a/lib/completions/dialects/claude.rb b/lib/completions/dialects/claude.rb
index 73f9d231..d7ecf0b0 100644
--- a/lib/completions/dialects/claude.rb
+++ b/lib/completions/dialects/claude.rb
@@ -39,12 +39,12 @@ module DiscourseAi
def conversation_context
return "" if prompt[:conversation_context].blank?
- trimmed_context = trim_context(prompt[:conversation_context])
+ clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
+ trimmed_context = trim_context(clean_context)
trimmed_context
.reverse
.reduce(+"") do |memo, context|
- next(memo) if context[:type] == "tool_call"
memo << (context[:type] == "user" ? "Human:" : "Assistant:")
if context[:type] == "tool"
diff --git a/lib/completions/dialects/dialect.rb b/lib/completions/dialects/dialect.rb
index 951db987..b0e6c6a3 100644
--- a/lib/completions/dialects/dialect.rb
+++ b/lib/completions/dialects/dialect.rb
@@ -97,6 +97,13 @@ module DiscourseAi
message_tokens = calculate_message_token(dupped_context)
+ # Don't trim tool call metadata.
+ if context[:type] == "tool_call"
+ current_token_count += calculate_message_token(context) + per_message_overhead
+ memo << context
+ next(memo)
+ end
+
# Trimming content to make sure we respect token limit.
while dupped_context[:content].present? &&
message_tokens + current_token_count + per_message_overhead > prompt_limit
diff --git a/lib/completions/dialects/llama2_classic.rb b/lib/completions/dialects/llama2_classic.rb
index 26e541a3..0a53c44b 100644
--- a/lib/completions/dialects/llama2_classic.rb
+++ b/lib/completions/dialects/llama2_classic.rb
@@ -39,12 +39,13 @@ module DiscourseAi
def conversation_context
return "" if prompt[:conversation_context].blank?
- trimmed_context = trim_context(prompt[:conversation_context])
+ clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
+
+ trimmed_context = trim_context(clean_context)
trimmed_context
.reverse
.reduce(+"") do |memo, context|
- next(memo) if context[:type] == "tool_call"
if context[:type] == "tool"
memo << <<~TEXT
[INST]
diff --git a/lib/completions/dialects/mixtral.rb b/lib/completions/dialects/mixtral.rb
index 464a1ac4..36a2fd43 100644
--- a/lib/completions/dialects/mixtral.rb
+++ b/lib/completions/dialects/mixtral.rb
@@ -39,12 +39,12 @@ module DiscourseAi
def conversation_context
return "" if prompt[:conversation_context].blank?
- trimmed_context = trim_context(prompt[:conversation_context])
+ clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
+ trimmed_context = trim_context(clean_context)
trimmed_context
.reverse
.reduce(+"") do |memo, context|
- next(memo) if context[:type] == "tool_call"
memo << "[INST] " if context[:type] == "user"
if context[:type] == "tool"
diff --git a/lib/completions/dialects/orca_style.rb b/lib/completions/dialects/orca_style.rb
index b89dca01..74a356f3 100644
--- a/lib/completions/dialects/orca_style.rb
+++ b/lib/completions/dialects/orca_style.rb
@@ -36,12 +36,12 @@ module DiscourseAi
def conversation_context
return "" if prompt[:conversation_context].blank?
- trimmed_context = trim_context(prompt[:conversation_context])
+ clean_context = prompt[:conversation_context].select { |cc| cc[:type] != "tool_call" }
+ trimmed_context = trim_context(clean_context)
trimmed_context
.reverse
.reduce(+"") do |memo, context|
- next(memo) if context[:type] == "tool_call"
memo << (context[:type] == "user" ? "### User:" : "### Assistant:")
if context[:type] == "tool"
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
index d98990d8..e4d26a8e 100644
--- a/lib/completions/endpoints/anthropic.rb
+++ b/lib/completions/endpoints/anthropic.rb
@@ -23,7 +23,7 @@ module DiscourseAi
def default_options
{
model: model,
- max_tokens_to_sample: 2_000,
+ max_tokens_to_sample: 3_000,
stop_sequences: ["\n\nHuman:", "