From 958dfc360eaffac2671cd4343a92ad475cab8ad3 Mon Sep 17 00:00:00 2001
From: Sam
Date: Wed, 9 Aug 2023 07:19:56 +1000
Subject: [PATCH] FEATURE: experimental read command for bot (#129)

This command is useful for reading a topic's content. It allows us to perform
critical analysis or suggest answers.

Given the 8k token limit in GPT-4 I hardcoded reading to 1500 tokens, but we
can follow up and allow larger windows on models that support more tokens.

On local testing even in this limited form this can be very useful.
---
Notes (not part of the commit message):

This patch was restored from a copy whose line breaks and indentation had
been collapsed. Line breaks were re-derived from the diff markers; the
restored line counts match every hunk header and the diffstat below.

Indentation could not be recovered from that copy. For the two new Ruby files
it follows the usual two-space convention. For hunks against existing files
(server.en.yml, settings.yml, entry_point.rb, open_ai_bot.rb) the leading
whitespace of context and added lines is a best guess based on nesting. If a
hunk is rejected, retrying with `git apply --ignore-whitespace` may help;
afterwards, check that the two added YAML entries line up with their
siblings.

The author's e-mail address was missing from the copy and has not been
reconstructed; `git am` may refuse the patch until it is supplied.

 config/locales/server.en.yml                  |  2 +
 config/settings.yml                           |  1 +
 lib/modules/ai_bot/commands/read_command.rb   | 61 +++++++++++++++++++
 lib/modules/ai_bot/entry_point.rb             |  1 +
 lib/modules/ai_bot/open_ai_bot.rb             |  1 +
 .../ai_bot/commands/read_command_spec.rb      | 20 ++++++
 6 files changed, 86 insertions(+)
 create mode 100644 lib/modules/ai_bot/commands/read_command.rb
 create mode 100644 spec/lib/modules/ai_bot/commands/read_command_spec.rb

diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml
index 0618ee77..eccf08b4 100644
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@@ -100,7 +100,9 @@ en:
         summarize: "Summarize"
         image: "Generate image"
         google: "Search Google"
+        read: "Read topic"
       command_description:
+        read: "Reading: %{title}"
         time: "Time in %{timezone} is %{time}"
         summarize: "Summarized %{title}"
         image: "%{prompt}"
diff --git a/config/settings.yml b/config/settings.yml
index f869c212..85a08ac9 100644
--- a/config/settings.yml
+++ b/config/settings.yml
@@ -203,6 +203,7 @@ plugins:
       - image
       - search
       - summarize
+      - read
       - tags
       - time
   ai_helper_add_ai_pm_to_header:
diff --git a/lib/modules/ai_bot/commands/read_command.rb b/lib/modules/ai_bot/commands/read_command.rb
new file mode 100644
index 00000000..406cdc44
--- /dev/null
+++ b/lib/modules/ai_bot/commands/read_command.rb
@@ -0,0 +1,61 @@
+#frozen_string_literal: true
+
+module DiscourseAi::AiBot::Commands
+  class ReadCommand < Command
+    class << self
+      def name
+        "read"
+      end
+
+      def desc
+        "Will read a topic or a post on this Discourse instance"
+      end
+
+      def parameters
+        [
+          Parameter.new(
+            name: "topic_id",
+            description: "the id of the topic to read",
+            type: "integer",
+            required: true,
+          ),
+          Parameter.new(
+            name: "post_number",
+            description: "the post number to read",
+            type: "integer",
+            required: false,
+          ),
+        ]
+      end
+    end
+
+    def description_args
+      { title: @title }
+    end
+
+    def process(topic_id:, post_number: nil)
+      not_found = { topic_id: topic_id, description: "Topic not found" }
+
+      @title = ""
+
+      topic_id = topic_id.to_i
+
+      topic = Topic.find_by(id: topic_id)
+      return not_found if !topic || !Guardian.new.can_see?(topic)
+
+      @title = topic.title
+
+      posts = Post.secured(Guardian.new).where(topic_id: topic_id).order(:post_number).limit(40)
+      posts = posts.where("post_number = ?", post_number) if post_number
+
+      content = +"title: #{topic.title}\n\n"
+
+      posts.each { |post| content << "\n\n#{post.username} said:\n\n#{post.raw}" }
+
+      # TODO: 16k or 100k models can handle a lot more tokens
+      content = ::DiscourseAi::Tokenizer::BertTokenizer.truncate(content, 1500).squish
+
+      { topic_id: topic_id, post_number: post_number, content: content }
+    end
+  end
+end
diff --git a/lib/modules/ai_bot/entry_point.rb b/lib/modules/ai_bot/entry_point.rb
index 6af851c4..116e520e 100644
--- a/lib/modules/ai_bot/entry_point.rb
+++ b/lib/modules/ai_bot/entry_point.rb
@@ -35,6 +35,7 @@ module DiscourseAi
         require_relative "commands/summarize_command"
         require_relative "commands/image_command"
         require_relative "commands/google_command"
+        require_relative "commands/read_command"
       end
 
       def inject_into(plugin)
diff --git a/lib/modules/ai_bot/open_ai_bot.rb b/lib/modules/ai_bot/open_ai_bot.rb
index 5c35e78a..bb064a83 100644
--- a/lib/modules/ai_bot/open_ai_bot.rb
+++ b/lib/modules/ai_bot/open_ai_bot.rb
@@ -95,6 +95,7 @@ module DiscourseAi
             Commands::TimeCommand,
             Commands::SearchCommand,
             Commands::SummarizeCommand,
+            Commands::ReadCommand,
           ].tap do |cmds|
             cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
             cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
diff --git a/spec/lib/modules/ai_bot/commands/read_command_spec.rb b/spec/lib/modules/ai_bot/commands/read_command_spec.rb
new file mode 100644
index 00000000..fdd0de77
--- /dev/null
+++ b/spec/lib/modules/ai_bot/commands/read_command_spec.rb
@@ -0,0 +1,20 @@
+#frozen_string_literal: true
+
+RSpec.describe DiscourseAi::AiBot::Commands::ReadCommand do
+  fab!(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
+
+  describe "#process" do
+    it "can read a topic" do
+      post1 = Fabricate(:post, raw: "hello there")
+      Fabricate(:post, raw: "mister sam", topic: post1.topic)
+
+      read = described_class.new(bot_user, post1)
+
+      results = read.process(topic_id: post1.topic_id)
+
+      expect(results[:topic_id]).to eq(post1.topic_id)
+      expect(results[:content]).to include("hello")
+      expect(results[:content]).to include("sam")
+    end
+  end
+end