FEATURE: experimental read command for bot (#129)

This command is useful for reading a topic's content. It allows us to perform
critical analysis or suggest answers.

Given the 8k token limit in GPT-4, I hardcoded reading to 1500 tokens, but we
can follow up and allow larger windows on models that support more tokens.

On local testing even in this limited form this can be very useful.
This commit is contained in:
Sam 2023-08-09 07:19:56 +10:00 committed by GitHub
parent 8318c4374c
commit 958dfc360e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 86 additions and 0 deletions

View File

@ -100,7 +100,9 @@ en:
summarize: "Summarize"
image: "Generate image"
google: "Search Google"
read: "Read topic"
command_description:
read: "Reading: %{title}"
time: "Time in %{timezone} is %{time}"
summarize: "Summarized <a href='%{url}'>%{title}</a>"
image: "%{prompt}"

View File

@ -203,6 +203,7 @@ plugins:
- image
- search
- summarize
- read
- tags
- time
ai_helper_add_ai_pm_to_header:

View File

@ -0,0 +1,61 @@
# frozen_string_literal: true

module DiscourseAi::AiBot::Commands
  # Bot command that reads a topic (or one specific post in it) so the model
  # can analyse or answer based on its content.
  class ReadCommand < Command
    class << self
      def name
        "read"
      end

      def desc
        "Will read a topic or a post on this Discourse instance"
      end

      def parameters
        [
          Parameter.new(
            name: "topic_id",
            description: "the id of the topic to read",
            type: "integer",
            required: true,
          ),
          Parameter.new(
            name: "post_number",
            description: "the post number to read",
            type: "integer",
            required: false,
          ),
        ]
      end
    end

    # Used to render the localized "Reading: %{title}" description.
    def description_args
      { title: @title }
    end

    # Reads up to 40 posts from the topic (or a single post when post_number
    # is supplied) and returns a hash with the truncated content.
    #
    # Returns { topic_id:, description: } when the topic does not exist or is
    # not visible to an anonymous user.
    def process(topic_id:, post_number: nil)
      not_found = { topic_id: topic_id, description: "Topic not found" }

      @title = ""

      topic_id = topic_id.to_i
      topic = Topic.find_by(id: topic_id)

      # Anonymous guardian: the bot must never surface restricted content.
      guardian = Guardian.new
      return not_found if !topic || !guardian.can_see?(topic)

      @title = topic.title

      posts = Post.secured(guardian).where(topic_id: topic_id).order(:post_number).limit(40)
      posts = posts.where(post_number: post_number) if post_number

      content = +"title: #{topic.title}\n\n"
      posts.each { |post| content << "\n\n#{post.username} said:\n\n#{post.raw}" }

      # TODO: 16k or 100k models can handle a lot more tokens
      content = ::DiscourseAi::Tokenizer::BertTokenizer.truncate(content, 1500).squish

      { topic_id: topic_id, post_number: post_number, content: content }
    end
  end
end

View File

@ -35,6 +35,7 @@ module DiscourseAi
require_relative "commands/summarize_command"
require_relative "commands/image_command"
require_relative "commands/google_command"
require_relative "commands/read_command"
end
def inject_into(plugin)

View File

@ -95,6 +95,7 @@ module DiscourseAi
Commands::TimeCommand,
Commands::SearchCommand,
Commands::SummarizeCommand,
Commands::ReadCommand,
].tap do |cmds|
cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?

View File

@ -0,0 +1,20 @@
# frozen_string_literal: true

RSpec.describe DiscourseAi::AiBot::Commands::ReadCommand do
  fab!(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }

  describe "#process" do
    it "can read a topic" do
      first_post = Fabricate(:post, raw: "hello there")
      Fabricate(:post, raw: "mister sam", topic: first_post.topic)

      command = described_class.new(bot_user, first_post)
      results = command.process(topic_id: first_post.topic_id)

      expect(results[:topic_id]).to eq(first_post.topic_id)
      expect(results[:content]).to include("hello")
      expect(results[:content]).to include("sam")
    end
  end
end