FIX: avoid storing corrupt prompts (#92)

```
prompt << build_message(bot_user.username, reply)
```

Would store a "cooked" prompt, which is invalid; instead, just store the raw
values, which are later passed to build_message.

Additionally:

1. Disable the summary command, which needs honing
2. Stop storing decorations (searched for X) in the prompt, which leads to straying
3. Ship the username directly to the model, avoiding "user: content" in prompts.
   This was causing GPT to stray
This commit is contained in:
Sam 2023-06-20 15:44:03 +10:00 committed by GitHub
parent 70c158cae1
commit 30778d8af8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 49 additions and 15 deletions

View File

@@ -100,7 +100,7 @@ en:
command_description: command_description:
time: "Time in %{timezone} is %{time}" time: "Time in %{timezone} is %{time}"
summarize: "Summarized <a href='%{url}'>%{title}</a>" summarize: "Summarized <a href='%{url}'>%{title}</a>"
image: "Prompt: %{prompt}" image: "%{prompt}"
categories: categories:
one: "Found %{count} category" one: "Found %{count} category"
other: "Found %{count} categories" other: "Found %{count} categories"
@@ -115,6 +115,6 @@ en:
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'" other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
summarization: summarization:
configuration_hint: configuration_hint:
one: "Configure the `%{setting}` setting first." one: "Configure the `%{setting}` setting first."
other: "Configure these settings first: %{settings}" other: "Configure these settings first: %{settings}"

View File

@@ -25,7 +25,7 @@ module DiscourseAi
attr_reader :bot_user attr_reader :bot_user
BOT_NOT_FOUND = Class.new(StandardError) BOT_NOT_FOUND = Class.new(StandardError)
MAX_COMPLETIONS = 3 MAX_COMPLETIONS = 6
def self.as(bot_user) def self.as(bot_user)
available_bots = [DiscourseAi::AiBot::OpenAiBot, DiscourseAi::AiBot::AnthropicBot] available_bots = [DiscourseAi::AiBot::OpenAiBot, DiscourseAi::AiBot::AnthropicBot]
@@ -79,6 +79,7 @@ module DiscourseAi
end end
redis_stream_key = nil redis_stream_key = nil
partial_reply = +""
reply = +(bot_reply_post ? bot_reply_post.raw.dup : "") reply = +(bot_reply_post ? bot_reply_post.raw.dup : "")
start = Time.now start = Time.now
@@ -87,7 +88,9 @@ module DiscourseAi
functions = Functions.new functions = Functions.new
submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel| submit_prompt(prompt, prefer_low_cost: prefer_low_cost) do |partial, cancel|
reply << get_delta(partial, context) current_delta = get_delta(partial, context)
partial_reply << current_delta
reply << current_delta
populate_functions(partial, functions) populate_functions(partial, functions)
if redis_stream_key && !Discourse.redis.get(redis_stream_key) if redis_stream_key && !Discourse.redis.get(redis_stream_key)
@@ -135,7 +138,8 @@ module DiscourseAi
bot_reply_post.post_custom_prompt ||= post.build_post_custom_prompt(custom_prompt: []) bot_reply_post.post_custom_prompt ||= post.build_post_custom_prompt(custom_prompt: [])
prompt = post.post_custom_prompt.custom_prompt || [] prompt = post.post_custom_prompt.custom_prompt || []
prompt << build_message(bot_user.username, reply) prompt << [partial_reply, bot_user.username]
post.post_custom_prompt.update!(custom_prompt: prompt) post.post_custom_prompt.update!(custom_prompt: prompt)
end end

View File

@@ -36,7 +36,7 @@ module DiscourseAi::AiBot::Commands
end end
def description_args def description_args
{ prompt: @last_prompt || 0 } { prompt: @last_prompt }
end end
def chain_next_response def chain_next_response

View File

@@ -20,7 +20,8 @@ module DiscourseAi::AiBot::Commands
), ),
Parameter.new( Parameter.new(
name: "user", name: "user",
description: "Filter search results to this username", description:
"Filter search results to this username (only include if user explicitly asks to filter by user)",
type: "string", type: "string",
), ),
Parameter.new( Parameter.new(
@@ -31,7 +32,8 @@
), ),
Parameter.new( Parameter.new(
name: "limit", name: "limit",
description: "limit number of results returned", description:
"limit number of results returned (generally prefer to just keep to default)",
type: "integer", type: "integer",
), ),
Parameter.new( Parameter.new(

View File

@@ -87,12 +87,12 @@ module DiscourseAi
end end
def available_commands def available_commands
# note: Summarize command is not ready yet, leave it out for now
@cmds ||= @cmds ||=
[ [
Commands::CategoriesCommand, Commands::CategoriesCommand,
Commands::TimeCommand, Commands::TimeCommand,
Commands::SearchCommand, Commands::SearchCommand,
Commands::SummarizeCommand,
].tap do |cmds| ].tap do |cmds|
cmds << Commands::TagsCommand if SiteSetting.tagging_enabled cmds << Commands::TagsCommand if SiteSetting.tagging_enabled
cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present? cmds << Commands::ImageCommand if SiteSetting.ai_stability_api_key.present?
@@ -108,6 +108,16 @@ module DiscourseAi
"gpt-3.5-turbo-16k" "gpt-3.5-turbo-16k"
end end
def clean_username(username)
if username.match?(/\A[a-zA-Z0-9_-]{1,64}\z/)
username
else
# not the best in the world, but this is what we have to work with
# if sites enable unicode usernames this can get messy
username.gsub(/[^a-zA-Z0-9_-]/, "_")[0..63]
end
end
private private
def populate_functions(partial, functions) def populate_functions(partial, functions)
@@ -133,9 +143,9 @@ module DiscourseAi
if function if function
result[:name] = poster_username result[:name] = poster_username
elsif !system && poster_username != bot_user.username elsif !system && poster_username != bot_user.username && poster_username.present?
# Open AI restrict name to 64 chars and only A-Za-z._ (work around) # Open AI restrict name to 64 chars and only A-Za-z._ (work around)
result[:content] = "#{poster_username}: #{content}" result[:name] = clean_username(poster_username)
end end
result result

View File

@@ -80,7 +80,9 @@ RSpec.describe DiscourseAi::AiBot::Bot do
expect(last.raw).not_to include("translation missing") expect(last.raw).not_to include("translation missing")
expect(last.raw).to include("I found nothing") expect(last.raw).to include("I found nothing")
expect(last.post_custom_prompt.custom_prompt.to_s).to include("I found nothing") expect(last.post_custom_prompt.custom_prompt).to eq(
[["[]", "search", "function"], ["I found nothing, sorry", bot_user.username]],
)
end end
end end

View File

@@ -4,6 +4,11 @@ require_relative "../../../../../support/openai_completions_inference_stubs"
require_relative "../../../../../support/anthropic_completion_stubs" require_relative "../../../../../support/anthropic_completion_stubs"
RSpec.describe Jobs::CreateAiReply do RSpec.describe Jobs::CreateAiReply do
before do
# got to do this cause we include times in system message
freeze_time
end
describe "#execute" do describe "#execute" do
fab!(:topic) { Fabricate(:topic) } fab!(:topic) { Fabricate(:topic) }
fab!(:post) { Fabricate(:post, topic: topic) } fab!(:post) { Fabricate(:post, topic: topic) }

View File

@@ -14,6 +14,14 @@ RSpec.describe DiscourseAi::AiBot::OpenAiBot do
subject { described_class.new(bot_user) } subject { described_class.new(bot_user) }
context "when cleaning usernames" do
it "can properly clean usernames so OpenAI allows it" do
subject.clean_username("test test").should eq("test_test")
subject.clean_username("test.test").should eq("test_test")
subject.clean_username("test😀test").should eq("test_test")
end
end
context "when the topic has one post" do context "when the topic has one post" do
fab!(:post_1) { Fabricate(:post, topic: topic, raw: post_body(1), post_number: 1) } fab!(:post_1) { Fabricate(:post, topic: topic, raw: post_body(1), post_number: 1) }
@@ -23,7 +31,8 @@ RSpec.describe DiscourseAi::AiBot::OpenAiBot do
post_1_message = prompt_messages[-1] post_1_message = prompt_messages[-1]
expect(post_1_message[:role]).to eq("user") expect(post_1_message[:role]).to eq("user")
expect(post_1_message[:content]).to eq("#{post_1.user.username}: #{post_body(1)}") expect(post_1_message[:content]).to eq(post_body(1))
expect(post_1_message[:name]).to eq(post_1.user.username)
end end
end end
@@ -51,13 +60,15 @@ RSpec.describe DiscourseAi::AiBot::OpenAiBot do
# negative cause we may have grounding prompts # negative cause we may have grounding prompts
expect(prompt_messages[-3][:role]).to eq("user") expect(prompt_messages[-3][:role]).to eq("user")
expect(prompt_messages[-3][:content]).to eq("#{post_1.username}: #{post_body(1)}") expect(prompt_messages[-3][:content]).to eq(post_body(1))
expect(prompt_messages[-3][:name]).to eq(post_1.username)
expect(prompt_messages[-2][:role]).to eq("assistant") expect(prompt_messages[-2][:role]).to eq("assistant")
expect(prompt_messages[-2][:content]).to eq(post_body(2)) expect(prompt_messages[-2][:content]).to eq(post_body(2))
expect(prompt_messages[-1][:role]).to eq("user") expect(prompt_messages[-1][:role]).to eq("user")
expect(prompt_messages[-1][:content]).to eq("#{post_3.username}: #{post_body(3)}") expect(prompt_messages[-1][:content]).to eq(post_body(3))
expect(prompt_messages[-1][:name]).to eq(post_3.username)
end end
end end
end end