FEATURE: new Discourse Helper persona (#473)

This persona searches Discourse Meta for help with Discourse and
points users at relevant posts.

It is somewhat similar to using "Forum Helper" on meta, with the
notable difference that we cannot lean on semantic search, so we
rely on some prompt engineering and try to keep the search terms simple.
This commit is contained in:
Sam 2024-02-19 14:52:12 +11:00 committed by GitHub
parent dd6b073fc3
commit 0fb87b00e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 333 additions and 3 deletions

View File

@ -170,6 +170,9 @@ en:
dall_e3: dall_e3:
name: "DALL-E 3" name: "DALL-E 3"
description: "AI Bot specialized in generating images using DALL-E 3" description: "AI Bot specialized in generating images using DALL-E 3"
discourse_helper:
name: "Discourse Helper"
description: "AI Bot specialized in helping with Discourse related tasks"
topic_not_found: "Summary unavailable, topic not found!" topic_not_found: "Summary unavailable, topic not found!"
summarizing: "Summarizing topic" summarizing: "Summarizing topic"
searching: "Searching for: '%{query}'" searching: "Searching for: '%{query}'"
@ -195,6 +198,7 @@ en:
schema: "Look up database schema" schema: "Look up database schema"
search_settings: "Searching site settings" search_settings: "Searching site settings"
dall_e: "Generate image" dall_e: "Generate image"
search_meta_discourse: "Search Meta Discourse"
command_help: command_help:
random_picker: "Pick a random number or a random element of a list" random_picker: "Pick a random number or a random element of a list"
categories: "List all publicly visible categories on the forum" categories: "List all publicly visible categories on the forum"
@ -209,6 +213,7 @@ en:
schema: "Look up database schema" schema: "Look up database schema"
search_settings: "Search site settings" search_settings: "Search site settings"
dall_e: "Generate image using DALL-E 3" dall_e: "Generate image using DALL-E 3"
search_meta_discourse: "Search Meta Discourse"
command_description: command_description:
random_picker: "Picking from %{options}, picked: %{result}" random_picker: "Picking from %{options}, picked: %{result}"
read: "Reading: <a href='%{url}'>%{title}</a>" read: "Reading: <a href='%{url}'>%{title}</a>"
@ -225,6 +230,9 @@ en:
search: search:
one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'" one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'"
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'" other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
search_meta_discourse:
one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'"
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
google: google:
one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'" one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'"
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'" other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"

View File

@ -0,0 +1,48 @@
# frozen_string_literal: true
module DiscourseAi
module AiBot
module Personas
class DiscourseHelper < Persona
# Tools this persona exposes to the LLM: only the Discourse Meta search tool.
def tools
[Tools::DiscourseMetaSearch]
end
# System prompt for the Discourse Helper persona.
#
# Instructs the model to answer Discourse questions by searching
# meta.discourse.org with short keyword queries (search terms are ANDed),
# to always search twice, and to back answers with actual search results.
# The literal `{time}` placeholder is left in the returned text as-is.
#
# @return [String] the prompt text
def system_prompt
  <<~PROMPT
    You are Discourse Helper Bot
    - Discourse Helper Bot understands *markdown* and responds in Discourse **markdown**.
    - Discourse Helper Bot has access to the search function on meta.discourse.org and can help you find answers to your questions.
    - Discourse Helper Bot ALWAYS backs up answers with actual search results from meta.discourse.org, even if the information is in your training set
    - Discourse Helper Bot does not use the word Discourse in searches, search function is restricted to Discourse Meta and Discourse specific discussions
    - Discourse Helper Bot understands that search is keyword based (terms are joined using AND) and that it is important to simplify search terms to find things.
    - Discourse Helper Bot understands that users often badly phrase and misspell words, it will compensate for that by guessing what user means.
    Example:
    User asks:
    "I am on the discourse standad plan how do I enable badge sqls"
    attempt #1: "badge sql standard"
    attempt #2: "badge sql hosted"
    User asks:
    "how do i embed a discourse topic as an iframe"
    attempt #1: "topic embed iframe"
    attempt #2: "iframe"
    - Discourse Helper Bot ALWAYS SEARCHES TWICE, even if a great result shows up in the first search, it will search a second time using a wider net to make sure you are getting the best result.
    Some popular categories on meta are: bug, feature, support, ux, dev, documentation, announcements, marketplace, theme, plugin, theme-component, migration, installation.
    - Discourse Helper Bot will lean on categories to filter results.
    The date now is: {time}, much has changed since you were trained.
  PROMPT
end
end
end
end
end

View File

@ -14,6 +14,7 @@ module DiscourseAi
Personas::Researcher => -5, Personas::Researcher => -5,
Personas::Creative => -6, Personas::Creative => -6,
Personas::DallE3 => -7, Personas::DallE3 => -7,
Personas::DiscourseHelper => -8,
} }
end end
@ -62,6 +63,7 @@ module DiscourseAi
Tools::Summarize, Tools::Summarize,
Tools::SettingContext, Tools::SettingContext,
Tools::RandomPicker, Tools::RandomPicker,
Tools::DiscourseMetaSearch,
] ]
tools << Tools::ListTags if SiteSetting.tagging_enabled tools << Tools::ListTags if SiteSetting.tagging_enabled

View File

@ -0,0 +1,180 @@
# frozen_string_literal: true
module DiscourseAi
module AiBot
module Tools
class DiscourseMetaSearch < Tool
class << self
  # Function-calling signature advertised to the LLM for this tool.
  #
  # The description names meta.discourse.org explicitly: #invoke always
  # queries that site, never the forum the bot is running on.
  #
  # @return [Hash] tool name, description and parameter definitions
  def signature
    {
      name: name,
      description:
        "Will search topics on meta.discourse.org (Discourse Meta), when rendering always prefer to link to the topics you find",
      parameters: [
        {
          name: "search_query",
          description:
            "Specific keywords to search for, space separated (correct bad spelling, remove connector words)",
          type: "string",
        },
        {
          name: "user",
          description:
            "Filter search results to this username (only include if user explicitly asks to filter by user)",
          type: "string",
        },
        {
          name: "order",
          description: "search result order",
          type: "string",
          enum: %w[latest latest_topic oldest views likes],
        },
        {
          name: "max_posts",
          description:
            "maximum number of posts on the topics (topics where lots of people posted)",
          type: "integer",
        },
        {
          name: "tags",
          description:
            "list of tags to search for. Use + to join with OR, use , to join with AND",
          type: "string",
        },
        { name: "category", description: "category name to filter to", type: "string" },
        {
          name: "before",
          description: "only topics created before a specific date YYYY-MM-DD",
          type: "string",
        },
        {
          name: "after",
          description: "only topics created after a specific date YYYY-MM-DD",
          type: "string",
        },
        {
          name: "status",
          description: "search for topics in a particular state",
          type: "string",
          enum: %w[open closed archived noreplies single_user],
        },
      ],
    }
  end

  # Identifier of the tool; also used as the `name` entry of the signature.
  #
  # @return [String]
  def name
    "search_meta_discourse"
  end

  # Extra system-prompt text that nudges the model towards searching and
  # towards short queries, since search terms are ANDed together.
  #
  # @return [String]
  def custom_system_message
    <<~TEXT
      You were trained on OLD data, lean on search to get up to date information
      Discourse search joins all terms with AND. Reduce and simplify terms to find more results.
    TEXT
  end
end
# Subset of the tool parameters that #invoke appends to the query as
# `key:value` search filters.
#
# @return [Hash] only the filter parameters that were supplied
def search_args
  filter_keys = %i[category user order max_posts tags before after status]
  parameters.slice(*filter_keys)
end
# Runs a keyword search against meta.discourse.org and shapes the hits
# for the LLM.
#
# The query is `search_query` followed by one `key:value` filter for every
# non-blank entry of #search_args, plus `options[:base_query]` when set.
# Yields one progress message (the localized "searching" string) before
# the HTTP request is made.
#
# @param bot_user [Object] not used by this method
# @param llm [Object] passed to #calculate_max_results to size the result set
# @yieldparam message [String] progress text
# @return [Object] a Hash with empty `rows` and an `instruction` when nothing
#   was found, otherwise whatever #format_results returns for the rows
def invoke(bot_user, llm)
  # `.dup` keeps `<<` from mutating the caller's search_query string.
  # `next` (not `return`) skips a blank filter: `return` inside the block
  # would exit #invoke itself and hand the caller a bare String.
  search_string =
    search_args.reduce(parameters[:search_query].to_s.dup) do |memo, (key, value)|
      next memo if value.blank?
      memo << " " << "#{key}:#{value}"
    end

  @last_query = search_string

  yield(I18n.t("discourse_ai.ai_bot.searching", query: search_string))

  if options[:base_query].present?
    search_string = "#{search_string} #{options[:base_query]}"
  end

  url = "https://meta.discourse.org/search.json?q=#{CGI.escape(search_string)}"
  json = JSON.parse(Net::HTTP.get(URI(url)))

  # let's be frugal with tokens, 50 results is too much and stuff gets cut off
  max_results = calculate_max_results(llm)
  results_limit = parameters[:limit] || max_results
  results_limit = max_results if parameters[:limit].to_i > max_results

  posts = json["posts"] || []
  posts = posts[0..results_limit.to_i - 1]

  @last_num_results = posts.length

  if posts.blank?
    { args: parameters, rows: [], instruction: "nothing was found, expand your search" }
  else
    categories = self.class.categories
    topics = (json["topics"]).map { |t| [t["id"], t] }.to_h

    format_results(posts, args: parameters) do |post|
      topic = topics[post["topic_id"]]

      # The category map is memoized, so a category may be missing from it;
      # degrade to an empty label instead of raising on nil.
      category = categories[topic["category_id"]]
      parent = category && categories[category["parent_category_id"]]
      category_names = [parent, category].compact.map { |c| c["name"] }.join(" > ")

      {
        title: topic["title"],
        url: "https://meta.discourse.org/t/-/#{post["topic_id"]}/#{post["post_number"]}",
        username: post["username"],
        excerpt: post["blurb"],
        created: post["created_at"],
        category: category_names,
        likes: post["like_count"],
        # tags may be absent/nil on a topic
        tags: Array(topic["tags"]).join(", "),
      }
    end
  end
end
protected
# Category lookup for meta.discourse.org, keyed by category id.
#
# Fetched from the site's `site.json` on first use and then kept in a
# class-level instance variable, so later calls make no HTTP request.
#
# @return [Hash] id => { "name" => ..., "parent_category_id" => ... }
def self.categories
  return @categories if defined?(@categories)

  site = JSON.parse(Net::HTTP.get(URI("https://meta.discourse.org/site.json")))

  @categories =
    site["categories"].each_with_object({}) do |cat, by_id|
      by_id[cat["id"]] = {
        "name" => cat["name"],
        "parent_category_id" => cat["parent_category_id"],
      }
    end
end
# Values describing the most recent search: result count, query text and
# a link to the same search on meta.discourse.org. Falls back to 0 / ""
# when no search has been recorded yet.
#
# @return [Hash] with :count, :query and :url
def description_args
  query = @last_query || ""
  result_count = @last_num_results || 0

  {
    count: result_count,
    query: query,
    url: "https://meta.discourse.org/search?q=#{CGI.escape(query)}",
  }
end
private
# Decides how many search results to keep.
#
# A positive `options[:max_results]` wins, capped at 100. Otherwise the
# limit scales with the model's prompt budget: 60 above 30k tokens,
# 40 above 10k tokens, 20 for anything smaller.
#
# @param llm [#max_prompt_tokens]
# @return [Integer]
def calculate_max_results(llm)
  configured = options[:max_results].to_i
  return(configured < 100 ? configured : 100) if configured.positive?

  token_budget = llm.max_prompt_tokens
  return 60 if token_budget > 30_000
  return 40 if token_budget > 10_000

  20
end
end
end
end
end

View File

@ -88,7 +88,7 @@ module DiscourseAi
end end
def search_args def search_args
parameters.slice(:user, :order, :max_posts, :tags, :before, :after, :status) parameters.slice(:category, :user, :order, :max_posts, :tags, :before, :after, :status)
end end
def invoke(bot_user, llm) def invoke(bot_user, llm)

1
spec/fixtures/search_meta/search.json vendored Normal file

File diff suppressed because one or more lines are too long

1
spec/fixtures/search_meta/site.json vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -144,6 +144,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
DiscourseAi::AiBot::Personas::General, DiscourseAi::AiBot::Personas::General,
DiscourseAi::AiBot::Personas::Artist, DiscourseAi::AiBot::Personas::Artist,
DiscourseAi::AiBot::Personas::Creative, DiscourseAi::AiBot::Personas::Creative,
DiscourseAi::AiBot::Personas::DiscourseHelper,
DiscourseAi::AiBot::Personas::Researcher, DiscourseAi::AiBot::Personas::Researcher,
DiscourseAi::AiBot::Personas::SettingsExplorer, DiscourseAi::AiBot::Personas::SettingsExplorer,
DiscourseAi::AiBot::Personas::SqlHelper, DiscourseAi::AiBot::Personas::SqlHelper,
@ -159,6 +160,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
DiscourseAi::AiBot::Personas::SqlHelper, DiscourseAi::AiBot::Personas::SqlHelper,
DiscourseAi::AiBot::Personas::SettingsExplorer, DiscourseAi::AiBot::Personas::SettingsExplorer,
DiscourseAi::AiBot::Personas::Creative, DiscourseAi::AiBot::Personas::Creative,
DiscourseAi::AiBot::Personas::DiscourseHelper,
) )
AiPersona.find( AiPersona.find(
@ -171,6 +173,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
DiscourseAi::AiBot::Personas::SqlHelper, DiscourseAi::AiBot::Personas::SqlHelper,
DiscourseAi::AiBot::Personas::SettingsExplorer, DiscourseAi::AiBot::Personas::SettingsExplorer,
DiscourseAi::AiBot::Personas::Creative, DiscourseAi::AiBot::Personas::Creative,
DiscourseAi::AiBot::Personas::DiscourseHelper,
) )
end end
end end

View File

@ -0,0 +1,68 @@
# frozen_string_literal: true
# Exercises the Discourse Meta search tool against canned search.json and
# site.json fixtures; all HTTP calls to meta.discourse.org are stubbed.
RSpec.describe DiscourseAi::AiBot::Tools::DiscourseMetaSearch do
  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
  let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-3.5-turbo") }
  let(:progress_blk) { proc {} }

  let(:mock_search_json) do
    File.read(File.expand_path("../../../../fixtures/search_meta/search.json", __dir__))
  end

  let(:mock_site_json) do
    File.read(File.expand_path("../../../../fixtures/search_meta/site.json", __dir__))
  end

  before do
    SiteSetting.ai_bot_enabled = true
    SiteSetting.ai_openai_api_key = "asd"

    # the tool resolves category names through site.json
    stub_request(:get, "https://meta.discourse.org/site.json").to_return(
      status: 200,
      body: mock_site_json,
      headers: {
      },
    )
  end

  it "searches meta.discourse.org" do
    stub_request(:get, "https://meta.discourse.org/search.json?q=test").to_return(
      status: 200,
      body: mock_search_json,
      headers: {
      },
    )

    results = described_class.new({ search_query: "test" }).invoke(bot_user, llm, &progress_blk)

    expect(results[:rows].length).to eq(20)

    category_column = results[:column_names].index("category")
    expect(results[:rows].first[category_column]).to eq("documentation > developers")
  end

  it "passes on all search parameters" do
    url =
      "https://meta.discourse.org/search.json?q=test%20category:test%20user:test%20order:test%20max_posts:1%20tags:test%20before:test%20after:test%20status:test"

    stub_request(:get, url).to_return(status: 200, body: mock_search_json, headers: {})

    # one value per declared parameter: 1 for integers, "test" otherwise
    params =
      described_class.signature[:parameters]
        .to_h { |param| [param[:name], param[:type] == "integer" ? 1 : "test"] }
        .symbolize_keys

    results = described_class.new(params).invoke(bot_user, llm, &progress_blk)

    expect(results[:args]).to eq(params)
  end
end

View File

@ -37,7 +37,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
search_post = Fabricate(:post, topic: topic_with_tags) search_post = Fabricate(:post, topic: topic_with_tags)
bot_post = Fabricate(:post) _bot_post = Fabricate(:post)
search = described_class.new({ order: "latest" }, persona_options: persona_options) search = described_class.new({ order: "latest" }, persona_options: persona_options)
@ -53,7 +53,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
end end
it "can handle no results" do it "can handle no results" do
post1 = Fabricate(:post, topic: topic_with_tags) _post1 = Fabricate(:post, topic: topic_with_tags)
search = described_class.new({ search_query: "ABDDCDCEDGDG", order: "fake" }) search = described_class.new({ search_query: "ABDDCDCEDGDG", order: "fake" })
results = search.invoke(bot_user, llm, &progress_blk) results = search.invoke(bot_user, llm, &progress_blk)
@ -107,6 +107,25 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
expect(results[:rows].to_s).to include("/subfolder" + post1.url) expect(results[:rows].to_s).to include("/subfolder" + post1.url)
end end
it "passes on all search params" do
params =
described_class.signature[:parameters]
.map do |param|
if param[:type] == "integer"
[param[:name], 1]
else
[param[:name], "test"]
end
end
.to_h
.symbolize_keys
search = described_class.new(params)
results = search.invoke(bot_user, llm, &progress_blk)
expect(results[:args]).to eq(params)
end
it "returns rich topic information" do it "returns rich topic information" do
post1 = Fabricate(:post, like_count: 1, topic: topic_with_tags) post1 = Fabricate(:post, like_count: 1, topic: topic_with_tags)
search = described_class.new({ user: post1.user.username }) search = described_class.new({ user: post1.user.username })