FEATURE: new Discourse Helper persona (#473)
This persona searches Discourse Meta for help with Discourse and points users at relevant posts. It is somewhat similar to using "Forum Helper" on meta, with the notable difference that we cannot lean on semantic search, so we use some prompt engineering to try to keep the searches simple.
This commit is contained in:
parent
dd6b073fc3
commit
0fb87b00e2
|
@ -170,6 +170,9 @@ en:
|
|||
dall_e3:
|
||||
name: "DALL-E 3"
|
||||
description: "AI Bot specialized in generating images using DALL-E 3"
|
||||
discourse_helper:
|
||||
name: "Discourse Helper"
|
||||
description: "AI Bot specialized in helping with Discourse related tasks"
|
||||
topic_not_found: "Summary unavailable, topic not found!"
|
||||
summarizing: "Summarizing topic"
|
||||
searching: "Searching for: '%{query}'"
|
||||
|
@ -195,6 +198,7 @@ en:
|
|||
schema: "Look up database schema"
|
||||
search_settings: "Searching site settings"
|
||||
dall_e: "Generate image"
|
||||
search_meta_discourse: "Search Meta Discourse"
|
||||
command_help:
|
||||
random_picker: "Pick a random number or a random element of a list"
|
||||
categories: "List all publicly visible categories on the forum"
|
||||
|
@ -209,6 +213,7 @@ en:
|
|||
schema: "Look up database schema"
|
||||
search_settings: "Search site settings"
|
||||
dall_e: "Generate image using DALL-E 3"
|
||||
search_meta_discourse: "Search Meta Discourse"
|
||||
command_description:
|
||||
random_picker: "Picking from %{options}, picked: %{result}"
|
||||
read: "Reading: <a href='%{url}'>%{title}</a>"
|
||||
|
@ -225,6 +230,9 @@ en:
|
|||
search:
|
||||
one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'"
|
||||
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
|
||||
search_meta_discourse:
|
||||
one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'"
|
||||
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
|
||||
google:
|
||||
one: "Found %{count} <a href='%{url}'>result</a> for '%{query}'"
|
||||
other: "Found %{count} <a href='%{url}'>results</a> for '%{query}'"
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
#frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
  module AiBot
    module Personas
      # Persona that answers questions about Discourse by searching
      # meta.discourse.org and pointing users at the posts it finds.
      class DiscourseHelper < Persona
        # Tools this persona is allowed to call: only the Discourse Meta search.
        #
        # @return [Array<Class>]
        def tools
          [Tools::DiscourseMetaSearch]
        end

        # System prompt sent to the LLM for this persona.
        #
        # The prompt coaches the model to issue short keyword searches, because
        # the search it has access to joins terms with AND.
        #
        # NOTE(review): `{time}` is emitted literally by this heredoc (it is not
        # Ruby interpolation); presumably the persona framework substitutes it
        # before the prompt is sent -- confirm against Persona.
        #
        # @return [String]
        def system_prompt
          <<~PROMPT
            You are Discourse Helper Bot

            - Discourse Helper Bot understands *markdown* and responds in Discourse **markdown**.
            - Discourse Helper Bot has access to the search function on meta.discourse.org and can help you find answers to your questions.
            - Discourse Helper Bot ALWAYS backs up answers with actual search results from meta.discourse.org, even if the information is in your training set
            - Discourse Helper Bot does not use the word discourse in searches, search function is restricted to Discourse Meta and Discourse specific discussions
            - Discourse Helper Bot understands that search is keyword based (terms are joined using AND) and that it is important to simplify search terms to find things.
            - Discourse Helper Bot understands that users often badly phrase and misspell words, it will compensate for that by guessing what user means.

            Example:

            User asks:

            "I am on the discourse standad plan how do I enable badge sqls"
            attempt #1: "badge sql standard"
            attempt #2: "badge sql hosted"

            User asks:

            "how do i embed a discourse topic as an iframe"
            attempt #1: "topic embed iframe"
            attempt #2: "iframe"

            - Discourse Helper Bot ALWAYS SEARCHES TWICE, even if a great result shows up in the first search, it will search a second time using a wider net to make sure you are getting the best result.

            Some popular categories on meta are: bug, feature, support, ux, dev, documentation, announcements, marketplace, theme, plugin, theme-component, migration, installation.

            - Discourse Helper Bot will lean on categories to filter results.

            The date now is: {time}, much has changed since you were trained.
          PROMPT
        end
      end
    end
  end
end
|
|
@ -14,6 +14,7 @@ module DiscourseAi
|
|||
Personas::Researcher => -5,
|
||||
Personas::Creative => -6,
|
||||
Personas::DallE3 => -7,
|
||||
Personas::DiscourseHelper => -8,
|
||||
}
|
||||
end
|
||||
|
||||
|
@ -62,6 +63,7 @@ module DiscourseAi
|
|||
Tools::Summarize,
|
||||
Tools::SettingContext,
|
||||
Tools::RandomPicker,
|
||||
Tools::DiscourseMetaSearch,
|
||||
]
|
||||
|
||||
tools << Tools::ListTags if SiteSetting.tagging_enabled
|
||||
|
|
|
@ -0,0 +1,180 @@
|
|||
#frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
  module AiBot
    module Tools
      # Tool that runs a keyword search against the public search endpoint of
      # meta.discourse.org and hands the matching posts back to the LLM as rows.
      class DiscourseMetaSearch < Tool
        class << self
          # Function signature advertised to the LLM: the tool name, a
          # description, and the accepted parameters.
          #
          # NOTE(review): the description says "current discourse instance" but
          # #invoke always queries meta.discourse.org -- consider rewording.
          #
          # @return [Hash]
          def signature
            {
              name: name,
              description:
                "Will search topics in the current discourse instance, when rendering always prefer to link to the topics you find",
              parameters: [
                {
                  name: "search_query",
                  description:
                    "Specific keywords to search for, space separated (correct bad spelling, remove connector words)",
                  type: "string",
                },
                {
                  name: "user",
                  description:
                    "Filter search results to this username (only include if user explicitly asks to filter by user)",
                  type: "string",
                },
                {
                  name: "order",
                  description: "search result order",
                  type: "string",
                  enum: %w[latest latest_topic oldest views likes],
                },
                {
                  name: "max_posts",
                  description:
                    "maximum number of posts on the topics (topics where lots of people posted)",
                  type: "integer",
                },
                {
                  name: "tags",
                  description:
                    "list of tags to search for. Use + to join with OR, use , to join with AND",
                  type: "string",
                },
                { name: "category", description: "category name to filter to", type: "string" },
                {
                  name: "before",
                  description: "only topics created before a specific date YYYY-MM-DD",
                  type: "string",
                },
                {
                  name: "after",
                  description: "only topics created after a specific date YYYY-MM-DD",
                  type: "string",
                },
                {
                  name: "status",
                  description: "search for topics in a particular state",
                  type: "string",
                  enum: %w[open closed archived noreplies single_user],
                },
              ],
            }
          end

          # @return [String] identifier of this tool
          def name
            "search_meta_discourse"
          end

          # Extra system-prompt text contributed by this tool.
          #
          # @return [String]
          def custom_system_message
            <<~TEXT
              You were trained on OLD data, lean on search to get up to date information
              Discourse search joins all terms with AND. Reduce and simplify terms to find more results.
            TEXT
          end

          # Category lookup for meta.discourse.org, keyed by category id.
          # Fetched from /site.json on first use and then memoized on the class
          # for the lifetime of the process.
          #
          # Kept public: #invoke reaches it through `self.class.categories`.
          #
          # @return [Hash{Integer => Hash}] id => { "name", "parent_category_id" }
          def categories
            return @categories if defined?(@categories)

            url = "https://meta.discourse.org/site.json"
            json = JSON.parse(Net::HTTP.get(URI(url)))

            @categories =
              json["categories"].to_h do |c|
                [c["id"], { "name" => c["name"], "parent_category_id" => c["parent_category_id"] }]
              end
          end
        end

        # Subset of the LLM-supplied parameters that are forwarded to the
        # search as `key:value` filters.
        #
        # @return [Hash]
        def search_args
          parameters.slice(:category, :user, :order, :max_posts, :tags, :before, :after, :status)
        end

        # Runs the search and formats the results.
        #
        # Yields a progress message (the "searching" translation) before the
        # HTTP request is made.
        #
        # @param bot_user [Object] unused here; part of the tool interface
        # @param llm [#max_prompt_tokens] used to size the result cap
        # @return [Hash] `{ args:, rows: [], instruction: }` when nothing is
        #   found, otherwise whatever `format_results` returns
        def invoke(bot_user, llm)
          search_string =
            search_args.reduce(+parameters[:search_query].to_s) do |memo, (key, value)|
              # `next`, not `return`: a `return` inside this block would exit
              # #invoke itself with the partial query string as soon as one
              # filter is blank, and no search would be performed.
              next memo if value.blank?
              memo << " " << "#{key}:#{value}"
            end

          # Remembered without the persona's base query so that
          # #description_args reports only what the LLM asked for.
          @last_query = search_string

          yield(I18n.t("discourse_ai.ai_bot.searching", query: search_string))

          if options[:base_query].present?
            search_string = "#{search_string} #{options[:base_query]}"
          end

          url = "https://meta.discourse.org/search.json?q=#{CGI.escape(search_string)}"

          json = JSON.parse(Net::HTTP.get(URI(url)))

          # let's be frugal with tokens, 50 results is too much and stuff gets cut off
          max_results = calculate_max_results(llm)

          # A missing, zero, negative or non-numeric limit falls back to the
          # cap instead of producing a range that returns every post.
          requested_limit = parameters[:limit].to_i
          results_limit =
            requested_limit.positive? ? [requested_limit, max_results].min : max_results

          posts = (json["posts"] || []).first(results_limit)

          @last_num_results = posts.length

          if posts.blank?
            { args: parameters, rows: [], instruction: "nothing was found, expand your search" }
          else
            categories = self.class.categories
            topics = (json["topics"] || []).to_h { |t| [t["id"], t] }

            format_results(posts, args: parameters) do |post|
              topic = topics[post["topic_id"]] || {}

              {
                title: topic["title"],
                url: "https://meta.discourse.org/t/-/#{post["topic_id"]}/#{post["post_number"]}",
                username: post["username"],
                excerpt: post["blurb"],
                created: post["created_at"],
                category: category_path(categories, topic["category_id"]),
                likes: post["like_count"],
                tags: Array(topic["tags"]).join(", "),
              }
            end
          end
        end

        protected

        # Values describing the last search: result count, query text and a
        # link to the same search on meta.discourse.org.
        #
        # NOTE(review): presumably interpolated into the
        # `search_meta_discourse` command description translation -- confirm.
        #
        # @return [Hash]
        def description_args
          {
            count: @last_num_results || 0,
            query: @last_query || "",
            url: "https://meta.discourse.org/search?q=#{CGI.escape(@last_query || "")}",
          }
        end

        private

        # Builds "parent > child" (or just "child") for a category id.
        # Returns an empty string when the id is not in the lookup, which can
        # happen because the category list is cached per process.
        def category_path(categories, category_id)
          category = categories[category_id]
          return "" if category.nil?

          parent = categories[category["parent_category_id"]]
          [parent && parent["name"], category["name"]].compact.join(" > ")
        end

        # Upper bound on returned posts: the persona's `max_results` option
        # (capped at 100) when set, otherwise 60/40/20 depending on how many
        # prompt tokens the LLM accepts.
        def calculate_max_results(llm)
          max_results = options[:max_results].to_i
          return [max_results, 100].min if max_results > 0

          if llm.max_prompt_tokens > 30_000
            60
          elsif llm.max_prompt_tokens > 10_000
            40
          else
            20
          end
        end
      end
    end
  end
end
|
|
@ -88,7 +88,7 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
def search_args
|
||||
parameters.slice(:user, :order, :max_posts, :tags, :before, :after, :status)
|
||||
parameters.slice(:category, :user, :order, :max_posts, :tags, :before, :after, :status)
|
||||
end
|
||||
|
||||
def invoke(bot_user, llm)
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -144,6 +144,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
|
|||
DiscourseAi::AiBot::Personas::General,
|
||||
DiscourseAi::AiBot::Personas::Artist,
|
||||
DiscourseAi::AiBot::Personas::Creative,
|
||||
DiscourseAi::AiBot::Personas::DiscourseHelper,
|
||||
DiscourseAi::AiBot::Personas::Researcher,
|
||||
DiscourseAi::AiBot::Personas::SettingsExplorer,
|
||||
DiscourseAi::AiBot::Personas::SqlHelper,
|
||||
|
@ -159,6 +160,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
|
|||
DiscourseAi::AiBot::Personas::SqlHelper,
|
||||
DiscourseAi::AiBot::Personas::SettingsExplorer,
|
||||
DiscourseAi::AiBot::Personas::Creative,
|
||||
DiscourseAi::AiBot::Personas::DiscourseHelper,
|
||||
)
|
||||
|
||||
AiPersona.find(
|
||||
|
@ -171,6 +173,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
|
|||
DiscourseAi::AiBot::Personas::SqlHelper,
|
||||
DiscourseAi::AiBot::Personas::SettingsExplorer,
|
||||
DiscourseAi::AiBot::Personas::Creative,
|
||||
DiscourseAi::AiBot::Personas::DiscourseHelper,
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
# frozen_string_literal: true
|
||||
RSpec.describe DiscourseAi::AiBot::Tools::DiscourseMetaSearch do
  # Enable the bot and give it a dummy OpenAI key.
  before do
    SiteSetting.ai_bot_enabled = true
    SiteSetting.ai_openai_api_key = "asd"
  end

  let(:bot_user) { User.find(DiscourseAi::AiBot::EntryPoint::GPT3_5_TURBO_ID) }
  let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-3.5-turbo") }
  let(:progress_blk) { proc {} }

  # Canned meta.discourse.org responses live next to the other spec fixtures.
  let(:fixture_dir) { File.expand_path("../../../../../fixtures/search_meta", __FILE__) }
  let(:mock_search_json) { File.read(File.join(fixture_dir, "search.json")) }
  let(:mock_site_json) { File.read(File.join(fixture_dir, "site.json")) }

  # The tool looks up category names through site.json, so stub it for every example.
  before do
    stub_request(:get, "https://meta.discourse.org/site.json").to_return(
      status: 200,
      body: mock_site_json,
      headers: {},
    )
  end

  it "searches meta.discourse.org" do
    stub_request(:get, "https://meta.discourse.org/search.json?q=test").to_return(
      status: 200,
      body: mock_search_json,
      headers: {},
    )

    tool = described_class.new({ search_query: "test" })
    result = tool.invoke(bot_user, llm, &progress_blk)

    expect(result[:rows].length).to eq(20)

    category_column = result[:column_names].index("category")
    expect(result[:rows].first[category_column]).to eq("documentation > developers")
  end

  it "passes on all search parameters" do
    url =
      "https://meta.discourse.org/search.json?q=test%20category:test%20user:test%20order:test%20max_posts:1%20tags:test%20before:test%20after:test%20status:test"

    stub_request(:get, url).to_return(status: 200, body: mock_search_json, headers: {})

    # One value per declared parameter: 1 for integers, "test" for everything else.
    params =
      described_class.signature[:parameters].to_h do |param|
        [param[:name].to_sym, param[:type] == "integer" ? 1 : "test"]
      end

    tool = described_class.new(params)
    result = tool.invoke(bot_user, llm, &progress_blk)

    expect(result[:args]).to eq(params)
  end
end
|
|
@ -37,7 +37,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
|
|||
|
||||
search_post = Fabricate(:post, topic: topic_with_tags)
|
||||
|
||||
bot_post = Fabricate(:post)
|
||||
_bot_post = Fabricate(:post)
|
||||
|
||||
search = described_class.new({ order: "latest" }, persona_options: persona_options)
|
||||
|
||||
|
@ -53,7 +53,7 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
|
|||
end
|
||||
|
||||
it "can handle no results" do
|
||||
post1 = Fabricate(:post, topic: topic_with_tags)
|
||||
_post1 = Fabricate(:post, topic: topic_with_tags)
|
||||
search = described_class.new({ search_query: "ABDDCDCEDGDG", order: "fake" })
|
||||
|
||||
results = search.invoke(bot_user, llm, &progress_blk)
|
||||
|
@ -107,6 +107,25 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
|
|||
expect(results[:rows].to_s).to include("/subfolder" + post1.url)
|
||||
end
|
||||
|
||||
it "passes on all search params" do
  # One value per declared parameter: 1 for integers, "test" for everything else.
  params =
    described_class.signature[:parameters].to_h do |param|
      [param[:name].to_sym, param[:type] == "integer" ? 1 : "test"]
    end

  tool = described_class.new(params)
  result = tool.invoke(bot_user, llm, &progress_blk)

  expect(result[:args]).to eq(params)
end
|
||||
|
||||
it "returns rich topic information" do
|
||||
post1 = Fabricate(:post, like_count: 1, topic: topic_with_tags)
|
||||
search = described_class.new({ user: post1.user.username })
|
||||
|
|
Loading…
Reference in New Issue