# Mirror of https://github.com/discourse/discourse-ai.git
# Synced 2025-07-09 15:43:28 +00:00 (182 lines, 6.1 KiB, Ruby)
# frozen_string_literal: true

module DiscourseAi
  module Personas
    module Tools
      # Research tool: selects forum content via a mini filter language,
      # then runs LLM inference over the matching content in batches to
      # extract information according to caller-supplied goals. A dry-run
      # mode only reports how many items the filter matches.
      class Researcher < Tool
        attr_reader :filter, :result_count, :goals, :dry_run

        class << self
          # Tool signature exposed to the LLM: name, description and the
          # accepted parameters (filter, goals, dry_run).
          def signature
            {
              name: name,
              description:
                "Analyze and extract information from content across the forum based on specified filters",
              parameters: [
                { name: "filter", description: filter_description, type: "string" },
                {
                  name: "goals",
                  description:
                    "The specific information you want to extract or analyze from the filtered content, you may specify multiple goals",
                  type: "string",
                },
                {
                  name: "dry_run",
                  description: "When true, only count matching items without processing data",
                  type: "boolean",
                },
              ],
            }
          end

          # Documentation of the filter mini-language shown to the LLM as
          # the "filter" parameter description.
          def filter_description
            <<~TEXT
              Filter string to target specific content.
              - Supports user (@username)
              - date ranges (after:YYYY-MM-DD, before:YYYY-MM-DD for posts; topic_after:YYYY-MM-DD, topic_before:YYYY-MM-DD for topics)
              - categories (category:category1,category2)
              - tags (tag:tag1,tag2)
              - groups (group:group1,group2).
              - status (status:open, status:closed, status:archived, status:noreplies, status:single_user)
              - keywords (keywords:keyword1,keyword2) - specific words to search for in posts
              - max_results (max_results:10) the maximum number of results to return (optional)
              - order (order:latest, order:oldest, order:latest_topic, order:oldest_topic) - the order of the results (optional)

              If multiple tags or categories are specified, they are treated as OR conditions.

              Multiple filters can be combined with spaces. Example: '@sam after:2023-01-01 tag:feature'
            TEXT
          end

          def name
            "researcher"
          end

          # Site-configurable options for this tool (presumably surfaced in
          # the persona admin UI — see the Tool base class).
          def accepted_options
            [
              option(:max_results, type: :integer),
              option(:include_private, type: :boolean),
              option(:max_tokens_per_post, type: :integer),
            ]
          end
        end

        # Runs the tool. Validates parameters, counts matching content, and
        # either returns the count (dry run) or processes the matches with
        # the LLM. +blk+ receives progress/status strings while running.
        #
        # Returns a Hash: either an { error: } hash on bad input, a dry-run
        # summary, or the full results from #process_filter.
        def invoke(&blk)
          max_results = options[:max_results] || 1000

          @filter = parameters[:filter] || ""
          @goals = parameters[:goals] || ""
          # nil (parameter omitted) means "not a dry run"; an explicit
          # false is preserved unchanged by `|| false`.
          @dry_run = parameters[:dry_run] || false

          post = Post.find_by(id: context.post_id)

          return { error: "No goals provided" } if @goals.blank?
          return { error: "No filter provided" } if @filter.blank?

          # Only elevate visibility when include_private is enabled; a nil
          # guardian restricts the search to public content. The modifier-if
          # assignment leaves `guardian` defined as nil when skipped.
          guardian = Guardian.new(context.user) if options[:include_private]

          filter =
            DiscourseAi::Utils::Research::Filter.new(
              @filter,
              limit: max_results,
              guardian: guardian,
            )
          @result_count = filter.search.count

          # Stream an early status line before starting any heavy work.
          blk.call details

          if @dry_run
            { dry_run: true, goals: @goals, filter: @filter, number_of_results: @result_count }
          else
            process_filter(filter, @goals, post, &blk)
          end
        end

        # Localized status line describing the current run (dry or real).
        def details
          if @dry_run
            I18n.t("discourse_ai.ai_bot.tool_description.researcher_dry_run", description_args)
          else
            I18n.t("discourse_ai.ai_bot.tool_description.researcher", description_args)
          end
        end

        # Interpolation arguments for the #details translations.
        def description_args
          { count: @result_count || 0, filter: @filter || "", goals: @goals || "" }
        end

        protected

        # Below this budget a single batch leaves too little room for
        # content plus instructions to produce useful research output.
        MIN_TOKENS_FOR_RESEARCH = 8000

        # Formats the filtered content into token-bounded batches and runs
        # inference on each, collecting the per-batch outputs.
        #
        # Raises ArgumentError when the LLM's context window is too small.
        def process_filter(filter, goals, post, &blk)
          if llm.max_prompt_tokens < MIN_TOKENS_FOR_RESEARCH
            raise ArgumentError,
                  "LLM max tokens too low for research. Minimum is #{MIN_TOKENS_FOR_RESEARCH}."
          end
          formatter =
            DiscourseAi::Utils::Research::LlmFormatter.new(
              filter,
              # Reserve headroom for the system/user prompt wrapping.
              max_tokens_per_batch: llm.max_prompt_tokens - 2000,
              tokenizer: llm.tokenizer,
              max_tokens_per_post: options[:max_tokens_per_post] || 2000,
            )

          results = []

          formatter.each_chunk { |chunk| results << run_inference(chunk[:text], goals, post, &blk) }
          { dry_run: false, goals: goals, filter: @filter, results: results }
        end

        # Runs one LLM completion over a single content chunk, streams a
        # dot-based progress update through +blk+, and returns the joined
        # completion text.
        def run_inference(chunk_text, goals, post, &blk)
          system_prompt = goal_system_prompt(goals)
          user_prompt = goal_user_prompt(goals, chunk_text)

          prompt =
            DiscourseAi::Completions::Prompt.new(
              system_prompt,
              messages: [{ type: :user, content: user_prompt }],
              post_id: post.id,
              topic_id: post.topic_id,
            )

          results = []
          llm.generate(
            prompt,
            user: post.user,
            feature_name: context.feature_name,
            cancel_manager: context.cancel_manager,
          ) { |partial| results << partial }

          # One extra dot per processed chunk so the user sees progress.
          @progress_dots ||= 0
          @progress_dots += 1
          blk.call(details + "\n\n#{"." * @progress_dots}")
          results.join
        end

        # System prompt instructing the model to act as a research tool.
        def goal_system_prompt(goals)
          <<~TEXT
            You are a researcher tool designed to analyze and extract information from forum content.
            Your task is to process the provided content and extract relevant information based on the specified goal.

            Your goal is: #{goals}
          TEXT
        end

        # User prompt wrapping the chunk in {{{ }}} delimiters alongside
        # the research goal.
        def goal_user_prompt(goals, chunk_text)
          <<~TEXT
            Here is the content to analyze:

            {{{
            #{chunk_text}
            }}}

            Your goal is: #{goals}
          TEXT
        end
      end
    end
  end
end
|