FEATURE: LLM based periodical summary report (#357)

Introduce a Discourse Automation based periodical report. Depends on Discourse Automation.

Report works best with very large context language models such as GPT-4-Turbo and Claude 2.

- Introduces final_insts to generic llm format, for claude to work best it is better to guide the last assistant message (we should add this to other spots as well)
- Adds GPT-4 turbo support to generic llm interface
This commit is contained in:
Sam 2023-12-19 12:04:15 +11:00 committed by GitHub
parent e0bf6adb5b
commit d0f54443ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 955 additions and 215 deletions

View File

@ -6,12 +6,59 @@ en:
discourse_ai: "Discourse AI" discourse_ai: "Discourse AI"
js: js:
discourse_automation: discourse_automation:
ai_models:
gpt_4_turbo: GPT 4 Turbo
gpt_4: GPT 4
gpt_3_5_turbo: GPT 3.5 Turbo
claude_2: Claude 2
gemini_pro: Gemini Pro
scriptables: scriptables:
llm_report:
fields:
sender:
label: "Sender"
description: "The user that will send the report"
receivers:
label: "Receivers"
description: "The users that will receive the report (can be email or usernames)"
title:
label: "Title"
description: "The title of the report"
days:
label: "Days"
description: "The timespan of the report"
offset:
label: "Offset"
description: "When testing you may want to run the report historically, use offset to start the report at an earlier date"
instructions:
label: "Instructions"
description: "The instructions provided to the large language model"
sample_size:
label: "Sample Size"
description: "The number of posts to sample for the report"
tokens_per_post:
label: "Tokens per post"
description: "The number of llm tokens to use per post"
model:
label: "Model"
description: "LLM to use for report generation"
categories:
label: "Categories"
description: "Filter topics to only these categories"
tags:
label: "Tags"
description: "Filter topics only to these tags"
allow_secure_categories:
label: "Allow secure categories"
description: "Allow the report to be generated for topics in secure categories"
debug_mode:
label: "Debug Mode"
description: "Enable debug mode to see the raw input and output of the LLM"
priority_group:
label: "Priority Group"
description: "Prioritize content from this group in the report"
llm_triage: llm_triage:
models:
gpt_4: GPT 4
gpt_3_5_turbo: GPT 3.5 Turbo
claude_2: Claude 2
fields: fields:
system_prompt: system_prompt:
label: "System Prompt" label: "System Prompt"

View File

@ -5,6 +5,9 @@ en:
title: Triage posts using AI title: Triage posts using AI
description: "Triage posts using a large language model" description: "Triage posts using a large language model"
system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%" system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%"
llm_report:
title: Periodic report using AI
description: "Periodic report based on a large language model"
site_settings: site_settings:
discourse_ai_enabled: "Enable the discourse AI plugin." discourse_ai_enabled: "Enable the discourse AI plugin."
ai_toxicity_enabled: "Enable the toxicity module." ai_toxicity_enabled: "Enable the toxicity module."

View File

@ -0,0 +1,81 @@
# frozen_string_literal: true

# Registers the "llm_report" Discourse Automation script: a recurring job that
# asks an LLM to summarize recent forum activity and delivers the result as a
# private message. Only defined when the Discourse Automation plugin is loaded.
if defined?(DiscourseAutomation)
  module DiscourseAutomation::LlmReport
  end

  DiscourseAutomation::Scriptable::LLM_REPORT = "llm_report"

  DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_REPORT) do
    version 1
    triggerables %i[recurring]

    # Who sends the report PM and who receives it (usernames or emails).
    field :sender, component: :user, required: true
    field :receivers, component: :users, required: true
    field :title, component: :text, required: true
    # Reporting window length in days; offset shifts the window back so the
    # report can be run historically (useful when testing).
    field :days, component: :text, required: true, default_value: 7
    field :offset, component: :text, required: true, default_value: 0
    field :instructions,
          component: :message,
          required: true,
          default_value: DiscourseAi::Automation::ReportRunner.default_instructions
    # Sampling budget: number of posts to sample and LLM tokens kept per post.
    field :sample_size, component: :text, required: true, default_value: 100
    field :tokens_per_post, component: :text, required: true, default_value: 150
    field :model,
          component: :choices,
          required: true,
          extra: {
            content: DiscourseAi::Automation::AVAILABLE_MODELS,
          }
    field :priority_group, component: :group
    field :categories, component: :categories
    field :tags, component: :tags
    field :allow_secure_categories, component: :boolean
    field :debug_mode, component: :boolean

    script do |context, fields, automation|
      begin
        # Unpack the automation field values; each is stored under a "value" key.
        sender = fields.dig("sender", "value")
        receivers = fields.dig("receivers", "value")
        title = fields.dig("title", "value")
        model = fields.dig("model", "value")
        category_ids = fields.dig("categories", "value")
        tags = fields.dig("tags", "value")
        allow_secure_categories = !!fields.dig("allow_secure_categories", "value")
        debug_mode = !!fields.dig("debug_mode", "value")
        sample_size = fields.dig("sample_size", "value")
        instructions = fields.dig("instructions", "value")
        days = fields.dig("days", "value")
        offset = fields.dig("offset", "value").to_i
        priority_group = fields.dig("priority_group", "value")
        tokens_per_post = fields.dig("tokens_per_post", "value")

        DiscourseAi::Automation::ReportRunner.run!(
          sender_username: sender,
          receivers: receivers,
          title: title,
          model: model,
          category_ids: category_ids,
          tags: tags,
          allow_secure_categories: allow_secure_categories,
          debug_mode: debug_mode,
          sample_size: sample_size,
          instructions: instructions,
          days: days,
          offset: offset,
          priority_group_id: priority_group,
          tokens_per_post: tokens_per_post,
        )
      rescue => e
        # Never let a report failure crash the automation runner; log and, in
        # development, dump the exception to the console for quick debugging.
        Discourse.warn_exception e, message: "Error running LLM report!"
        if Rails.env.development?
          p e
          puts e.backtrace
        end
      end
    end
  end
end

View File

@ -1,104 +1,8 @@
# frozen_string_literal: true # frozen_string_literal: true
if defined?(DiscourseAutomation) if defined?(DiscourseAutomation)
module DiscourseAutomation::LlmTriage
def self.handle(
post:,
model:,
search_for_text:,
system_prompt:,
category_id: nil,
tags: nil,
canned_reply: nil,
canned_reply_user: nil,
hide_topic: nil
)
if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
raise ArgumentError, "llm_triage: no action specified!"
end
post_template = +""
post_template << "title: #{post.topic.title}\n"
post_template << "#{post.raw}"
filled_system_prompt = system_prompt.sub("%%POST%%", post_template)
if filled_system_prompt == system_prompt
raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
end
result = nil
if model == "claude-2"
# allowing double + 10 tokens
# technically maybe just token count is fine, but this will allow for more creative bad responses
result =
DiscourseAi::Inference::AnthropicCompletions.perform!(
filled_system_prompt,
model,
temperature: 0,
max_tokens:
DiscourseAi::Tokenizer::AnthropicTokenizer.tokenize(search_for_text).length * 2 + 10,
).dig(:completion)
else
result =
DiscourseAi::Inference::OpenAiCompletions.perform!(
[{ :role => "system", "content" => filled_system_prompt }],
model,
temperature: 0,
max_tokens:
DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(search_for_text).length * 2 + 10,
).dig(:choices, 0, :message, :content)
end
if result.strip == search_for_text.strip
user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
user = user || Discourse.system_user
if canned_reply.present?
PostCreator.create!(
user,
topic_id: post.topic_id,
raw: canned_reply,
reply_to_post_number: post.post_number,
skip_validations: true,
)
end
changes = {}
changes[:category_id] = category_id if category_id.present?
changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?
if changes.present?
first_post = post.topic.posts.where(post_number: 1).first
changes[:bypass_bump] = true
changes[:skip_validations] = true
first_post.revise(Discourse.system_user, changes)
end
post.topic.update!(visible: false) if hide_topic
end
end
end
DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage" DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage"
AVAILABLE_MODELS = [
{
id: "gpt-4",
name:
"discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_4",
},
{
id: "gpt-3-5-turbo",
name:
"discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_3_5_turbo",
},
{
id: "claude-2",
name:
"discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.claude_2",
},
]
DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do
version 1 version 1
run_in_background run_in_background
@ -119,7 +23,12 @@ if defined?(DiscourseAutomation)
end, end,
accepts_placeholders: true accepts_placeholders: true
field :search_for_text, component: :text, required: true field :search_for_text, component: :text, required: true
field :model, component: :choices, required: true, extra: { content: AVAILABLE_MODELS } field :model,
component: :choices,
required: true,
extra: {
content: DiscourseAi::Automation::AVAILABLE_MODELS,
}
field :category, component: :category field :category, component: :category
field :tags, component: :tags field :tags, component: :tags
field :hide_topic, component: :boolean field :hide_topic, component: :boolean
@ -149,7 +58,7 @@ if defined?(DiscourseAutomation)
end end
begin begin
DiscourseAutomation::LlmTriage.handle( DiscourseAi::Automation::LlmTriage.handle(
post: post, post: post,
model: model, model: model,
search_for_text: search_for_text, search_for_text: search_for_text,

13
lib/automation.rb Normal file
View File

@ -0,0 +1,13 @@
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Models selectable in the Discourse Automation LLM scripts (llm_report,
    # llm_triage). :id is the model name understood by the completions layer;
    # :name is the locale key rendered as the dropdown label.
    #
    # Frozen (elements included) so this shared constant cannot be mutated at
    # runtime by any consumer.
    AVAILABLE_MODELS = [
      { id: "gpt-4-turbo", name: "discourse_automation.ai_models.gpt_4_turbo" },
      { id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" },
      { id: "gpt-3-5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" },
      { id: "claude-2", name: "discourse_automation.ai_models.claude_2" },
      { id: "gemini-pro", name: "discourse_automation.ai_models.gemini_pro" },
    ].map(&:freeze).freeze
  end
end

View File

@ -0,0 +1,75 @@
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Triages a post with an LLM: the model is prompted with system_prompt
    # (with %%POST%% replaced by the post's title + raw) and, when its reply
    # matches search_for_text, the configured actions are applied: canned
    # reply, recategorize, retag, and/or hide the topic.
    module LlmTriage
      # @param post [Post] the post being triaged
      # @param model [String] model id accepted by DiscourseAi::Completions::Llm.proxy
      # @param search_for_text [String] LLM reply that triggers the actions
      # @param system_prompt [String] must contain the %%POST%% placeholder
      # @param category_id [Integer, nil] category to move the topic into
      # @param tags [Array<String>, nil] tags to apply (when tagging is enabled)
      # @param canned_reply [String, nil] reply to post on a match
      # @param canned_reply_user [String, nil] username posting the canned reply
      #   (falls back to the system user)
      # @param hide_topic [Boolean, nil] hide the topic on a match
      # @raise [ArgumentError] when no action is specified or the placeholder is missing
      def self.handle(
        post:,
        model:,
        search_for_text:,
        system_prompt:,
        category_id: nil,
        tags: nil,
        canned_reply: nil,
        canned_reply_user: nil,
        hide_topic: nil
      )
        if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
          raise ArgumentError, "llm_triage: no action specified!"
        end

        post_template = +""
        post_template << "title: #{post.topic.title}\n"
        post_template << "#{post.raw}"

        filled_system_prompt = system_prompt.sub("%%POST%%", post_template)

        if filled_system_prompt == system_prompt
          raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
        end

        llm = DiscourseAi::Completions::Llm.proxy(model)
        prompt = {
          insts: filled_system_prompt,
          params: {
            model => {
              # allow roughly double the expected answer length plus slack so a
              # "creative" non-matching reply still comes back intact
              max_tokens: (llm.tokenizer.tokenize(search_for_text).length * 2 + 10),
              temperature: 0,
            },
          },
        }

        # (dead `result = nil` pre-assignment removed; result is set here)
        result = llm.completion!(prompt, Discourse.system_user)

        if result.strip == search_for_text.strip
          user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
          user ||= Discourse.system_user

          if canned_reply.present?
            PostCreator.create!(
              user,
              topic_id: post.topic_id,
              raw: canned_reply,
              reply_to_post_number: post.post_number,
              skip_validations: true,
            )
          end

          changes = {}
          changes[:category_id] = category_id if category_id.present?
          changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?

          if changes.present?
            # Apply category/tag changes by revising the first post so the
            # standard revision pipeline runs, without bumping the topic.
            first_post = post.topic.posts.where(post_number: 1).first
            changes[:bypass_bump] = true
            changes[:skip_validations] = true
            first_post.revise(Discourse.system_user, changes)
          end

          post.topic.update!(visible: false) if hide_topic
        end
      end
    end
  end
end

View File

@ -0,0 +1,225 @@
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Builds the textual context handed to the LLM by ReportRunner: a
    # "## Summary" section (date range, counts, top users) followed by a
    # "## Topics" section listing the most-liked topics with per-post excerpts
    # truncated to a token budget.
    class ReportContextGenerator
      # Convenience wrapper: build an instance and return the generated text.
      def self.generate(**args)
        new(**args).generate
      end

      # @param start_date [Time] start of the reporting window
      # @param duration [ActiveSupport::Duration] window length
      # @param category_ids [Array<Integer>, nil] restrict posts to these categories
      # @param tags [Array<String>, nil] restrict posts to topics carrying these tags
      # @param allow_secure_categories [Boolean] include read-restricted categories
      # @param max_posts [Integer] cap on sampled posts in the topics section
      # @param tokens_per_post [Integer] excerpt budget per post
      # @param tokenizer [Class, nil] tokenizer used for truncation; defaults to
      #   the OpenAI tokenizer
      # @param prioritized_group_ids [Array<Integer>] groups whose members' posts
      #   are sampled first and called out in the summary
      def initialize(
        start_date:,
        duration:,
        category_ids: nil,
        tags: nil,
        allow_secure_categories: false,
        max_posts: 200,
        tokens_per_post: 100,
        tokenizer: nil,
        prioritized_group_ids: []
      )
        @start_date = start_date
        @duration = duration
        @category_ids = category_ids
        @tags = tags
        @allow_secure_categories = allow_secure_categories
        @max_posts = max_posts
        @tokenizer = tokenizer || DiscourseAi::Tokenizer::OpenAiTokenizer
        @tokens_per_post = tokens_per_post
        @prioritized_group_ids = prioritized_group_ids

        # Base relation: visible regular posts created inside the window on
        # non-deleted, regular-archetype topics.
        @posts =
          Post
            .where("posts.created_at >= ?", @start_date)
            .joins(topic: :category)
            .includes(:topic, :user)
            .where("posts.created_at < ?", @start_date + @duration)
            .where("posts.post_type = ?", Post.types[:regular])
            .where("posts.hidden_at IS NULL")
            .where("topics.deleted_at IS NULL")
            .where("topics.archetype = ?", Archetype.default)
        @posts = @posts.where("categories.read_restricted = ?", false) if !@allow_secure_categories
        @posts = @posts.where("categories.id IN (?)", @category_ids) if @category_ids.present?
        if @tags.present?
          tag_ids = Tag.where(name: @tags).select(:id)
          topic_ids_with_tags = TopicTag.where(tag_id: tag_ids).select(:topic_id)
          @posts = @posts.where(topic_id: topic_ids_with_tags)
        end

        # topic_id => Set of accepted-answer post ids; populated only when the
        # Solved plugin is installed.
        @solutions = {}
        if defined?(::DiscourseSolved)
          TopicCustomField
            .where(name: ::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD)
            .where(topic_id: @posts.select(:topic_id))
            .pluck(:topic_id, :value)
            .each do |topic_id, post_id|
              @solutions[topic_id] ||= Set.new
              @solutions[topic_id] << post_id.to_i
            end
        end
      end

      # Header entry for a topic; :posts is filled in later by add_posts.
      def format_topic(topic)
        info = []
        info << ""
        info << "### #{topic.title}"
        info << "topic_id: #{topic.id}"
        info << "solved: true" if @solutions.key?(topic.id)
        info << "category: #{topic.category&.name}"
        tags = topic.tags.pluck(:name)
        info << "tags: #{topic.tags.pluck(:name).join(", ")}" if tags.present?
        info << topic.created_at.strftime("%Y-%m-%d %H:%M")
        { created_at: topic.created_at, info: info.join("\n"), posts: {} }
      end

      # Entry for a single post, with raw text truncated to @tokens_per_post.
      def format_post(post)
        buffer = []
        buffer << ""
        buffer << "post_number: #{post.post_number}"
        if @solutions.key?(post.topic_id) && @solutions[post.topic_id].include?(post.id)
          buffer << "solution: true"
        end
        buffer << post.created_at.strftime("%Y-%m-%d %H:%M")
        buffer << "user: #{post.user&.username}"
        buffer << "likes: #{post.like_count}"
        excerpt = @tokenizer.truncate(post.raw, @tokens_per_post)
        # Label as an excerpt only when truncation actually shortened the text.
        excerpt = "excerpt: #{excerpt}..." if excerpt.length < post.raw.length
        buffer << "#{excerpt}"
        { likes: post.like_count, info: buffer.join("\n") }
      end

      # "## Summary" section: window dates, post/topic counts, top users, and
      # (when configured) top users within the prioritized groups.
      def format_summary
        # NOTE(review): uses ">" (exclusive) on topics.created_at while the
        # posts relation uses ">=" — confirm whether the asymmetry is intended.
        topic_count =
          @posts
            .where("topics.created_at > ?", @start_date)
            .select(:topic_id)
            .distinct(:topic_id)
            .count

        buffer = []
        buffer << "Start Date: #{@start_date.to_date}"
        buffer << "End Date: #{(@start_date + @duration).to_date}"
        buffer << "New posts: #{@posts.count}"
        buffer << "New topics: #{topic_count}"

        top_users =
          Post
            .where(id: @posts.select(:id))
            .joins(:user)
            .group(:user_id, :username)
            .select(
              "user_id, username, sum(posts.like_count) like_count, count(posts.id) post_count",
            )
            .order("sum(posts.like_count) desc")
            .limit(10)

        buffer << "Top users:"
        top_users.each do |user|
          buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)"
        end

        if @prioritized_group_ids.present?
          group_names =
            Group
              .where(id: @prioritized_group_ids)
              .pluck(:name, :full_name)
              .map do |name, full_name|
                if full_name.present?
                  "#{name} (#{full_name[0..100].gsub("\n", " ")})"
                else
                  name
                end
              end
              .join(", ")

          buffer << ""
          buffer << "Top users in #{group_names} group#{group_names.include?(",") ? "s" : ""}:"
          group_users = GroupUser.where(group_id: @prioritized_group_ids).select(:user_id)
          # NOTE(review): filters the already-limited top_users relation, so
          # only group members who are in the overall top 10 appear here —
          # confirm that is the intent.
          top_users
            .where(user_id: group_users)
            .each do |user|
              buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)"
            end
        end

        buffer.join("\n")
      end

      # "## Topics" section: samples up to @max_posts posts ordered by likes
      # (prioritized-group posts first), always appends each sampled topic's
      # last post, then renders topics ordered by their sampled like totals.
      def format_topics
        buffer = []
        topics = {}

        post_count = 0
        @posts = @posts.order("posts.like_count desc, posts.created_at desc")

        if @prioritized_group_ids.present?
          user_groups = GroupUser.where(group_id: @prioritized_group_ids)
          prioritized_posts = @posts.where(user_id: user_groups.select(:user_id)).limit(@max_posts)
          post_count += add_posts(prioritized_posts, topics)
        end

        add_posts(@posts.limit(@max_posts), topics, limit: @max_posts - post_count)

        # we need last posts in all topics
        # they may have important info
        last_posts =
          @posts.where("posts.post_number = topics.highest_post_number").where(
            "topics.id IN (?)",
            topics.keys,
          )

        add_posts(last_posts, topics)

        # Total likes over the sampled posts of each topic; used for ordering.
        topics.each do |topic_id, topic_info|
          topic_info[:post_likes] = topic_info[:posts].sum { |_, post_info| post_info[:likes] }
        end

        topics = topics.sort { |a, b| b[1][:post_likes] <=> a[1][:post_likes] }

        topics.each do |topic_id, topic_info|
          buffer << topic_info[:info]
          last_post_number = 0
          topic_info[:posts]
            .sort { |a, b| a[0] <=> b[0] }
            .each do |post_number, post_info|
              # "..." marks a gap where intermediate posts were not sampled.
              buffer << "\n..." if post_number > last_post_number + 1
              buffer << post_info[:info]
              last_post_number = post_number
            end
        end

        buffer.join("\n")
      end

      # Full context document: summary followed by the topics section.
      def generate
        buffer = []
        buffer << "## Summary"
        buffer << format_summary
        buffer << "\n## Topics"
        buffer << format_topics
        buffer.join("\n")
      end

      # Merges posts from relation into topics (keyed by topic_id), skipping
      # posts already recorded. Returns the number of posts added.
      # NOTE(review): the limit check runs after a post is added, so limit: 0
      # still admits one post — confirm whether this off-by-one is intended.
      def add_posts(relation, topics, limit: nil)
        post_count = 0
        relation.each do |post|
          topics[post.topic_id] ||= format_topic(post.topic)
          if !topics[post.topic_id][:posts][post.post_number]
            topics[post.topic_id][:posts][post.post_number] = format_post(post)
            post_count += 1
            limit -= 1 if limit
          end
          break if limit && limit <= 0
        end
        post_count
      end
    end
  end
end

View File

@ -0,0 +1,146 @@
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Drives the periodical LLM report: gathers forum context via
    # ReportContextGenerator, prompts the configured model, and delivers the
    # result as a private message from the sender to the receivers.
    class ReportRunner
      # Default prompt used when the automation's "instructions" field is
      # left at its default. This text is sent to the LLM verbatim.
      def self.default_instructions
        # not localizing for now cause non English LLM will require
        # a fair bit of experimentation
        # NOTE(review): "paragaraphs" below is a typo in the prompt string;
        # harmless to the LLM, but worth fixing in a behavior-reviewed change.
        <<~TEXT
          Generate report:

          ## Report Guidelines:

          - Length & Style: Aim for 12 dense paragraphs in a narrative style, focusing on internal forum discussions.
          - Accuracy: Only include verified information with no embellishments.
          - Sourcing: ALWAYS Back statements with links to forum discussions.
          - Markdown Usage: Enhance readability with **bold**, *italic*, and > quotes.
          - Linking: Use `#{Discourse.base_url}/t/-/TOPIC_ID/POST_NUMBER` for direct references.
          - User Mentions: Reference users with @USERNAME
          - Context tips: Staff are denoted with Username *. For example: jane * means that jane is a staff member. Do not render the * in the report.
          - Add many topic links: strive to link to at least 30 topics in the report. Topic Id is meaningless to end users if you need to throw in a link use [ref](...) or better still just embed it into the [sentence](...)
          - Categories and tags: use the format #TAG and #CATEGORY to denote tags and categories

          ## Structure:

          - Key statistics: Specify date range, call out important stats like number of new topics and posts
          - Overview: Briefly state trends within period.
          - Highlighted content: 5 paragaraphs highlighting important topics people should know about. If possible have each paragraph link to multiple related topics.
          - Key insights and trends linking to a selection of posts that back them
        TEXT
      end

      # Convenience wrapper: build an instance and run it.
      def self.run!(**args)
        new(**args).run!
      end

      # Keyword arguments mirror the llm_report automation fields; numeric
      # fields arrive as strings and are coerced here.
      # @param sender_username [String] username the PM is sent from
      # @param receivers [Array<String>] usernames receiving the PM
      # @param title [String] PM title
      # @param model [String] model id accepted by DiscourseAi::Completions::Llm.proxy
      # @param category_ids [Array<Integer>, nil] restrict context to these categories
      # @param tags [Array<String>, nil] restrict context to these tags
      # @param allow_secure_categories [Boolean] include read-restricted categories
      # @param debug_mode [Boolean] also post the raw LLM input as a follow-up
      # @param sample_size [String, Integer] posts to sample (floored at 10)
      # @param instructions [String] prompt instructions for the LLM
      # @param days [String, Integer] reporting window length
      # @param offset [String, Integer] days to shift the window back
      # @param priority_group_id [Integer, nil] group whose content is prioritized
      # @param tokens_per_post [String, Integer] excerpt token budget per post
      def initialize(
        sender_username:,
        receivers:,
        title:,
        model:,
        category_ids:,
        tags:,
        allow_secure_categories:,
        debug_mode:,
        sample_size:,
        instructions:,
        days:,
        offset:,
        priority_group_id:,
        tokens_per_post:
      )
        @sender = User.find_by(username: sender_username)
        @receivers = User.where(username: receivers)
        @title = title
        @model = model
        @llm = DiscourseAi::Completions::Llm.proxy(model)
        @category_ids = category_ids
        @tags = tags
        @allow_secure_categories = allow_secure_categories
        @debug_mode = debug_mode
        # enforce a minimum sample of 10 posts
        @sample_size = sample_size.to_i < 10 ? 10 : sample_size.to_i
        @instructions = instructions
        @days = days.to_i
        @offset = offset.to_i
        @priority_group_id = priority_group_id
        @tokens_per_post = tokens_per_post.to_i
      end

      # Generates the report and creates the PM (plus a debug follow-up post
      # when debug_mode is on).
      def run!
        # Window is @days long, shifted @offset days into the past.
        start_date = (@offset + @days).days.ago
        prioritized_group_ids = [@priority_group_id] if @priority_group_id.present?
        context =
          DiscourseAi::Automation::ReportContextGenerator.generate(
            start_date: start_date,
            duration: @days.days,
            max_posts: @sample_size,
            tags: @tags,
            category_ids: @category_ids,
            prioritized_group_ids: prioritized_group_ids,
            allow_secure_categories: @allow_secure_categories,
            tokens_per_post: @tokens_per_post,
            tokenizer: @llm.tokenizer,
          )

        # Instructions are repeated before and after the context so large
        # context models keep them in focus.
        input = <<~INPUT
          #{@instructions}

          <context>
          #{context}
          </context>

          #{@instructions}
        INPUT

        prompt = {
          insts: "You are a helpful bot specializing in summarizing activity Discourse sites",
          input: input,
          final_insts: "Here is the report I generated for you",
          params: {
            @model => {
              temperature: 0,
            },
          },
        }

        # Stream the completion, echoing chunks to the console in development
        # when debug mode is on.
        result = +""

        puts if Rails.env.development? && @debug_mode

        @llm.completion!(prompt, Discourse.system_user) do |response|
          print response if Rails.env.development? && @debug_mode
          result << response
        end

        post =
          PostCreator.create!(
            @sender,
            raw: result,
            title: @title,
            archetype: Archetype.private_message,
            target_usernames: @receivers.map(&:username).join(","),
            skip_validations: true,
          )

        if @debug_mode
          # Indent the LLM input so it renders inside the code fence.
          input = input.split("\n").map { |line| "  #{line}" }.join("\n")
          raw = <<~RAW
            ```
            start_date: #{start_date},
            duration: #{@days.days},
            max_posts: #{@sample_size},
            tags: #{@tags},
            category_ids: #{@category_ids},
            priority_group: #{@priority_group_id}
            LLM context was:
            ```

            #{input}
          RAW
          PostCreator.create!(@sender, raw: raw, topic_id: post.topic_id, skip_validations: true)
        end
      end
    end
  end
end

View File

@ -6,7 +6,14 @@ module DiscourseAi
class ChatGpt < Dialect class ChatGpt < Dialect
class << self class << self
def can_translate?(model_name) def can_translate?(model_name)
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name) %w[
gpt-3.5-turbo
gpt-4
gpt-3.5-turbo-16k
gpt-4-32k
gpt-4-1106-preview
gpt-4-turbo
].include?(model_name)
end end
def tokenizer def tokenizer

View File

@ -27,7 +27,9 @@ module DiscourseAi
claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts] claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
claude_prompt << "Assistant:\n" claude_prompt << "Assistant:"
claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts]
claude_prompt << "\n"
end end
def max_prompt_tokens def max_prompt_tokens

View File

@ -17,9 +17,10 @@ module DiscourseAi
DiscourseAi::Completions::Dialects::OrcaStyle, DiscourseAi::Completions::Dialects::OrcaStyle,
DiscourseAi::Completions::Dialects::Gemini, DiscourseAi::Completions::Dialects::Gemini,
] ]
dialects.detect(-> { raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL }) do |d|
d.can_translate?(model_name) dialect = dialects.find { |d| d.can_translate?(model_name) }
end raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect
dialect
end end
def tokenizer def tokenizer

View File

@ -5,11 +5,18 @@ module DiscourseAi
module Endpoints module Endpoints
class OpenAi < Base class OpenAi < Base
def self.can_contact?(model_name) def self.can_contact?(model_name)
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name) %w[
gpt-3.5-turbo
gpt-4
gpt-3.5-turbo-16k
gpt-4-32k
gpt-4-1106-preview
gpt-4-turbo
].include?(model_name)
end end
def default_options def default_options
{ model: model } { model: model == "gpt-4-turbo" ? "gpt-4-1106-preview" : model }
end end
def provider_id def provider_id
@ -24,7 +31,11 @@ module DiscourseAi
if model.include?("32k") if model.include?("32k")
SiteSetting.ai_openai_gpt4_32k_url SiteSetting.ai_openai_gpt4_32k_url
else else
SiteSetting.ai_openai_gpt4_url if model.include?("1106") || model.include?("turbo")
SiteSetting.ai_openai_gpt4_turbo_url
else
SiteSetting.ai_openai_gpt4_url
end
end end
else else
if model.include?("16k") if model.include?("16k")

View File

@ -7,6 +7,7 @@ module DiscourseAi
foldable_models = [ foldable_models = [
Models::OpenAi.new("gpt-4", max_tokens: 8192), Models::OpenAi.new("gpt-4", max_tokens: 8192),
Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768), Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
Models::OpenAi.new("gpt-4-1106-preview", max_tokens: 100_000),
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096), Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384), Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
Models::Anthropic.new("claude-2", max_tokens: 100_000), Models::Anthropic.new("claude-2", max_tokens: 100_000),

View File

@ -40,6 +40,7 @@ register_svg_icon "meh"
after_initialize do after_initialize do
# do not autoload this cause we may have no namespace # do not autoload this cause we may have no namespace
require_relative "discourse_automation/llm_triage" require_relative "discourse_automation/llm_triage"
require_relative "discourse_automation/llm_report"
add_admin_route "discourse_ai.title", "discourse-ai" add_admin_route "discourse_ai.title", "discourse-ai"

View File

@ -0,0 +1,34 @@
# frozen_string_literal: true

# Spec for the "llm_report" automation script; skipped entirely when the
# Discourse Automation plugin is not installed.
return if !defined?(DiscourseAutomation)

describe DiscourseAutomation do
  let(:automation) { Fabricate(:automation, script: "llm_report", enabled: true) }

  # Helper: attach a script field (name => value) to the automation fixture.
  def add_automation_field(name, value, type: "text")
    automation.fields.create!(
      component: type,
      name: name,
      metadata: {
        value: value,
      },
      target: "script",
    )
  end

  it "can trigger via automation" do
    user = Fabricate(:user)

    add_automation_field("sender", user.username, type: "user")
    add_automation_field("receivers", [user.username], type: "users")
    add_automation_field("model", "gpt-4-turbo")
    add_automation_field("title", "Weekly report")

    # Stub the LLM so no network call happens; the canned response should
    # become the body of the generated PM.
    DiscourseAi::Completions::Llm.with_prepared_responses(["An Amazing Report!!!"]) do
      automation.trigger!
    end

    pm = Topic.where(title: "Weekly report").first
    expect(pm.posts.first.raw).to eq("An Amazing Report!!!")
  end
end

View File

@ -2,106 +2,9 @@
return if !defined?(DiscourseAutomation) return if !defined?(DiscourseAutomation)
describe DiscourseAutomation::LlmTriage do describe DiscourseAi::Automation::LlmTriage do
fab!(:post) { Fabricate(:post) } fab!(:post) { Fabricate(:post) }
def triage(**args)
DiscourseAutomation::LlmTriage.handle(**args)
end
it "does nothing if it does not pass triage" do
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: { choices: [{ message: { content: "good" } }] }.to_json,
)
triage(
post: post,
model: "gpt-4",
hide_topic: true,
system_prompt: "test %%POST%%",
search_for_text: "bad",
)
expect(post.topic.reload.visible).to eq(true)
end
it "can hide topics on triage with claude" do
stub_request(:post, "https://api.anthropic.com/v1/complete").to_return(
status: 200,
body: { completion: "bad" }.to_json,
)
triage(
post: post,
model: "claude-2",
hide_topic: true,
system_prompt: "test %%POST%%",
search_for_text: "bad",
)
expect(post.topic.reload.visible).to eq(false)
end
it "can hide topics on triage with claude" do
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: { choices: [{ message: { content: "bad" } }] }.to_json,
)
triage(
post: post,
model: "gpt-4",
hide_topic: true,
system_prompt: "test %%POST%%",
search_for_text: "bad",
)
expect(post.topic.reload.visible).to eq(false)
end
it "can categorize topics on triage" do
category = Fabricate(:category)
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: { choices: [{ message: { content: "bad" } }] }.to_json,
)
triage(
post: post,
model: "gpt-4",
category_id: category.id,
system_prompt: "test %%POST%%",
search_for_text: "bad",
)
expect(post.topic.reload.category_id).to eq(category.id)
end
it "can reply to topics on triage" do
user = Fabricate(:user)
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
status: 200,
body: { choices: [{ message: { content: "bad" } }] }.to_json,
)
triage(
post: post,
model: "gpt-4",
system_prompt: "test %%POST%%",
search_for_text: "bad",
canned_reply: "test canned reply 123",
canned_reply_user: user.username,
)
reply = post.topic.posts.order(:post_number).last
expect(reply.raw).to eq("test canned reply 123")
expect(reply.user.id).to eq(user.id)
end
let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) } let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }
def add_automation_field(name, value, type: "text") def add_automation_field(name, value, type: "text")
@ -130,13 +33,10 @@ describe DiscourseAutomation::LlmTriage do
add_automation_field("canned_reply", "Yo this is a reply") add_automation_field("canned_reply", "Yo this is a reply")
add_automation_field("canned_reply_user", user.username, type: "user") add_automation_field("canned_reply_user", user.username, type: "user")
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return( DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
status: 200, automation.running_in_background!
body: { choices: [{ message: { content: "bad" } }] }.to_json, automation.trigger!({ "post" => post })
) end
automation.running_in_background!
automation.trigger!({ "post" => post })
topic = post.topic.reload topic = post.topic.reload
expect(topic.category_id).to eq(category.id) expect(topic.category_id).to eq(category.id)

View File

@ -0,0 +1,85 @@
# frozen_string_literal: true

# Unit specs for DiscourseAi::Automation::LlmTriage.handle, exercising each
# triage action with the LLM response stubbed via with_prepared_responses.
describe DiscourseAi::Automation::LlmTriage do
  fab!(:post) { Fabricate(:post) }

  # Thin wrapper so examples read as triage(...) rather than the full constant.
  def triage(**args)
    DiscourseAi::Automation::LlmTriage.handle(**args)
  end

  it "does nothing if it does not pass triage" do
    DiscourseAi::Completions::Llm.with_prepared_responses(["good"]) do
      triage(
        post: post,
        model: "gpt-4",
        hide_topic: true,
        system_prompt: "test %%POST%%",
        search_for_text: "bad",
      )
    end

    expect(post.topic.reload.visible).to eq(true)
  end

  it "can hide topics on triage with claude" do
    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
      triage(
        post: post,
        model: "claude-2",
        hide_topic: true,
        system_prompt: "test %%POST%%",
        search_for_text: "bad",
      )
    end

    expect(post.topic.reload.visible).to eq(false)
  end

  # Description fixed: this example exercises gpt-4 but previously duplicated
  # the claude example's description.
  it "can hide topics on triage with gpt-4" do
    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
      triage(
        post: post,
        model: "gpt-4",
        hide_topic: true,
        system_prompt: "test %%POST%%",
        search_for_text: "bad",
      )
    end

    expect(post.topic.reload.visible).to eq(false)
  end

  it "can categorize topics on triage" do
    category = Fabricate(:category)

    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
      triage(
        post: post,
        model: "gpt-4",
        category_id: category.id,
        system_prompt: "test %%POST%%",
        search_for_text: "bad",
      )
    end

    expect(post.topic.reload.category_id).to eq(category.id)
  end

  it "can reply to topics on triage" do
    user = Fabricate(:user)
    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
      triage(
        post: post,
        model: "gpt-4",
        system_prompt: "test %%POST%%",
        search_for_text: "bad",
        canned_reply: "test canned reply 123",
        canned_reply_user: user.username,
      )
    end

    reply = post.topic.posts.order(:post_number).last
    expect(reply.raw).to eq("test canned reply 123")
    expect(reply.user.id).to eq(user.id)
  end
end

View File

@ -0,0 +1,152 @@
# frozen_string_literal: true
require "rails_helper"
module DiscourseAi
  module Automation
    describe ReportContextGenerator do
      describe ".generate" do
        # Fixture graph: public posts, a PM, a secure-category topic visible
        # only to `group`, tagged topics, a long post, and a well-liked topic.
        fab!(:private_message_post)
        fab!(:post_in_other_category) { Fabricate(:post) }

        fab!(:category)
        fab!(:topic) { Fabricate(:topic, category: category) }
        fab!(:post_in_category) { Fabricate(:post, topic: topic) }
        fab!(:reply_in_category) { Fabricate(:post, topic: topic, reply_to_post_number: 1) }

        fab!(:group)
        fab!(:private_category) { Fabricate(:private_category, group: group) }
        fab!(:secure_topic) do
          Fabricate(:topic, title: "category in secure category", category: private_category)
        end
        fab!(:user_in_group) { Fabricate(:user, groups: [group]) }
        fab!(:post_in_private_category) do
          Fabricate(:post, user: user_in_group, topic: secure_topic)
        end

        fab!(:tag)
        fab!(:tag2) { Fabricate(:tag) }
        fab!(:topic_with_tag) { Fabricate(:topic, tags: [tag, tag2]) }
        fab!(:post_with_tag) { Fabricate(:post, topic: topic_with_tag) }

        fab!(:long_post) do
          Fabricate(
            :post,
            raw: (1..100).map { |i| "testing#{i}" }.join(" "),
            topic: Fabricate(:topic, category: category),
          )
        end

        fab!(:topic_with_likes) { Fabricate(:topic, like_count: 10) }
        fab!(:post_with_likes) { Fabricate(:post, topic: topic_with_likes, like_count: 10) }
        fab!(:post_with_likes2) { Fabricate(:post, topic: topic_with_likes, like_count: 5) }
        fab!(:post_with_likes3) { Fabricate(:post, topic: topic_with_likes, like_count: 3) }

        # Only runs when the discourse-solved plugin is loaded in the test env.
        if defined?(::DiscourseSolved)
          it "will correctly denote solved topics" do
            topic_with_likes.custom_fields[
              ::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD
            ] = post_with_likes2.id
            topic_with_likes.save_custom_fields

            context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.days)

            expect(context).to include("solved: true")
            expect(context).to include("solution: true")
          end
        end

        it "always includes info from last posts on topic" do
          context =
            ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.days, max_posts: 1)

          # Truncation marker plus the trailing post beyond max_posts.
          expect(context).to include("...")
          expect(context).to include("post_number: 3")
        end

        it "includes a summary" do
          context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.days)

          expect(context).to include("New posts: 8")
          expect(context).to include("New topics: 5")
        end

        it "orders so most liked are first" do
          context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.days)

          # The liked topic must appear before the unliked long-post topic.
          regex = "topic_id: #{topic_with_likes.id}.*topic_id: #{long_post.topic.id}"
          expect(context).to match(Regexp.new(regex, Regexp::MULTILINE))
        end

        it "allows you to prioritize groups" do
          context =
            ReportContextGenerator.generate(
              start_date: 1.day.ago,
              duration: 2.days,
              prioritized_group_ids: [group.id],
              allow_secure_categories: true,
              max_posts: 1,
            )

          expect(context).to include(post_in_private_category.topic.title)
          expect(context).not_to include(post_in_other_category.topic.title)
          expect(context).to include(group.name)
        end

        it "can generate context (excluding PMs)" do
          context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.days)

          expect(context).to include(post_in_other_category.topic.title)
          expect(context).to include(topic.title)
          expect(context).not_to include(private_message_post.topic.title)
          expect(context).not_to include(secure_topic.title)
        end

        it "can filter on tag" do
          context =
            ReportContextGenerator.generate(
              start_date: 1.day.ago,
              duration: 2.days,
              tags: [tag.name],
            )

          expect(context).not_to include(post_in_other_category.topic.title)
          expect(context).not_to include(topic.title)
          expect(context).not_to include(private_message_post.topic.title)
          expect(context).not_to include(secure_topic.title)
          expect(context).to include(post_with_tag.topic.title)
        end

        it "can optionally include secure categories" do
          context =
            ReportContextGenerator.generate(
              start_date: 1.day.ago,
              duration: 2.days,
              allow_secure_categories: true,
            )

          expect(context).to include(post_in_other_category.topic.title)
          expect(context).to include(topic.title)
          expect(context).not_to include(private_message_post.topic.title)
          expect(context).to include(secure_topic.title)
        end

        it "can filter to categories" do
          context =
            ReportContextGenerator.generate(
              start_date: 1.day.ago,
              duration: 2.days,
              category_ids: [category.id],
            )

          expect(context).not_to include(post_in_other_category.topic.title)
          expect(context).to include(topic.title)
          expect(context).not_to include(private_message_post.topic.title)
          expect(context).not_to include(secure_topic.title)
        end
      end
    end
  end
end

View File

@ -0,0 +1,47 @@
# frozen_string_literal: true
require "rails_helper"
module DiscourseAi
  module Automation
    describe ReportRunner do
      fab!(:user)
      fab!(:receiver) { Fabricate(:user) }
      fab!(:post) { Fabricate(:post, user: user) }
      fab!(:group)
      fab!(:secure_category) { Fabricate(:private_category, group: group) }
      fab!(:secure_topic) { Fabricate(:topic, category: secure_category) }
      fab!(:secure_post) { Fabricate(:post, raw: "Top secret date !!!!", topic: secure_topic) }

      describe "#run!" do
        it "generates a report respecting the params" do
          DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do
            ReportRunner.run!(
              sender_username: user.username,
              receivers: [receiver.username],
              title: "test report",
              model: "gpt-4",
              category_ids: nil,
              tags: nil,
              allow_secure_categories: false,
              debug_mode: true,
              sample_size: 100,
              instructions: "make a magic report",
              days: 7,
              offset: 0,
              priority_group_id: nil,
              tokens_per_post: 150,
            )
          end

          report = Topic.where(title: "test report").first
          # First post is the LLM output; with debug_mode the last post holds
          # the sampled context, which must exclude secure-category content.
          expect(report.ordered_posts.first.raw).to eq("magical report")

          debugging = report.ordered_posts.last.raw

          expect(debugging).to include(post.raw)
          expect(debugging).not_to include(secure_post.raw)
        end
      end
    end
  end
end