From ab7e9e31aaa63a710b76645056e4f70fe38b6c41 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 30 Jan 2024 15:55:05 +1100 Subject: [PATCH] FEATURE: allow excluding tags and categories from LLM report (#447) Also - Better diagnostics, output model being used - Prompt LLM that true content is being injected in tag --- config/locales/client.en.yml | 6 ++ discourse_automation/llm_report.rb | 8 +++ lib/automation/report_context_generator.rb | 20 +++++- lib/automation/report_runner.rb | 11 ++- .../modules/automation/report_runner_spec.rb | 71 +++++++++++++++++++ 5 files changed, 113 insertions(+), 3 deletions(-) diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index 3fb59ef6..2d2afc6e 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -51,6 +51,12 @@ en: tags: label: "Tags" description: "Filter topics only to these tags" + exclude_tags: + label: "Exclude Tags" + description: "Exclude topics with these tags" + exclude_categories: + label: "Exclude Categories" + description: "Exclude topics with these categories" allow_secure_categories: label: "Allow secure categories" description: "Allow the report to be generated for topics in secure categories" diff --git a/discourse_automation/llm_report.rb b/discourse_automation/llm_report.rb index a4ca5b5e..215a3558 100644 --- a/discourse_automation/llm_report.rb +++ b/discourse_automation/llm_report.rb @@ -34,6 +34,9 @@ if defined?(DiscourseAutomation) field :categories, component: :categories field :tags, component: :tags + field :exclude_categories, component: :categories + field :exclude_tags, component: :tags + field :allow_secure_categories, component: :boolean field :debug_mode, component: :boolean @@ -55,6 +58,9 @@ if defined?(DiscourseAutomation) priority_group = fields.dig("priority_group", "value") tokens_per_post = fields.dig("tokens_per_post", "value") + exclude_category_ids = fields.dig("exclude_categories", "value") + exclude_tags = fields.dig("exclude_tags", "value") + DiscourseAi::Automation::ReportRunner.run!( sender_username: sender, receivers: receivers, @@ -71,6 +77,8 @@ if defined?(DiscourseAutomation) offset: offset, priority_group_id: priority_group, tokens_per_post: tokens_per_post, + exclude_category_ids: exclude_category_ids, + exclude_tags: exclude_tags, ) rescue => e Discourse.warn_exception e, message: "Error running LLM report!" diff --git a/lib/automation/report_context_generator.rb b/lib/automation/report_context_generator.rb index dc513a93..ac273dcb 100644 --- a/lib/automation/report_context_generator.rb +++ b/lib/automation/report_context_generator.rb @@ -16,7 +16,9 @@ module DiscourseAi max_posts: 200, tokens_per_post: 100, tokenizer: nil, - prioritized_group_ids: [] + prioritized_group_ids: [], + exclude_category_ids: nil, + exclude_tags: nil ) @start_date = start_date @duration = duration @@ -41,9 +43,23 @@ module DiscourseAi .where("topics.archetype = ?", Archetype.default) @posts = @posts.where("categories.read_restricted = ?", false) if !@allow_secure_categories @posts = @posts.where("categories.id IN (?)", @category_ids) if @category_ids.present? + @posts = + @posts.where( + "categories.id NOT IN (?)", + exclude_category_ids, + ) if exclude_category_ids.present? + + if exclude_tags.present? + exclude_tag_ids = Tag.where_name(exclude_tags).select(:id) + @posts = + @posts.where( + "topics.id NOT IN (?)", + TopicTag.where(tag_id: exclude_tag_ids).select(:topic_id), + ) + end if @tags.present? - tag_ids = Tag.where(name: @tags).select(:id) + tag_ids = Tag.where_name(@tags).select(:id) topic_ids_with_tags = TopicTag.where(tag_id: tag_ids).select(:topic_id) @posts = @posts.where(topic_id: topic_ids_with_tags) end diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb index 0ec2ddef..7b4d856c 100644 --- a/lib/automation/report_runner.rb +++ b/lib/automation/report_runner.rb @@ -48,7 +48,9 @@ module DiscourseAi tags: nil, priority_group_id: nil, allow_secure_categories: false, - debug_mode: false + debug_mode: false, + exclude_category_ids: nil, + exclude_tags: nil ) @sender = User.find_by(username: sender_username) @receivers = User.where(username: receivers) @@ -72,6 +74,8 @@ module DiscourseAi @priority_group_id = priority_group_id @tokens_per_post = tokens_per_post.to_i @topic_id = topic_id.presence&.to_i + @exclude_category_ids = exclude_category_ids + @exclude_tags = exclude_tags if !@topic_id && !@receivers.present? && !@email_receivers.present? raise ArgumentError, "Must specify topic_id or receivers" @@ -100,10 +104,14 @@ module DiscourseAi allow_secure_categories: @allow_secure_categories, tokens_per_post: @tokens_per_post, tokenizer: @llm.tokenizer, + exclude_category_ids: @exclude_category_ids, + exclude_tags: @exclude_tags, ) input = <<~INPUT.strip #{@instructions} + Real and accurate context from the Discourse forum is included in the tag below. + #{context} @@ -158,6 +166,7 @@ module DiscourseAi tags: #{@tags}, category_ids: #{@category_ids}, priority_group: #{@priority_group_id} + model: #{@model} LLM context was: ``` diff --git a/spec/lib/modules/automation/report_runner_spec.rb b/spec/lib/modules/automation/report_runner_spec.rb index ca424bf2..8b80edc4 100644 --- a/spec/lib/modules/automation/report_runner_spec.rb +++ b/spec/lib/modules/automation/report_runner_spec.rb @@ -13,6 +13,16 @@ module DiscourseAi fab!(:secure_topic) { Fabricate(:topic, category: secure_category) } fab!(:secure_post) { Fabricate(:post, raw: "Top secret date !!!!", topic: secure_topic) } + fab!(:category) { Fabricate(:category) } + fab!(:topic_in_category) { Fabricate(:topic, category: category) } + fab!(:post_in_category) do + Fabricate(:post, raw: "I am in a category", topic: topic_in_category) + end + + fab!(:tag) { Fabricate(:tag) } + fab!(:topic_with_tag) { Fabricate(:topic, tags: [tag]) } + fab!(:post_with_tag) { Fabricate(:post, raw: "I am in a tag", topic: topic_with_tag) } + describe "#run!" do it "is able to generate email reports" do freeze_time @@ -42,6 +52,65 @@ module DiscourseAi ) end + it "can exclude categories" do + freeze_time + + DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do + ReportRunner.run!( + sender_username: user.username, + receivers: [receiver.username], + title: "test report", + model: "gpt-4", + category_ids: nil, + tags: nil, + allow_secure_categories: false, + debug_mode: true, + sample_size: 100, + instructions: "make a magic report", + days: 7, + offset: 0, + priority_group_id: nil, + tokens_per_post: 150, + exclude_category_ids: [category.id], + ) + end + + report = Topic.where(title: "test report").first + debugging = report.ordered_posts.last.raw + + expect(debugging).not_to include(post_in_category.raw) + end + + it "can exclude tags" do + freeze_time + + DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do + ReportRunner.run!( + sender_username: user.username, + receivers: [receiver.username], + title: "test report", + model: "gpt-4", + category_ids: nil, + tags: nil, + allow_secure_categories: false, + debug_mode: true, + sample_size: 100, + instructions: "make a magic report", + days: 7, + offset: 0, + priority_group_id: nil, + tokens_per_post: 150, + exclude_tags: [tag.name], + ) + end + + report = Topic.where(title: "test report").first + debugging = report.ordered_posts.last.raw + + expect(debugging).to include(post_in_category.raw) + expect(debugging).not_to include(post_with_tag.raw) + end + it "generates correctly respects the params" do DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do ReportRunner.run!( @@ -67,6 +136,8 @@ module DiscourseAi debugging = report.ordered_posts.last.raw expect(debugging).to include(post.raw) + expect(debugging).to include(post_in_category.raw) + expect(debugging).to include(post_with_tag.raw) expect(debugging).not_to include(secure_post.raw) end end