FEATURE: allow excluding tags and categories from LLM report (#447)

Also: better diagnostics (the debug output now includes the model being used), and the prompt now tells the LLM that real forum content is injected in the <context> tag.
2024-01-30 15:55:05 +11:00 · 2024-01-30 15:55:05 +11:00 · ab7e9e31aa
parent bae71eb047
commit ab7e9e31aa
5 changed files with 113 additions and 3 deletions
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@ -51,6 +51,12 @@ en:
            tags:
              label: "Tags"
              description: "Filter topics only to these tags"
+            exclude_tags:
+              label: "Exclude Tags"
+              description: "Exclude topics with these tags"
+            exclude_categories:
+              label: "Exclude Categories"
+              description: "Exclude topics with these categories"
            allow_secure_categories:
              label: "Allow secure categories"
              description: "Allow the report to be generated for topics in secure categories"
--- a/discourse_automation/llm_report.rb
+++ b/discourse_automation/llm_report.rb
@ -34,6 +34,9 @@ if defined?(DiscourseAutomation)
    field :categories, component: :categories
    field :tags, component: :tags

+    field :exclude_categories, component: :categories
+    field :exclude_tags, component: :tags
+
    field :allow_secure_categories, component: :boolean
    field :debug_mode, component: :boolean

@ -55,6 +58,9 @@ if defined?(DiscourseAutomation)
        priority_group = fields.dig("priority_group", "value")
        tokens_per_post = fields.dig("tokens_per_post", "value")

+        exclude_category_ids = fields.dig("exclude_categories", "value")
+        exclude_tags = fields.dig("exclude_tags", "value")
+
        DiscourseAi::Automation::ReportRunner.run!(
          sender_username: sender,
          receivers: receivers,
@ -71,6 +77,8 @@ if defined?(DiscourseAutomation)
          offset: offset,
          priority_group_id: priority_group,
          tokens_per_post: tokens_per_post,
+          exclude_category_ids: exclude_category_ids,
+          exclude_tags: exclude_tags,
        )
      rescue => e
        Discourse.warn_exception e, message: "Error running LLM report!"
--- a/lib/automation/report_context_generator.rb
+++ b/lib/automation/report_context_generator.rb
@ -16,7 +16,9 @@ module DiscourseAi
        max_posts: 200,
        tokens_per_post: 100,
        tokenizer: nil,
-        prioritized_group_ids: []
+        prioritized_group_ids: [],
+        exclude_category_ids: nil,
+        exclude_tags: nil
      )
        @start_date = start_date
        @duration = duration
@ -41,9 +43,23 @@ module DiscourseAi
            .where("topics.archetype = ?", Archetype.default)
        @posts = @posts.where("categories.read_restricted = ?", false) if !@allow_secure_categories
        @posts = @posts.where("categories.id IN (?)", @category_ids) if @category_ids.present?
+        @posts =
+          @posts.where(
+            "categories.id NOT IN (?)",
+            exclude_category_ids,
+          ) if exclude_category_ids.present?
+
+        if exclude_tags.present?
+          exclude_tag_ids = Tag.where_name(exclude_tags).select(:id)
+          @posts =
+            @posts.where(
+              "topics.id NOT IN (?)",
+              TopicTag.where(tag_id: exclude_tag_ids).select(:topic_id),
+            )
+        end

        if @tags.present?
-          tag_ids = Tag.where(name: @tags).select(:id)
+          tag_ids = Tag.where_name(@tags).select(:id)
          topic_ids_with_tags = TopicTag.where(tag_id: tag_ids).select(:topic_id)
          @posts = @posts.where(topic_id: topic_ids_with_tags)
        end
--- a/lib/automation/report_runner.rb
+++ b/lib/automation/report_runner.rb
@ -48,7 +48,9 @@ module DiscourseAi
        tags: nil,
        priority_group_id: nil,
        allow_secure_categories: false,
-        debug_mode: false
+        debug_mode: false,
+        exclude_category_ids: nil,
+        exclude_tags: nil
      )
        @sender = User.find_by(username: sender_username)
        @receivers = User.where(username: receivers)
@ -72,6 +74,8 @@ module DiscourseAi
        @priority_group_id = priority_group_id
        @tokens_per_post = tokens_per_post.to_i
        @topic_id = topic_id.presence&.to_i
+        @exclude_category_ids = exclude_category_ids
+        @exclude_tags = exclude_tags

        if !@topic_id && !@receivers.present? && !@email_receivers.present?
          raise ArgumentError, "Must specify topic_id or receivers"
@ -100,10 +104,14 @@ module DiscourseAi
            allow_secure_categories: @allow_secure_categories,
            tokens_per_post: @tokens_per_post,
            tokenizer: @llm.tokenizer,
+            exclude_category_ids: @exclude_category_ids,
+            exclude_tags: @exclude_tags,
          )
        input = <<~INPUT.strip
          #{@instructions}

+          Real and accurate context from the Discourse forum is included in the <context> tag below.
+
          <context>
          #{context}
          </context>
@ -158,6 +166,7 @@ module DiscourseAi
            tags: #{@tags},
            category_ids: #{@category_ids},
            priority_group: #{@priority_group_id}
+            model: #{@model}
            LLM context was:
            ```

--- a/spec/lib/modules/automation/report_runner_spec.rb
+++ b/spec/lib/modules/automation/report_runner_spec.rb
@ -13,6 +13,16 @@ module DiscourseAi
      fab!(:secure_topic) { Fabricate(:topic, category: secure_category) }
      fab!(:secure_post) { Fabricate(:post, raw: "Top secret date !!!!", topic: secure_topic) }

+      fab!(:category) { Fabricate(:category) }
+      fab!(:topic_in_category) { Fabricate(:topic, category: category) }
+      fab!(:post_in_category) do
+        Fabricate(:post, raw: "I am in a category", topic: topic_in_category)
+      end
+
+      fab!(:tag) { Fabricate(:tag) }
+      fab!(:topic_with_tag) { Fabricate(:topic, tags: [tag]) }
+      fab!(:post_with_tag) { Fabricate(:post, raw: "I am in a tag", topic: topic_with_tag) }
+
      describe "#run!" do
        it "is able to generate email reports" do
          freeze_time
@ -42,6 +52,65 @@ module DiscourseAi
          )
        end

+        it "can exclude categories" do
+          freeze_time
+
+          DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do
+            ReportRunner.run!(
+              sender_username: user.username,
+              receivers: [receiver.username],
+              title: "test report",
+              model: "gpt-4",
+              category_ids: nil,
+              tags: nil,
+              allow_secure_categories: false,
+              debug_mode: true,
+              sample_size: 100,
+              instructions: "make a magic report",
+              days: 7,
+              offset: 0,
+              priority_group_id: nil,
+              tokens_per_post: 150,
+              exclude_category_ids: [category.id],
+            )
+          end
+
+          report = Topic.where(title: "test report").first
+          debugging = report.ordered_posts.last.raw
+
+          expect(debugging).not_to include(post_in_category.raw)
+        end
+
+        it "can exclude tags" do
+          freeze_time
+
+          DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do
+            ReportRunner.run!(
+              sender_username: user.username,
+              receivers: [receiver.username],
+              title: "test report",
+              model: "gpt-4",
+              category_ids: nil,
+              tags: nil,
+              allow_secure_categories: false,
+              debug_mode: true,
+              sample_size: 100,
+              instructions: "make a magic report",
+              days: 7,
+              offset: 0,
+              priority_group_id: nil,
+              tokens_per_post: 150,
+              exclude_tags: [tag.name],
+            )
+          end
+
+          report = Topic.where(title: "test report").first
+          debugging = report.ordered_posts.last.raw
+
+          expect(debugging).to include(post_in_category.raw)
+          expect(debugging).not_to include(post_with_tag.raw)
+        end
+
        it "generates correctly respects the params" do
          DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do
            ReportRunner.run!(
@ -67,6 +136,8 @@ module DiscourseAi
          debugging = report.ordered_posts.last.raw

          expect(debugging).to include(post.raw)
+          expect(debugging).to include(post_in_category.raw)
+          expect(debugging).to include(post_with_tag.raw)
          expect(debugging).not_to include(secure_post.raw)
        end
      end