FEATURE: LLM based periodical summary report (#357)

Introduce a Discourse Automation based periodical report. Depends on Discourse Automation. Report works best with very large context language models such as GPT-4-Turbo and Claude 2. - Introduces final_insts to generic llm format, for claude to work best it is better to guide the last assistant message (we should add this to other spots as well) - Adds GPT-4 turbo support to generic llm interface
2023-12-19 12:04:15 +11:00 · 2023-12-19 12:04:15 +11:00 · d0f54443ae
parent e0bf6adb5b
commit d0f54443ae
19 changed files with 955 additions and 215 deletions
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@ -6,12 +6,59 @@ en:
          discourse_ai: "Discourse AI"
  js:
    discourse_automation:
+      ai_models:
+        gpt_4_turbo: GPT 4 Turbo
+        gpt_4: GPT 4
+        gpt_3_5_turbo: GPT 3.5 Turbo
+        claude_2: Claude 2
+        gemini_pro: Gemini Pro
      scriptables:
+        llm_report:
+          fields:
+            sender:
+              label: "Sender"
+              description: "The user that will send the report"
+            receivers:
+              label: "Receivers"
+              description: "The users that will receive the report (can be email or usernames)"
+            title:
+              label: "Title"
+              description: "The title of the report"
+            days:
+              label: "Days"
+              description: "The timespan of the report"
+            offset:
+              label: "Offset"
+              description: "When testing you may want to run the report historically; use offset to start the report at an earlier date"
+            instructions:
+              label: "Instructions"
+              description: "The instructions provided to the large language model"
+            sample_size:
+              label: "Sample Size"
+              description: "The number of posts to sample for the report"
+            tokens_per_post:
+              label: "Tokens per post"
+              description: "The number of llm tokens to use per post"
+            model:
+              label: "Model"
+              description: "LLM to use for report generation"
+            categories:
+              label: "Categories"
+              description: "Filter topics only to these categories"
+            tags:
+              label: "Tags"
+              description: "Filter topics only to these tags"
+            allow_secure_categories:
+              label: "Allow secure categories"
+              description: "Allow the report to be generated for topics in secure categories"
+            debug_mode:
+              label: "Debug Mode"
+              description: "Enable debug mode to see the raw input and output of the LLM"
+            priority_group:
+              label: "Priority Group"
+              description: "Prioritize content from this group in the report"
+
        llm_triage:
-          models:
-            gpt_4: GPT 4
-            gpt_3_5_turbo: GPT 3.5 Turbo
-            claude_2: Claude 2
          fields:
            system_prompt:
              label: "System Prompt"
--- a/config/locales/server.en.yml
+++ b/config/locales/server.en.yml
@ -5,6 +5,9 @@ en:
        title: Triage posts using AI
        description: "Triage posts using a large language model"
        system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%"
+      llm_report:
+        title: Periodic report using AI
+        description: "Periodic report based on a large language model"
  site_settings:
    discourse_ai_enabled: "Enable the discourse AI plugin."
    ai_toxicity_enabled: "Enable the toxicity module."
--- a/discourse_automation/llm_report.rb
+++ b/discourse_automation/llm_report.rb
@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+if defined?(DiscourseAutomation)
+  module DiscourseAutomation::LlmReport
+  end
+
+  DiscourseAutomation::Scriptable::LLM_REPORT = "llm_report"
+
+  DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_REPORT) do
+    version 1
+    triggerables %i[recurring]
+
+    field :sender, component: :user, required: true
+    field :receivers, component: :users, required: true
+    field :title, component: :text, required: true
+    field :days, component: :text, required: true, default_value: 7
+    field :offset, component: :text, required: true, default_value: 0
+    field :instructions,
+          component: :message,
+          required: true,
+          default_value: DiscourseAi::Automation::ReportRunner.default_instructions
+    field :sample_size, component: :text, required: true, default_value: 100
+    field :tokens_per_post, component: :text, required: true, default_value: 150
+
+    field :model,
+          component: :choices,
+          required: true,
+          extra: {
+            content: DiscourseAi::Automation::AVAILABLE_MODELS,
+          }
+
+    field :priority_group, component: :group
+    field :categories, component: :categories
+    field :tags, component: :tags
+
+    field :allow_secure_categories, component: :boolean
+    field :debug_mode, component: :boolean
+
+    script do |context, fields, automation|
+      begin
+        sender = fields.dig("sender", "value")
+        receivers = fields.dig("receivers", "value")
+        title = fields.dig("title", "value")
+        model = fields.dig("model", "value")
+        category_ids = fields.dig("categories", "value")
+        tags = fields.dig("tags", "value")
+        allow_secure_categories = !!fields.dig("allow_secure_categories", "value")
+        debug_mode = !!fields.dig("debug_mode", "value")
+        sample_size = fields.dig("sample_size", "value")
+        instructions = fields.dig("instructions", "value")
+        days = fields.dig("days", "value")
+        offset = fields.dig("offset", "value").to_i
+        priority_group = fields.dig("priority_group", "value")
+        tokens_per_post = fields.dig("tokens_per_post", "value")
+
+        DiscourseAi::Automation::ReportRunner.run!(
+          sender_username: sender,
+          receivers: receivers,
+          title: title,
+          model: model,
+          category_ids: category_ids,
+          tags: tags,
+          allow_secure_categories: allow_secure_categories,
+          debug_mode: debug_mode,
+          sample_size: sample_size,
+          instructions: instructions,
+          days: days,
+          offset: offset,
+          priority_group_id: priority_group,
+          tokens_per_post: tokens_per_post,
+        )
+      rescue => e
+        Discourse.warn_exception e, message: "Error running LLM report!"
+        if Rails.env.development?
+          p e
+          puts e.backtrace
+        end
+      end
+    end
+  end
+end
--- a/discourse_automation/llm_triage.rb
+++ b/discourse_automation/llm_triage.rb
@ -1,104 +1,8 @@
 # frozen_string_literal: true

 if defined?(DiscourseAutomation)
-  module DiscourseAutomation::LlmTriage
-    def self.handle(
-      post:,
-      model:,
-      search_for_text:,
-      system_prompt:,
-      category_id: nil,
-      tags: nil,
-      canned_reply: nil,
-      canned_reply_user: nil,
-      hide_topic: nil
-    )
-      if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
-        raise ArgumentError, "llm_triage: no action specified!"
-      end
-
-      post_template = +""
-      post_template << "title: #{post.topic.title}\n"
-      post_template << "#{post.raw}"
-
-      filled_system_prompt = system_prompt.sub("%%POST%%", post_template)
-
-      if filled_system_prompt == system_prompt
-        raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
-      end
-
-      result = nil
-      if model == "claude-2"
-        # allowing double + 10 tokens
-        # technically maybe just token count is fine, but this will allow for more creative bad responses
-        result =
-          DiscourseAi::Inference::AnthropicCompletions.perform!(
-            filled_system_prompt,
-            model,
-            temperature: 0,
-            max_tokens:
-              DiscourseAi::Tokenizer::AnthropicTokenizer.tokenize(search_for_text).length * 2 + 10,
-          ).dig(:completion)
-      else
-        result =
-          DiscourseAi::Inference::OpenAiCompletions.perform!(
-            [{ :role => "system", "content" => filled_system_prompt }],
-            model,
-            temperature: 0,
-            max_tokens:
-              DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(search_for_text).length * 2 + 10,
-          ).dig(:choices, 0, :message, :content)
-      end
-
-      if result.strip == search_for_text.strip
-        user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
-        user = user || Discourse.system_user
-        if canned_reply.present?
-          PostCreator.create!(
-            user,
-            topic_id: post.topic_id,
-            raw: canned_reply,
-            reply_to_post_number: post.post_number,
-            skip_validations: true,
-          )
-        end
-
-        changes = {}
-        changes[:category_id] = category_id if category_id.present?
-        changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?
-
-        if changes.present?
-          first_post = post.topic.posts.where(post_number: 1).first
-          changes[:bypass_bump] = true
-          changes[:skip_validations] = true
-          first_post.revise(Discourse.system_user, changes)
-        end
-
-        post.topic.update!(visible: false) if hide_topic
-      end
-    end
-  end
-
  DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage"

-  AVAILABLE_MODELS = [
-    {
-      id: "gpt-4",
-      name:
-        "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_4",
-    },
-    {
-      id: "gpt-3-5-turbo",
-      name:
-        "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_3_5_turbo",
-    },
-    {
-      id: "claude-2",
-      name:
-        "discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.claude_2",
-    },
-  ]
-
  DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do
    version 1
    run_in_background
@ -119,7 +23,12 @@ if defined?(DiscourseAutomation)
          end,
          accepts_placeholders: true
    field :search_for_text, component: :text, required: true
-    field :model, component: :choices, required: true, extra: { content: AVAILABLE_MODELS }
+    field :model,
+          component: :choices,
+          required: true,
+          extra: {
+            content: DiscourseAi::Automation::AVAILABLE_MODELS,
+          }
    field :category, component: :category
    field :tags, component: :tags
    field :hide_topic, component: :boolean
@ -149,7 +58,7 @@ if defined?(DiscourseAutomation)
      end

      begin
-        DiscourseAutomation::LlmTriage.handle(
+        DiscourseAi::Automation::LlmTriage.handle(
          post: post,
          model: model,
          search_for_text: search_for_text,
--- a/lib/automation.rb
+++ b/lib/automation.rb
@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Automation
+    # Models offered in the "model" choice field of the AI automation scripts
+    # (llm_triage and llm_report). Each entry pairs the stored id with the
+    # client-side translation key used as its label.
+    #
+    # NOTE(review): the id "gpt-3-5-turbo" differs from the "gpt-3.5-turbo"
+    # spelling accepted by the ChatGpt dialect / OpenAi endpoint `can_*?`
+    # lists, and both scripts pass the id to `Llm.proxy` unchanged -- confirm
+    # the id is normalised somewhere before lookup.
+    AVAILABLE_MODELS = [
+      { id: "gpt-4-turbo", name: "discourse_automation.ai_models.gpt_4_turbo" },
+      { id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" },
+      { id: "gpt-3-5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" },
+      { id: "claude-2", name: "discourse_automation.ai_models.claude_2" },
+      { id: "gemini-pro", name: "discourse_automation.ai_models.gemini_pro" },
+    ]
+  end
+end
--- a/lib/automation/llm_triage.rb
+++ b/lib/automation/llm_triage.rb
@ -0,0 +1,75 @@
+# frozen_string_literal: true
+#
+module DiscourseAi
+  module Automation
+    module LlmTriage
+      def self.handle(
+        post:,
+        model:,
+        search_for_text:,
+        system_prompt:,
+        category_id: nil,
+        tags: nil,
+        canned_reply: nil,
+        canned_reply_user: nil,
+        hide_topic: nil
+      )
+        if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
+          raise ArgumentError, "llm_triage: no action specified!"
+        end
+
+        post_template = +""
+        post_template << "title: #{post.topic.title}\n"
+        post_template << "#{post.raw}"
+
+        filled_system_prompt = system_prompt.sub("%%POST%%", post_template)
+
+        if filled_system_prompt == system_prompt
+          raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
+        end
+
+        result = nil
+
+        llm = DiscourseAi::Completions::Llm.proxy(model)
+        prompt = {
+          insts: filled_system_prompt,
+          params: {
+            model => {
+              max_tokens: (llm.tokenizer.tokenize(search_for_text).length * 2 + 10),
+              temperature: 0,
+            },
+          },
+        }
+
+        result = llm.completion!(prompt, Discourse.system_user)
+
+        if result.strip == search_for_text.strip
+          user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
+          user = user || Discourse.system_user
+          if canned_reply.present?
+            PostCreator.create!(
+              user,
+              topic_id: post.topic_id,
+              raw: canned_reply,
+              reply_to_post_number: post.post_number,
+              skip_validations: true,
+            )
+          end
+
+          changes = {}
+          changes[:category_id] = category_id if category_id.present?
+          changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?
+
+          if changes.present?
+            first_post = post.topic.posts.where(post_number: 1).first
+            changes[:bypass_bump] = true
+            changes[:skip_validations] = true
+            first_post.revise(Discourse.system_user, changes)
+          end
+
+          post.topic.update!(visible: false) if hide_topic
+        end
+      end
+    end
+  end
+end
--- a/lib/automation/report_context_generator.rb
+++ b/lib/automation/report_context_generator.rb
@ -0,0 +1,225 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Automation
+    class ReportContextGenerator
+      # Convenience entry point: builds a generator with the given keyword
+      # arguments (see #initialize) and returns the rendered context string.
+      def self.generate(**args)
+        new(**args).generate
+      end
+
+      # Prepares the post relation and solved-topic lookup used to render the
+      # report context.
+      #
+      # @param start_date [Time] inclusive lower bound on post creation time
+      # @param duration [ActiveSupport::Duration] length of the window; posts
+      #   created at or after start_date + duration are excluded
+      # @param category_ids [Array<Integer>, nil] restrict to these categories
+      # @param tags [Array<String>, nil] restrict to topics carrying any of these tag names
+      # @param allow_secure_categories [Boolean] include read-restricted categories
+      # @param max_posts [Integer] number of posts to sample
+      # @param tokens_per_post [Integer] token budget for each post excerpt
+      # @param tokenizer [#truncate, nil] defaults to the OpenAI tokenizer
+      # @param prioritized_group_ids [Array<Integer>] groups whose members' posts are sampled first
+      def initialize(
+        start_date:,
+        duration:,
+        category_ids: nil,
+        tags: nil,
+        allow_secure_categories: false,
+        max_posts: 200,
+        tokens_per_post: 100,
+        tokenizer: nil,
+        prioritized_group_ids: []
+      )
+        @start_date = start_date
+        @duration = duration
+        @category_ids = category_ids
+        @tags = tags
+        @allow_secure_categories = allow_secure_categories
+        @max_posts = max_posts
+        @tokenizer = tokenizer || DiscourseAi::Tokenizer::OpenAiTokenizer
+        @tokens_per_post = tokens_per_post
+        @prioritized_group_ids = prioritized_group_ids
+
+        # Base scope: regular, non-hidden posts created inside the window, in
+        # non-deleted topics of the default archetype.
+        @posts =
+          Post
+            .where("posts.created_at >= ?", @start_date)
+            .joins(topic: :category)
+            .includes(:topic, :user)
+            .where("posts.created_at < ?", @start_date + @duration)
+            .where("posts.post_type = ?", Post.types[:regular])
+            .where("posts.hidden_at IS NULL")
+            .where("topics.deleted_at IS NULL")
+            .where("topics.archetype = ?", Archetype.default)
+        # Read-restricted categories are left out unless explicitly allowed.
+        @posts = @posts.where("categories.read_restricted = ?", false) if !@allow_secure_categories
+        @posts = @posts.where("categories.id IN (?)", @category_ids) if @category_ids.present?
+
+        if @tags.present?
+          # Keep only posts whose topic carries at least one of the named tags.
+          tag_ids = Tag.where(name: @tags).select(:id)
+          topic_ids_with_tags = TopicTag.where(tag_id: tag_ids).select(:topic_id)
+          @posts = @posts.where(topic_id: topic_ids_with_tags)
+        end
+
+        # topic_id => Set of accepted-answer post ids; stays empty when the
+        # solved plugin is not loaded.
+        @solutions = {}
+        if defined?(::DiscourseSolved)
+          TopicCustomField
+            .where(name: ::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD)
+            .where(topic_id: @posts.select(:topic_id))
+            .pluck(:topic_id, :value)
+            .each do |topic_id, post_id|
+              @solutions[topic_id] ||= Set.new
+              @solutions[topic_id] << post_id.to_i
+            end
+        end
+      end
+
+      def format_topic(topic)
+        info = []
+        info << ""
+        info << "### #{topic.title}"
+        info << "topic_id: #{topic.id}"
+        info << "solved: true" if @solutions.key?(topic.id)
+        info << "category: #{topic.category&.name}"
+        tags = topic.tags.pluck(:name)
+        info << "tags: #{topic.tags.pluck(:name).join(", ")}" if tags.present?
+        info << topic.created_at.strftime("%Y-%m-%d %H:%M")
+        { created_at: topic.created_at, info: info.join("\n"), posts: {} }
+      end
+
+      def format_post(post)
+        buffer = []
+        buffer << ""
+        buffer << "post_number: #{post.post_number}"
+        if @solutions.key?(post.topic_id) && @solutions[post.topic_id].include?(post.id)
+          buffer << "solution: true"
+        end
+        buffer << post.created_at.strftime("%Y-%m-%d %H:%M")
+        buffer << "user: #{post.user&.username}"
+        buffer << "likes: #{post.like_count}"
+        excerpt = @tokenizer.truncate(post.raw, @tokens_per_post)
+        excerpt = "excerpt: #{excerpt}..." if excerpt.length < post.raw.length
+        buffer << "#{excerpt}"
+        { likes: post.like_count, info: buffer.join("\n") }
+      end
+
+      # Renders the "Summary" section: date range, post and topic counts, the
+      # ten users with the most likes in the window and, when prioritized
+      # groups are configured, the subset of those users belonging to them.
+      #
+      # @return [String] newline-joined summary
+      def format_summary
+        # Distinct topics that have a matching post and were themselves created
+        # after the window start.
+        topic_count =
+          @posts
+            .where("topics.created_at > ?", @start_date)
+            .select(:topic_id)
+            .distinct(:topic_id)
+            .count
+
+        buffer = []
+        buffer << "Start Date: #{@start_date.to_date}"
+        buffer << "End Date: #{(@start_date + @duration).to_date}"
+        buffer << "New posts: #{@posts.count}"
+        buffer << "New topics: #{topic_count}"
+
+        # Per-user like and post totals over the matching posts, best-liked first.
+        top_users =
+          Post
+            .where(id: @posts.select(:id))
+            .joins(:user)
+            .group(:user_id, :username)
+            .select(
+              "user_id, username, sum(posts.like_count) like_count, count(posts.id) post_count",
+            )
+            .order("sum(posts.like_count) desc")
+            .limit(10)
+
+        buffer << "Top users:"
+        top_users.each do |user|
+          buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)"
+        end
+
+        if @prioritized_group_ids.present?
+          # "name (full name)" for each group; the full name is clipped and
+          # flattened to a single line.
+          group_names =
+            Group
+              .where(id: @prioritized_group_ids)
+              .pluck(:name, :full_name)
+              .map do |name, full_name|
+                if full_name.present?
+                  "#{name} (#{full_name[0..100].gsub("\n", " ")})"
+                else
+                  name
+                end
+              end
+              .join(", ")
+          buffer << ""
+          # pluralise "group" when more than one name was joined
+          buffer << "Top users in #{group_names} group#{group_names.include?(",") ? "s" : ""}:"
+
+          # Same aggregate as above, restricted to members of the prioritized groups.
+          group_users = GroupUser.where(group_id: @prioritized_group_ids).select(:user_id)
+          top_users
+            .where(user_id: group_users)
+            .each do |user|
+              buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)"
+            end
+        end
+
+        buffer.join("\n")
+      end
+
+      # Renders the "Topics" section. Posts are sampled most-liked first
+      # (prioritized groups before everyone else), grouped by topic, and the
+      # topics are emitted in descending order of the likes on their sampled
+      # posts.
+      #
+      # Note: reassigns @posts with an ordering applied.
+      #
+      # @return [String] newline-joined topic and post blocks
+      def format_topics
+        buffer = []
+        topics = {}
+
+        post_count = 0
+
+        @posts = @posts.order("posts.like_count desc, posts.created_at desc")
+
+        if @prioritized_group_ids.present?
+          # Posts by members of the prioritized groups are sampled first.
+          user_groups = GroupUser.where(group_id: @prioritized_group_ids)
+          prioritized_posts = @posts.where(user_id: user_groups.select(:user_id)).limit(@max_posts)
+
+          post_count += add_posts(prioritized_posts, topics)
+        end
+
+        # Fill whatever is left of the sample budget from all matching posts.
+        add_posts(@posts.limit(@max_posts), topics, limit: @max_posts - post_count)
+
+        # we need last posts in all topics
+        # they may have important info
+        last_posts =
+          @posts.where("posts.post_number = topics.highest_post_number").where(
+            "topics.id IN (?)",
+            topics.keys,
+          )
+
+        add_posts(last_posts, topics)
+
+        # Total likes across the sampled posts of each topic, used for ranking.
+        topics.each do |topic_id, topic_info|
+          topic_info[:post_likes] = topic_info[:posts].sum { |_, post_info| post_info[:likes] }
+        end
+
+        # sort turns the hash into [topic_id, topic_info] pairs, most-liked first
+        topics = topics.sort { |a, b| b[1][:post_likes] <=> a[1][:post_likes] }
+
+        topics.each do |topic_id, topic_info|
+          buffer << topic_info[:info]
+
+          last_post_number = 0
+
+          # Posts are emitted in post_number order; "..." marks a gap where
+          # intermediate posts were not sampled.
+          topic_info[:posts]
+            .sort { |a, b| a[0] <=> b[0] }
+            .each do |post_number, post_info|
+              buffer << "\n..." if post_number > last_post_number + 1
+              buffer << post_info[:info]
+              last_post_number = post_number
+            end
+        end
+
+        buffer.join("\n")
+      end
+
+      def generate
+        buffer = []
+
+        buffer << "## Summary"
+        buffer << format_summary
+        buffer << "\n## Topics"
+        buffer << format_topics
+
+        buffer.join("\n")
+      end
+
+      def add_posts(relation, topics, limit: nil)
+        post_count = 0
+        relation.each do |post|
+          topics[post.topic_id] ||= format_topic(post.topic)
+          if !topics[post.topic_id][:posts][post.post_number]
+            topics[post.topic_id][:posts][post.post_number] = format_post(post)
+            post_count += 1
+            limit -= 1 if limit
+          end
+          break if limit && limit <= 0
+        end
+        post_count
+      end
+    end
+  end
+end
--- a/lib/automation/report_runner.rb
+++ b/lib/automation/report_runner.rb
@ -0,0 +1,146 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Automation
+    class ReportRunner
+      # Default value for the "instructions" automation field: the guidelines
+      # and report structure sent to the LLM. The site's base URL is
+      # interpolated into the linking guideline.
+      #
+      # @return [String]
+      def self.default_instructions
+        # not localizing for now cause non English LLM will require
+        # a fair bit of experimentation
+        <<~TEXT
+        Generate report:
+
+        ## Report Guidelines:
+
+        - Length & Style: Aim for 12 dense paragraphs in a narrative style, focusing on internal forum discussions.
+        - Accuracy: Only include verified information with no embellishments.
+        - Sourcing: ALWAYS Back statements with links to forum discussions.
+        - Markdown Usage: Enhance readability with **bold**, *italic*, and > quotes.
+        - Linking: Use `#{Discourse.base_url}/t/-/TOPIC_ID/POST_NUMBER` for direct references.
+        - User Mentions: Reference users with @USERNAME
+        - Context tips: Staff are denoted with Username *. For example: jane * means that jane is a staff member. Do not render the * in the report.
+        - Add many topic links: strive to link to at least 30 topics in the report. Topic Id is meaningless to end users if you need to throw in a link use [ref](...) or better still just embed it into the [sentence](...)
+        - Categories and tags: use the format #TAG and #CATEGORY to denote tags and categories
+
+        ## Structure:
+
+        - Key statistics: Specify date range, call out important stats like number of new topics and posts
+        - Overview: Briefly state trends within period.
+        - Highlighted content: 5 paragaraphs highlighting important topics people should know about. If possible have each paragraph link to multiple related topics.
+        - Key insights and trends linking to a selection of posts that back them
+        TEXT
+      end
+
+      # Convenience entry point: builds a runner with the given keyword
+      # arguments (see #initialize) and runs it.
+      def self.run!(**args)
+        new(**args).run!
+      end
+
+      def initialize(
+        sender_username:,
+        receivers:,
+        title:,
+        model:,
+        category_ids:,
+        tags:,
+        allow_secure_categories:,
+        debug_mode:,
+        sample_size:,
+        instructions:,
+        days:,
+        offset:,
+        priority_group_id:,
+        tokens_per_post:
+      )
+        @sender = User.find_by(username: sender_username)
+        @receivers = User.where(username: receivers)
+        @title = title
+
+        @model = model
+        @llm = DiscourseAi::Completions::Llm.proxy(model)
+        @category_ids = category_ids
+        @tags = tags
+        @allow_secure_categories = allow_secure_categories
+        @debug_mode = debug_mode
+        @sample_size = sample_size.to_i < 10 ? 10 : sample_size.to_i
+        @instructions = instructions
+        @days = days.to_i
+        @offset = offset.to_i
+        @priority_group_id = priority_group_id
+        @tokens_per_post = tokens_per_post.to_i
+      end
+
+      # Generates the report and delivers it as a private message from the
+      # sender to the receivers. In debug mode a follow-up post with the
+      # generation parameters and the full LLM input is added to the same topic.
+      def run!
+        # The window starts offset + days ago and lasts `days` days.
+        start_date = (@offset + @days).days.ago
+        # stays nil when no priority group is configured
+        prioritized_group_ids = [@priority_group_id] if @priority_group_id.present?
+        context =
+          DiscourseAi::Automation::ReportContextGenerator.generate(
+            start_date: start_date,
+            duration: @days.days,
+            max_posts: @sample_size,
+            tags: @tags,
+            category_ids: @category_ids,
+            prioritized_group_ids: prioritized_group_ids,
+            allow_secure_categories: @allow_secure_categories,
+            tokens_per_post: @tokens_per_post,
+            tokenizer: @llm.tokenizer,
+          )
+        # The instructions are placed both before and after the context.
+        input = <<~INPUT
+          #{@instructions}
+
+          <context>
+          #{context}
+          </context>
+
+          #{@instructions}
+        INPUT
+
+        prompt = {
+          insts: "You are a helpful bot specializing in summarizing activity Discourse sites",
+          input: input,
+          final_insts: "Here is the report I generated for you",
+          params: {
+            @model => {
+              temperature: 0,
+            },
+          },
+        }
+
+        result = +""
+
+        puts if Rails.env.development? && @debug_mode
+
+        # Accumulate the streamed completion; echo it to stdout when debugging
+        # in development.
+        @llm.completion!(prompt, Discourse.system_user) do |response|
+          print response if Rails.env.development? && @debug_mode
+          result << response
+        end
+
+        post =
+          PostCreator.create!(
+            @sender,
+            raw: result,
+            title: @title,
+            archetype: Archetype.private_message,
+            target_usernames: @receivers.map(&:username).join(","),
+            skip_validations: true,
+          )
+
+        if @debug_mode
+          # Indent every input line by four spaces before embedding it in the
+          # debug post.
+          input = input.split("\n").map { |line| "    #{line}" }.join("\n")
+          raw = <<~RAW
+            ```
+            start_date: #{start_date},
+            duration: #{@days.days},
+            max_posts: #{@sample_size},
+            tags: #{@tags},
+            category_ids: #{@category_ids},
+            priority_group: #{@priority_group_id}
+            LLM context was:
+            ```
+
+            #{input}
+          RAW
+          PostCreator.create!(@sender, raw: raw, topic_id: post.topic_id, skip_validations: true)
+        end
+      end
+    end
+  end
+end
--- a/lib/completions/dialects/chat_gpt.rb
+++ b/lib/completions/dialects/chat_gpt.rb
@ -6,7 +6,14 @@ module DiscourseAi
      class ChatGpt < Dialect
        class << self
          def can_translate?(model_name)
-            %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
+            %w[
+              gpt-3.5-turbo
+              gpt-4
+              gpt-3.5-turbo-16k
+              gpt-4-32k
+              gpt-4-1106-preview
+              gpt-4-turbo
+            ].include?(model_name)
          end

          def tokenizer
--- a/lib/completions/dialects/claude.rb
+++ b/lib/completions/dialects/claude.rb
@ -27,7 +27,9 @@ module DiscourseAi

          claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]

-          claude_prompt << "Assistant:\n"
+          claude_prompt << "Assistant:"
+          claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts]
+          claude_prompt << "\n"
        end

        def max_prompt_tokens
--- a/lib/completions/dialects/dialect.rb
+++ b/lib/completions/dialects/dialect.rb
@ -17,9 +17,10 @@ module DiscourseAi
              DiscourseAi::Completions::Dialects::OrcaStyle,
              DiscourseAi::Completions::Dialects::Gemini,
            ]
-            dialects.detect(-> { raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL }) do |d|
-              d.can_translate?(model_name)
-            end
+
+            dialect = dialects.find { |d| d.can_translate?(model_name) }
+            raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect
+            dialect
          end

          def tokenizer
--- a/lib/completions/endpoints/open_ai.rb
+++ b/lib/completions/endpoints/open_ai.rb
@ -5,11 +5,18 @@ module DiscourseAi
    module Endpoints
      class OpenAi < Base
        def self.can_contact?(model_name)
-          %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
+          %w[
+            gpt-3.5-turbo
+            gpt-4
+            gpt-3.5-turbo-16k
+            gpt-4-32k
+            gpt-4-1106-preview
+            gpt-4-turbo
+          ].include?(model_name)
        end

        def default_options
-          { model: model }
+          { model: model == "gpt-4-turbo" ? "gpt-4-1106-preview" : model }
        end

        def provider_id
@ -24,7 +31,11 @@ module DiscourseAi
              if model.include?("32k")
                SiteSetting.ai_openai_gpt4_32k_url
              else
-                SiteSetting.ai_openai_gpt4_url
+                if model.include?("1106") || model.include?("turbo")
+                  SiteSetting.ai_openai_gpt4_turbo_url
+                else
+                  SiteSetting.ai_openai_gpt4_url
+                end
              end
            else
              if model.include?("16k")
--- a/lib/summarization/entry_point.rb
+++ b/lib/summarization/entry_point.rb
@ -7,6 +7,7 @@ module DiscourseAi
        foldable_models = [
          Models::OpenAi.new("gpt-4", max_tokens: 8192),
          Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
+          Models::OpenAi.new("gpt-4-1106-preview", max_tokens: 100_000),
          Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
          Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
          Models::Anthropic.new("claude-2", max_tokens: 100_000),
--- a/plugin.rb
+++ b/plugin.rb
@ -40,6 +40,7 @@ register_svg_icon "meh"
 after_initialize do
  # do not autoload this cause we may have no namespace
  require_relative "discourse_automation/llm_triage"
+  require_relative "discourse_automation/llm_report"

  add_admin_route "discourse_ai.title", "discourse-ai"

--- a/spec/lib/discourse_automation/llm_report_spec.rb
+++ b/spec/lib/discourse_automation/llm_report_spec.rb
@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+return if !defined?(DiscourseAutomation)
+
+# End-to-end check that the llm_report scriptable can be triggered through
+# Discourse Automation and delivers the LLM output as a private message.
+describe DiscourseAutomation do
+  let(:automation) { Fabricate(:automation, script: "llm_report", enabled: true) }
+
+  # Creates a script-targeted field on the automation holding `value`.
+  def add_automation_field(name, value, type: "text")
+    automation.fields.create!(
+      component: type,
+      name: name,
+      metadata: {
+        value: value,
+      },
+      target: "script",
+    )
+  end
+
+  it "can trigger via automation" do
+    user = Fabricate(:user)
+
+    # Only these four fields are set; every other field is left unset.
+    add_automation_field("sender", user.username, type: "user")
+    add_automation_field("receivers", [user.username], type: "users")
+    add_automation_field("model", "gpt-4-turbo")
+    add_automation_field("title", "Weekly report")
+
+    # The LLM is stubbed with a canned completion.
+    DiscourseAi::Completions::Llm.with_prepared_responses(["An Amazing Report!!!"]) do
+      automation.trigger!
+    end
+
+    # The first post of the created topic must be the canned completion.
+    pm = Topic.where(title: "Weekly report").first
+    expect(pm.posts.first.raw).to eq("An Amazing Report!!!")
+  end
+end
--- a/spec/lib/discourse_automation/llm_triage_spec.rb
+++ b/spec/lib/discourse_automation/llm_triage_spec.rb
@ -2,106 +2,9 @@

 return if !defined?(DiscourseAutomation)

-describe DiscourseAutomation::LlmTriage do
+describe DiscourseAi::Automation::LlmTriage do
  fab!(:post) { Fabricate(:post) }

-  def triage(**args)
-    DiscourseAutomation::LlmTriage.handle(**args)
-  end
-
-  it "does nothing if it does not pass triage" do
-    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
-      status: 200,
-      body: { choices: [{ message: { content: "good" } }] }.to_json,
-    )
-
-    triage(
-      post: post,
-      model: "gpt-4",
-      hide_topic: true,
-      system_prompt: "test %%POST%%",
-      search_for_text: "bad",
-    )
-
-    expect(post.topic.reload.visible).to eq(true)
-  end
-
-  it "can hide topics on triage with claude" do
-    stub_request(:post, "https://api.anthropic.com/v1/complete").to_return(
-      status: 200,
-      body: { completion: "bad" }.to_json,
-    )
-
-    triage(
-      post: post,
-      model: "claude-2",
-      hide_topic: true,
-      system_prompt: "test %%POST%%",
-      search_for_text: "bad",
-    )
-
-    expect(post.topic.reload.visible).to eq(false)
-  end
-
-  it "can hide topics on triage with claude" do
-    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
-      status: 200,
-      body: { choices: [{ message: { content: "bad" } }] }.to_json,
-    )
-
-    triage(
-      post: post,
-      model: "gpt-4",
-      hide_topic: true,
-      system_prompt: "test %%POST%%",
-      search_for_text: "bad",
-    )
-
-    expect(post.topic.reload.visible).to eq(false)
-  end
-
-  it "can categorize topics on triage" do
-    category = Fabricate(:category)
-
-    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
-      status: 200,
-      body: { choices: [{ message: { content: "bad" } }] }.to_json,
-    )
-
-    triage(
-      post: post,
-      model: "gpt-4",
-      category_id: category.id,
-      system_prompt: "test %%POST%%",
-      search_for_text: "bad",
-    )
-
-    expect(post.topic.reload.category_id).to eq(category.id)
-  end
-
-  it "can reply to topics on triage" do
-    user = Fabricate(:user)
-
-    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
-      status: 200,
-      body: { choices: [{ message: { content: "bad" } }] }.to_json,
-    )
-
-    triage(
-      post: post,
-      model: "gpt-4",
-      system_prompt: "test %%POST%%",
-      search_for_text: "bad",
-      canned_reply: "test canned reply 123",
-      canned_reply_user: user.username,
-    )
-
-    reply = post.topic.posts.order(:post_number).last
-
-    expect(reply.raw).to eq("test canned reply 123")
-    expect(reply.user.id).to eq(user.id)
-  end
-
  let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }

  def add_automation_field(name, value, type: "text")
@ -130,13 +33,10 @@ describe DiscourseAutomation::LlmTriage do
    add_automation_field("canned_reply", "Yo this is a reply")
    add_automation_field("canned_reply_user", user.username, type: "user")

-    stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
-      status: 200,
-      body: { choices: [{ message: { content: "bad" } }] }.to_json,
-    )
-
-    automation.running_in_background!
-    automation.trigger!({ "post" => post })
+    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
+      automation.running_in_background!
+      automation.trigger!({ "post" => post })
+    end

    topic = post.topic.reload
    expect(topic.category_id).to eq(category.id)
--- a/spec/lib/modules/automation/llm_triage_spec.rb
+++ b/spec/lib/modules/automation/llm_triage_spec.rb
@ -0,0 +1,85 @@
+# frozen_string_literal: true
+describe DiscourseAi::Automation::LlmTriage do
+  fab!(:post) { Fabricate(:post) }
+
+  def triage(**args)
+    DiscourseAi::Automation::LlmTriage.handle(**args)
+  end
+
+  it "does nothing if it does not pass triage" do
+    DiscourseAi::Completions::Llm.with_prepared_responses(["good"]) do
+      triage(
+        post: post,
+        model: "gpt-4",
+        hide_topic: true,
+        system_prompt: "test %%POST%%",
+        search_for_text: "bad",
+      )
+    end
+
+    expect(post.topic.reload.visible).to eq(true)
+  end
+
+  it "can hide topics on triage with claude" do
+    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
+      triage(
+        post: post,
+        model: "claude-2",
+        hide_topic: true,
+        system_prompt: "test %%POST%%",
+        search_for_text: "bad",
+      )
+    end
+
+    expect(post.topic.reload.visible).to eq(false)
+  end
+
+  it "can hide topics on triage with claude" do
+    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
+      triage(
+        post: post,
+        model: "gpt-4",
+        hide_topic: true,
+        system_prompt: "test %%POST%%",
+        search_for_text: "bad",
+      )
+    end
+
+    expect(post.topic.reload.visible).to eq(false)
+  end
+
+  it "can categorize topics on triage" do
+    category = Fabricate(:category)
+
+    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
+      triage(
+        post: post,
+        model: "gpt-4",
+        category_id: category.id,
+        system_prompt: "test %%POST%%",
+        search_for_text: "bad",
+      )
+    end
+
+    expect(post.topic.reload.category_id).to eq(category.id)
+  end
+
+  it "can reply to topics on triage" do
+    user = Fabricate(:user)
+    DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
+      triage(
+        post: post,
+        model: "gpt-4",
+        system_prompt: "test %%POST%%",
+        search_for_text: "bad",
+        canned_reply: "test canned reply 123",
+        canned_reply_user: user.username,
+      )
+    end
+
+    reply = post.topic.posts.order(:post_number).last
+
+    expect(reply.raw).to eq("test canned reply 123")
+    expect(reply.user.id).to eq(user.id)
+  end
+end
--- a/spec/lib/modules/automation/report_context_generator_spec.rb
+++ b/spec/lib/modules/automation/report_context_generator_spec.rb
@ -0,0 +1,152 @@
+# frozen_string_literal: true
+
+require "rails_helper"
+# Checks which topics, posts and markers ReportContextGenerator.generate puts in its output.
+module DiscourseAi
+  module Automation
+    describe ReportContextGenerator do
+      describe ".generate" do
+        fab!(:private_message_post)
+        fab!(:post_in_other_category) { Fabricate(:post) }
+        # A topic in +category+ with two posts, the second replying to post number 1.
+        fab!(:category)
+        fab!(:topic) { Fabricate(:topic, category: category) }
+        fab!(:post_in_category) { Fabricate(:post, topic: topic) }
+        fab!(:reply_in_category) { Fabricate(:post, topic: topic, reply_to_post_number: 1) }
+        # A private category fabricated for +group+, with a topic posted to by a group member.
+        fab!(:group)
+        fab!(:private_category) { Fabricate(:private_category, group: group) }
+        fab!(:secure_topic) do
+          Fabricate(:topic, title: "category in secure category", category: private_category)
+        end
+        fab!(:user_in_group) { Fabricate(:user, groups: [group]) }
+        fab!(:post_in_private_category) do
+          Fabricate(:post, user: user_in_group, topic: secure_topic)
+        end
+        # A topic carrying two tags, plus a post in it.
+        fab!(:tag)
+        fab!(:tag2) { Fabricate(:tag) }
+        fab!(:topic_with_tag) { Fabricate(:topic, tags: [tag, tag2]) }
+        fab!(:post_with_tag) { Fabricate(:post, topic: topic_with_tag) }
+        # A post of 100 space-separated words ("testing1" .. "testing100") in a new topic in +category+.
+        fab!(:long_post) do
+          Fabricate(
+            :post,
+            raw: (1..100).map { |i| "testing#{i}" }.join(" "),
+            topic: Fabricate(:topic, category: category),
+          )
+        end
+        # A topic fabricated with like_count 10 and three posts with like_count 10, 5 and 3.
+        fab!(:topic_with_likes) { Fabricate(:topic, like_count: 10) }
+
+        fab!(:post_with_likes) { Fabricate(:post, topic: topic_with_likes, like_count: 10) }
+
+        fab!(:post_with_likes2) { Fabricate(:post, topic: topic_with_likes, like_count: 5) }
+
+        fab!(:post_with_likes3) { Fabricate(:post, topic: topic_with_likes, like_count: 3) }
+        # This example is only defined when the DiscourseSolved constant is loaded.
+        if defined?(::DiscourseSolved)
+          it "will correctly denote solved topics" do
+            topic_with_likes.custom_fields[
+              ::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD
+            ] = post_with_likes2.id
+            topic_with_likes.save_custom_fields
+
+            context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
+
+            expect(context).to include("solved: true")
+            expect(context).to include("solution: true")
+          end
+        end
+
+        it "always includes info from last posts on topic" do
+          context =
+            ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day, max_posts: 1)
+
+          expect(context).to include("...")
+          expect(context).to include("post_number: 3")
+        end
+
+        it "includes a summary" do
+          context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
+          # NOTE(review): 8 posts / 5 topics is the fab! set above minus the PM and private-category records (presumably excluded by default).
+          expect(context).to include("New posts: 8")
+          expect(context).to include("New topics: 5")
+        end
+
+        it "orders so most liked are first" do
+          context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
+          # Regexp::MULTILINE lets ".*" cross line breaks between the two topic ids.
+          regex = "topic_id: #{topic_with_likes.id}.*topic_id: #{long_post.topic.id}"
+          expect(context).to match(Regexp.new(regex, Regexp::MULTILINE))
+        end
+
+        it "allows you to prioritize groups" do
+          context =
+            ReportContextGenerator.generate(
+              start_date: 1.day.ago,
+              duration: 2.day,
+              prioritized_group_ids: [group.id],
+              allow_secure_categories: true,
+              max_posts: 1,
+            )
+          # NOTE(review): with max_posts: 1 the group member's post presumably takes the only slot — confirm.
+          expect(context).to include(post_in_private_category.topic.title)
+          expect(context).not_to include(post_in_other_category.topic.title)
+          expect(context).to include(group.name)
+        end
+
+        it "can generate context (excluding PMs)" do
+          context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
+
+          expect(context).to include(post_in_other_category.topic.title)
+          expect(context).to include(topic.title)
+          expect(context).not_to include(private_message_post.topic.title)
+          expect(context).not_to include(secure_topic.title)
+        end
+
+        it "can filter on tag" do
+          context =
+            ReportContextGenerator.generate(
+              start_date: 1.day.ago,
+              duration: 2.day,
+              tags: [tag.name],
+            )
+
+          expect(context).not_to include(post_in_other_category.topic.title)
+          expect(context).not_to include(topic.title)
+          expect(context).not_to include(private_message_post.topic.title)
+          expect(context).not_to include(secure_topic.title)
+          expect(context).to include(post_with_tag.topic.title)
+        end
+
+        it "can optionally include secure categories" do
+          context =
+            ReportContextGenerator.generate(
+              start_date: 1.day.ago,
+              duration: 2.day,
+              allow_secure_categories: true,
+            )
+          expect(context).to include(post_in_other_category.topic.title)
+          expect(context).to include(topic.title)
+          expect(context).not_to include(private_message_post.topic.title)
+          expect(context).to include(secure_topic.title)
+        end
+
+        it "can filter to a categories" do
+          context =
+            ReportContextGenerator.generate(
+              start_date: 1.day.ago,
+              duration: 2.day,
+              category_ids: [category.id],
+            )
+
+          expect(context).not_to include(post_in_other_category.topic.title)
+          expect(context).to include(topic.title)
+          expect(context).not_to include(private_message_post.topic.title)
+          expect(context).not_to include(secure_topic.title)
+        end
+      end
+    end
+  end
+end
--- a/spec/lib/modules/automation/report_runner_spec.rb
+++ b/spec/lib/modules/automation/report_runner_spec.rb
@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+require "rails_helper"
+
+module DiscourseAi
+  module Automation
+    describe ReportRunner do
+      fab!(:user)
+      fab!(:reciever) { Fabricate(:user) }
+      fab!(:post) { Fabricate(:post, user: user) }
+      fab!(:group)
+      fab!(:secure_category) { Fabricate(:private_category, group: group) }
+      fab!(:secure_topic) { Fabricate(:topic, category: secure_category) }
+      fab!(:secure_post) { Fabricate(:post, raw: "Top secret date !!!!", topic: secure_topic) }
+
+      describe "#run!" do
+        it "generates correctly respects the params" do
+          DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do
+            ReportRunner.run!(
+              sender_username: user.username,
+              receivers: [reciever.username],
+              title: "test report",
+              model: "gpt-4",
+              category_ids: nil,
+              tags: nil,
+              allow_secure_categories: false,
+              debug_mode: true,
+              sample_size: 100,
+              instructions: "make a magic report",
+              days: 7,
+              offset: 0,
+              priority_group_id: nil,
+              tokens_per_post: 150,
+            )
+          end
+
+          report = Topic.where(title: "test report").first
+          expect(report.ordered_posts.first.raw).to eq("magical report")
+          debugging = report.ordered_posts.last.raw
+
+          expect(debugging).to include(post.raw)
+          expect(debugging).not_to include(secure_post.raw)
+        end
+      end
+    end
+  end
+end