FEATURE: LLM based periodical summary report (#357)
Introduce a Discourse Automation based periodical report. Depends on Discourse Automation. Report works best with very large context language models such as GPT-4-Turbo and Claude 2. - Introduces final_insts to generic llm format, for claude to work best it is better to guide the last assistant message (we should add this to other spots as well) - Adds GPT-4 turbo support to generic llm interface
This commit is contained in:
parent
e0bf6adb5b
commit
d0f54443ae
|
@ -6,12 +6,59 @@ en:
|
||||||
discourse_ai: "Discourse AI"
|
discourse_ai: "Discourse AI"
|
||||||
js:
|
js:
|
||||||
discourse_automation:
|
discourse_automation:
|
||||||
|
ai_models:
|
||||||
|
gpt_4_turbo: GPT 4 Turbo
|
||||||
|
gpt_4: GPT 4
|
||||||
|
gpt_3_5_turbo: GPT 3.5 Turbo
|
||||||
|
claude_2: Claude 2
|
||||||
|
gemini_pro: Gemini Pro
|
||||||
scriptables:
|
scriptables:
|
||||||
|
llm_report:
|
||||||
|
fields:
|
||||||
|
sender:
|
||||||
|
label: "Sender"
|
||||||
|
description: "The user that will send the report"
|
||||||
|
receivers:
|
||||||
|
label: "Receivers"
|
||||||
|
description: "The users that will receive the report (can be email or usernames)"
|
||||||
|
title:
|
||||||
|
label: "Title"
|
||||||
|
description: "The title of the report"
|
||||||
|
days:
|
||||||
|
label: "Days"
|
||||||
|
description: "The timespan of the report"
|
||||||
|
offset:
|
||||||
|
label: "Offset"
|
||||||
|
description: "When testing you may want to run the report historically, use offset to start the report in an earlier date"
|
||||||
|
instructions:
|
||||||
|
label: "Instructions"
|
||||||
|
description: "The instructions provided to the large language model"
|
||||||
|
sample_size:
|
||||||
|
label: "Sample Size"
|
||||||
|
description: "The number of posts to sample for the report"
|
||||||
|
tokens_per_post:
|
||||||
|
label: "Tokens per post"
|
||||||
|
description: "The number of llm tokens to use per post"
|
||||||
|
model:
|
||||||
|
label: "Model"
|
||||||
|
description: "LLM to use for report generation"
|
||||||
|
categories:
|
||||||
|
label: "Categories"
|
||||||
|
description: "Filter topics only to these categories"
|
||||||
|
tags:
|
||||||
|
label: "Tags"
|
||||||
|
description: "Filter topics only to these tags"
|
||||||
|
allow_secure_categories:
|
||||||
|
label: "Allow secure categories"
|
||||||
|
description: "Allow the report to be generated for topics in secure categories"
|
||||||
|
debug_mode:
|
||||||
|
label: "Debug Mode"
|
||||||
|
description: "Enable debug mode to see the raw input and output of the LLM"
|
||||||
|
priority_group:
|
||||||
|
label: "Priority Group"
|
||||||
|
description: "Prioritize content from this group in the report"
|
||||||
|
|
||||||
llm_triage:
|
llm_triage:
|
||||||
models:
|
|
||||||
gpt_4: GPT 4
|
|
||||||
gpt_3_5_turbo: GPT 3.5 Turbo
|
|
||||||
claude_2: Claude 2
|
|
||||||
fields:
|
fields:
|
||||||
system_prompt:
|
system_prompt:
|
||||||
label: "System Prompt"
|
label: "System Prompt"
|
||||||
|
|
|
@ -5,6 +5,9 @@ en:
|
||||||
title: Triage posts using AI
|
title: Triage posts using AI
|
||||||
description: "Triage posts using a large language model"
|
description: "Triage posts using a large language model"
|
||||||
system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%"
|
system_prompt_missing_post_placeholder: "System prompt must contain a placeholder for the post: %%POST%%"
|
||||||
|
llm_report:
|
||||||
|
title: Periodic report using AI
|
||||||
|
description: "Periodic report based on a large language model"
|
||||||
site_settings:
|
site_settings:
|
||||||
discourse_ai_enabled: "Enable the discourse AI plugin."
|
discourse_ai_enabled: "Enable the discourse AI plugin."
|
||||||
ai_toxicity_enabled: "Enable the toxicity module."
|
ai_toxicity_enabled: "Enable the toxicity module."
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
# frozen_string_literal: true

# Registers the "llm_report" Discourse Automation scriptable: a recurring job
# that samples recent forum activity, asks an LLM to summarize it, and sends
# the result as a private message. Requires the Discourse Automation plugin.
if defined?(DiscourseAutomation)
  module DiscourseAutomation::LlmReport
  end

  DiscourseAutomation::Scriptable::LLM_REPORT = "llm_report"

  DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_REPORT) do
    version 1
    triggerables %i[recurring]

    field :sender, component: :user, required: true
    field :receivers, component: :users, required: true
    field :title, component: :text, required: true
    field :days, component: :text, required: true, default_value: 7
    field :offset, component: :text, required: true, default_value: 0
    field :instructions,
          component: :message,
          required: true,
          default_value: DiscourseAi::Automation::ReportRunner.default_instructions
    field :sample_size, component: :text, required: true, default_value: 100
    field :tokens_per_post, component: :text, required: true, default_value: 150

    field :model,
          component: :choices,
          required: true,
          extra: {
            content: DiscourseAi::Automation::AVAILABLE_MODELS,
          }

    field :priority_group, component: :group
    field :categories, component: :categories
    field :tags, component: :tags

    field :allow_secure_categories, component: :boolean
    field :debug_mode, component: :boolean

    script do |context, fields, automation|
      begin
        # Field values arrive as { "name" => { "value" => ... } } hashes.
        sender = fields.dig("sender", "value")
        receivers = fields.dig("receivers", "value")
        title = fields.dig("title", "value")
        model = fields.dig("model", "value")
        category_ids = fields.dig("categories", "value")
        tags = fields.dig("tags", "value")
        allow_secure_categories = !!fields.dig("allow_secure_categories", "value")
        debug_mode = !!fields.dig("debug_mode", "value")
        sample_size = fields.dig("sample_size", "value")
        instructions = fields.dig("instructions", "value")
        days = fields.dig("days", "value")
        offset = fields.dig("offset", "value").to_i
        priority_group = fields.dig("priority_group", "value")
        tokens_per_post = fields.dig("tokens_per_post", "value")

        DiscourseAi::Automation::ReportRunner.run!(
          sender_username: sender,
          receivers: receivers,
          title: title,
          model: model,
          category_ids: category_ids,
          tags: tags,
          allow_secure_categories: allow_secure_categories,
          debug_mode: debug_mode,
          sample_size: sample_size,
          instructions: instructions,
          days: days,
          offset: offset,
          priority_group_id: priority_group,
          tokens_per_post: tokens_per_post,
        )
      rescue => e
        # Automation runs in the background; log instead of crashing the job.
        Discourse.warn_exception e, message: "Error running LLM report!"
        if Rails.env.development?
          p e
          puts e.backtrace
        end
      end
    end
  end
end
|
|
@ -1,104 +1,8 @@
|
||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
if defined?(DiscourseAutomation)
|
if defined?(DiscourseAutomation)
|
||||||
module DiscourseAutomation::LlmTriage
|
|
||||||
def self.handle(
|
|
||||||
post:,
|
|
||||||
model:,
|
|
||||||
search_for_text:,
|
|
||||||
system_prompt:,
|
|
||||||
category_id: nil,
|
|
||||||
tags: nil,
|
|
||||||
canned_reply: nil,
|
|
||||||
canned_reply_user: nil,
|
|
||||||
hide_topic: nil
|
|
||||||
)
|
|
||||||
if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
|
|
||||||
raise ArgumentError, "llm_triage: no action specified!"
|
|
||||||
end
|
|
||||||
|
|
||||||
post_template = +""
|
|
||||||
post_template << "title: #{post.topic.title}\n"
|
|
||||||
post_template << "#{post.raw}"
|
|
||||||
|
|
||||||
filled_system_prompt = system_prompt.sub("%%POST%%", post_template)
|
|
||||||
|
|
||||||
if filled_system_prompt == system_prompt
|
|
||||||
raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
|
|
||||||
end
|
|
||||||
|
|
||||||
result = nil
|
|
||||||
if model == "claude-2"
|
|
||||||
# allowing double + 10 tokens
|
|
||||||
# technically maybe just token count is fine, but this will allow for more creative bad responses
|
|
||||||
result =
|
|
||||||
DiscourseAi::Inference::AnthropicCompletions.perform!(
|
|
||||||
filled_system_prompt,
|
|
||||||
model,
|
|
||||||
temperature: 0,
|
|
||||||
max_tokens:
|
|
||||||
DiscourseAi::Tokenizer::AnthropicTokenizer.tokenize(search_for_text).length * 2 + 10,
|
|
||||||
).dig(:completion)
|
|
||||||
else
|
|
||||||
result =
|
|
||||||
DiscourseAi::Inference::OpenAiCompletions.perform!(
|
|
||||||
[{ :role => "system", "content" => filled_system_prompt }],
|
|
||||||
model,
|
|
||||||
temperature: 0,
|
|
||||||
max_tokens:
|
|
||||||
DiscourseAi::Tokenizer::OpenAiTokenizer.tokenize(search_for_text).length * 2 + 10,
|
|
||||||
).dig(:choices, 0, :message, :content)
|
|
||||||
end
|
|
||||||
|
|
||||||
if result.strip == search_for_text.strip
|
|
||||||
user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
|
|
||||||
user = user || Discourse.system_user
|
|
||||||
if canned_reply.present?
|
|
||||||
PostCreator.create!(
|
|
||||||
user,
|
|
||||||
topic_id: post.topic_id,
|
|
||||||
raw: canned_reply,
|
|
||||||
reply_to_post_number: post.post_number,
|
|
||||||
skip_validations: true,
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
changes = {}
|
|
||||||
changes[:category_id] = category_id if category_id.present?
|
|
||||||
changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?
|
|
||||||
|
|
||||||
if changes.present?
|
|
||||||
first_post = post.topic.posts.where(post_number: 1).first
|
|
||||||
changes[:bypass_bump] = true
|
|
||||||
changes[:skip_validations] = true
|
|
||||||
first_post.revise(Discourse.system_user, changes)
|
|
||||||
end
|
|
||||||
|
|
||||||
post.topic.update!(visible: false) if hide_topic
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage"
|
DiscourseAutomation::Scriptable::LLM_TRIAGE = "llm_triage"
|
||||||
|
|
||||||
AVAILABLE_MODELS = [
|
|
||||||
{
|
|
||||||
id: "gpt-4",
|
|
||||||
name:
|
|
||||||
"discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_4",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
id: "gpt-3-5-turbo",
|
|
||||||
name:
|
|
||||||
"discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.gpt_3_5_turbo",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
id: "claude-2",
|
|
||||||
name:
|
|
||||||
"discourse_automation.scriptables.#{DiscourseAutomation::Scriptable::LLM_TRIAGE}.models.claude_2",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do
|
DiscourseAutomation::Scriptable.add(DiscourseAutomation::Scriptable::LLM_TRIAGE) do
|
||||||
version 1
|
version 1
|
||||||
run_in_background
|
run_in_background
|
||||||
|
@ -119,7 +23,12 @@ if defined?(DiscourseAutomation)
|
||||||
end,
|
end,
|
||||||
accepts_placeholders: true
|
accepts_placeholders: true
|
||||||
field :search_for_text, component: :text, required: true
|
field :search_for_text, component: :text, required: true
|
||||||
field :model, component: :choices, required: true, extra: { content: AVAILABLE_MODELS }
|
field :model,
|
||||||
|
component: :choices,
|
||||||
|
required: true,
|
||||||
|
extra: {
|
||||||
|
content: DiscourseAi::Automation::AVAILABLE_MODELS,
|
||||||
|
}
|
||||||
field :category, component: :category
|
field :category, component: :category
|
||||||
field :tags, component: :tags
|
field :tags, component: :tags
|
||||||
field :hide_topic, component: :boolean
|
field :hide_topic, component: :boolean
|
||||||
|
@ -149,7 +58,7 @@ if defined?(DiscourseAutomation)
|
||||||
end
|
end
|
||||||
|
|
||||||
begin
|
begin
|
||||||
DiscourseAutomation::LlmTriage.handle(
|
DiscourseAi::Automation::LlmTriage.handle(
|
||||||
post: post,
|
post: post,
|
||||||
model: model,
|
model: model,
|
||||||
search_for_text: search_for_text,
|
search_for_text: search_for_text,
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Models selectable in automation "model" choice fields.
    # `id` is the value persisted by the automation; `name` is an i18n key
    # resolved client-side. Frozen because it is shared, constant data.
    AVAILABLE_MODELS = [
      { id: "gpt-4-turbo", name: "discourse_automation.ai_models.gpt_4_turbo" },
      { id: "gpt-4", name: "discourse_automation.ai_models.gpt_4" },
      { id: "gpt-3-5-turbo", name: "discourse_automation.ai_models.gpt_3_5_turbo" },
      { id: "claude-2", name: "discourse_automation.ai_models.claude_2" },
      { id: "gemini-pro", name: "discourse_automation.ai_models.gemini_pro" },
    ].freeze
  end
end
|
|
@ -0,0 +1,75 @@
|
||||||
|
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Triage a post with an LLM: the system prompt (which must embed the post
    # via a %%POST%% placeholder) is sent to the model, and when the model's
    # reply matches `search_for_text` exactly, the configured actions run
    # (canned reply, recategorize, retag, hide topic).
    module LlmTriage
      # @param post [Post] the post being triaged
      # @param model [String] LLM identifier understood by Completions::Llm.proxy
      # @param search_for_text [String] exact reply that triggers the actions
      # @param system_prompt [String] prompt containing the %%POST%% placeholder
      # @param category_id [Integer, nil] move the topic here on match
      # @param tags [Array<String>, nil] tags to apply on match
      # @param canned_reply [String, nil] reply to post on match
      # @param canned_reply_user [String, nil] username posting the canned reply
      # @param hide_topic [Boolean, nil] hide the topic on match
      # @raise [ArgumentError] when no action is configured or the placeholder is missing
      def self.handle(
        post:,
        model:,
        search_for_text:,
        system_prompt:,
        category_id: nil,
        tags: nil,
        canned_reply: nil,
        canned_reply_user: nil,
        hide_topic: nil
      )
        if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank?
          raise ArgumentError, "llm_triage: no action specified!"
        end

        post_template = +""
        post_template << "title: #{post.topic.title}\n"
        post_template << "#{post.raw}"

        filled_system_prompt = system_prompt.sub("%%POST%%", post_template)

        # sub returns the original string when the placeholder was absent.
        if filled_system_prompt == system_prompt
          raise ArgumentError, "llm_triage: system_prompt does not contain %%POST%% placeholder"
        end

        result = nil

        llm = DiscourseAi::Completions::Llm.proxy(model)
        prompt = {
          insts: filled_system_prompt,
          params: {
            model => {
              # Allow roughly double the trigger text plus slack so a creative
              # "wrong" answer still comes back instead of being truncated.
              max_tokens: (llm.tokenizer.tokenize(search_for_text).length * 2 + 10),
              temperature: 0,
            },
          },
        }

        result = llm.completion!(prompt, Discourse.system_user)

        if result.strip == search_for_text.strip
          user = User.find_by_username(canned_reply_user) if canned_reply_user.present?
          user = user || Discourse.system_user
          if canned_reply.present?
            PostCreator.create!(
              user,
              topic_id: post.topic_id,
              raw: canned_reply,
              reply_to_post_number: post.post_number,
              skip_validations: true,
            )
          end

          changes = {}
          changes[:category_id] = category_id if category_id.present?
          changes[:tags] = tags if SiteSetting.tagging_enabled? && tags.present?

          if changes.present?
            # Category/tag changes are applied by revising the first post.
            first_post = post.topic.posts.where(post_number: 1).first
            changes[:bypass_bump] = true
            changes[:skip_validations] = true
            first_post.revise(Discourse.system_user, changes)
          end

          post.topic.update!(visible: false) if hide_topic
        end
      end
    end
  end
end
|
|
@ -0,0 +1,225 @@
|
||||||
|
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Builds the textual "context" fed to the LLM for periodical reports:
    # a summary section (date range, counts, top users) followed by the
    # highest-liked topics with truncated post excerpts.
    class ReportContextGenerator
      # Convenience wrapper: build and render in one call.
      def self.generate(**args)
        new(**args).generate
      end

      # @param start_date [Time] beginning of the reporting window
      # @param duration [ActiveSupport::Duration] window length
      # @param category_ids [Array<Integer>, nil] restrict to these categories
      # @param tags [Array<String>, nil] restrict to topics carrying these tags
      # @param allow_secure_categories [Boolean] include read-restricted categories
      # @param max_posts [Integer] cap on sampled posts
      # @param tokens_per_post [Integer] excerpt budget per post
      # @param tokenizer [Class, nil] tokenizer used for truncation (OpenAI by default)
      # @param prioritized_group_ids [Array<Integer>] groups whose posts are sampled first
      def initialize(
        start_date:,
        duration:,
        category_ids: nil,
        tags: nil,
        allow_secure_categories: false,
        max_posts: 200,
        tokens_per_post: 100,
        tokenizer: nil,
        prioritized_group_ids: []
      )
        @start_date = start_date
        @duration = duration
        @category_ids = category_ids
        @tags = tags
        @allow_secure_categories = allow_secure_categories
        @max_posts = max_posts
        @tokenizer = tokenizer || DiscourseAi::Tokenizer::OpenAiTokenizer
        @tokens_per_post = tokens_per_post
        @prioritized_group_ids = prioritized_group_ids

        # Base relation: visible regular posts created inside the window.
        @posts =
          Post
            .where("posts.created_at >= ?", @start_date)
            .joins(topic: :category)
            .includes(:topic, :user)
            .where("posts.created_at < ?", @start_date + @duration)
            .where("posts.post_type = ?", Post.types[:regular])
            .where("posts.hidden_at IS NULL")
            .where("topics.deleted_at IS NULL")
            .where("topics.archetype = ?", Archetype.default)
        @posts = @posts.where("categories.read_restricted = ?", false) if !@allow_secure_categories
        @posts = @posts.where("categories.id IN (?)", @category_ids) if @category_ids.present?

        if @tags.present?
          tag_ids = Tag.where(name: @tags).select(:id)
          topic_ids_with_tags = TopicTag.where(tag_id: tag_ids).select(:topic_id)
          @posts = @posts.where(topic_id: topic_ids_with_tags)
        end

        # topic_id => Set of accepted-answer post ids (only with DiscourseSolved).
        @solutions = {}
        if defined?(::DiscourseSolved)
          TopicCustomField
            .where(name: ::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD)
            .where(topic_id: @posts.select(:topic_id))
            .pluck(:topic_id, :value)
            .each do |topic_id, post_id|
              @solutions[topic_id] ||= Set.new
              @solutions[topic_id] << post_id.to_i
            end
        end
      end

      # Renders a topic header and returns the accumulator entry for it.
      def format_topic(topic)
        info = []
        info << ""
        info << "### #{topic.title}"
        info << "topic_id: #{topic.id}"
        info << "solved: true" if @solutions.key?(topic.id)
        info << "category: #{topic.category&.name}"
        tags = topic.tags.pluck(:name)
        info << "tags: #{topic.tags.pluck(:name).join(", ")}" if tags.present?
        info << topic.created_at.strftime("%Y-%m-%d %H:%M")
        { created_at: topic.created_at, info: info.join("\n"), posts: {} }
      end

      # Renders one post with metadata and a token-truncated excerpt.
      def format_post(post)
        buffer = []
        buffer << ""
        buffer << "post_number: #{post.post_number}"
        if @solutions.key?(post.topic_id) && @solutions[post.topic_id].include?(post.id)
          buffer << "solution: true"
        end
        buffer << post.created_at.strftime("%Y-%m-%d %H:%M")
        buffer << "user: #{post.user&.username}"
        buffer << "likes: #{post.like_count}"
        excerpt = @tokenizer.truncate(post.raw, @tokens_per_post)
        # Mark truncation so the LLM knows the post continues.
        excerpt = "excerpt: #{excerpt}..." if excerpt.length < post.raw.length
        buffer << "#{excerpt}"
        { likes: post.like_count, info: buffer.join("\n") }
      end

      # Renders the "## Summary" section: date range, counts, top users, and
      # (optionally) top users restricted to the prioritized groups.
      def format_summary
        topic_count =
          @posts
            .where("topics.created_at > ?", @start_date)
            .select(:topic_id)
            .distinct(:topic_id)
            .count

        buffer = []
        buffer << "Start Date: #{@start_date.to_date}"
        buffer << "End Date: #{(@start_date + @duration).to_date}"
        buffer << "New posts: #{@posts.count}"
        buffer << "New topics: #{topic_count}"

        top_users =
          Post
            .where(id: @posts.select(:id))
            .joins(:user)
            .group(:user_id, :username)
            .select(
              "user_id, username, sum(posts.like_count) like_count, count(posts.id) post_count",
            )
            .order("sum(posts.like_count) desc")
            .limit(10)

        buffer << "Top users:"
        top_users.each do |user|
          buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)"
        end

        if @prioritized_group_ids.present?
          group_names =
            Group
              .where(id: @prioritized_group_ids)
              .pluck(:name, :full_name)
              .map do |name, full_name|
                if full_name.present?
                  "#{name} (#{full_name[0..100].gsub("\n", " ")})"
                else
                  name
                end
              end
              .join(", ")
          buffer << ""
          buffer << "Top users in #{group_names} group#{group_names.include?(",") ? "s" : ""}:"

          group_users = GroupUser.where(group_id: @prioritized_group_ids).select(:user_id)
          top_users
            .where(user_id: group_users)
            .each do |user|
              buffer << "@#{user.username} (#{user.like_count} likes, #{user.post_count} posts)"
            end
        end

        buffer.join("\n")
      end

      # Renders the "## Topics" section: prioritized-group posts first, then the
      # most liked posts overall, then the last post of each included topic.
      def format_topics
        buffer = []
        topics = {}

        post_count = 0

        @posts = @posts.order("posts.like_count desc, posts.created_at desc")

        if @prioritized_group_ids.present?
          user_groups = GroupUser.where(group_id: @prioritized_group_ids)
          prioritized_posts = @posts.where(user_id: user_groups.select(:user_id)).limit(@max_posts)

          post_count += add_posts(prioritized_posts, topics)
        end

        add_posts(@posts.limit(@max_posts), topics, limit: @max_posts - post_count)

        # we need last posts in all topics
        # they may have important info
        last_posts =
          @posts.where("posts.post_number = topics.highest_post_number").where(
            "topics.id IN (?)",
            topics.keys,
          )

        add_posts(last_posts, topics)

        topics.each do |topic_id, topic_info|
          topic_info[:post_likes] = topic_info[:posts].sum { |_, post_info| post_info[:likes] }
        end

        # Most-liked topics first.
        topics = topics.sort { |a, b| b[1][:post_likes] <=> a[1][:post_likes] }

        topics.each do |topic_id, topic_info|
          buffer << topic_info[:info]

          last_post_number = 0

          # Posts in topic order, with "..." marking sampled-out gaps.
          topic_info[:posts]
            .sort { |a, b| a[0] <=> b[0] }
            .each do |post_number, post_info|
              buffer << "\n..." if post_number > last_post_number + 1
              buffer << post_info[:info]
              last_post_number = post_number
            end
        end

        buffer.join("\n")
      end

      # Full rendered context: summary followed by topics.
      def generate
        buffer = []

        buffer << "## Summary"
        buffer << format_summary
        buffer << "\n## Topics"
        buffer << format_topics

        buffer.join("\n")
      end

      # Folds posts from `relation` into the `topics` accumulator, skipping
      # duplicates. Returns the number of newly added posts; stops early once
      # `limit` new posts have been added.
      def add_posts(relation, topics, limit: nil)
        post_count = 0
        relation.each do |post|
          topics[post.topic_id] ||= format_topic(post.topic)
          if !topics[post.topic_id][:posts][post.post_number]
            topics[post.topic_id][:posts][post.post_number] = format_post(post)
            post_count += 1
            limit -= 1 if limit
          end
          break if limit && limit <= 0
        end
        post_count
      end
    end
  end
end
|
|
@ -0,0 +1,146 @@
|
||||||
|
# frozen_string_literal: true

module DiscourseAi
  module Automation
    # Runs a periodical LLM-generated report: gathers context via
    # ReportContextGenerator, prompts the model, and delivers the result as a
    # private message from the sender to the receivers.
    class ReportRunner
      def self.default_instructions
        # not localizing for now cause non English LLM will require
        # a fair bit of experimentation
        <<~TEXT
          Generate report:

          ## Report Guidelines:

          - Length & Style: Aim for 12 dense paragraphs in a narrative style, focusing on internal forum discussions.
          - Accuracy: Only include verified information with no embellishments.
          - Sourcing: ALWAYS Back statements with links to forum discussions.
          - Markdown Usage: Enhance readability with **bold**, *italic*, and > quotes.
          - Linking: Use `#{Discourse.base_url}/t/-/TOPIC_ID/POST_NUMBER` for direct references.
          - User Mentions: Reference users with @USERNAME
          - Context tips: Staff are denoted with Username *. For example: jane * means that jane is a staff member. Do not render the * in the report.
          - Add many topic links: strive to link to at least 30 topics in the report. Topic Id is meaningless to end users if you need to throw in a link use [ref](...) or better still just embed it into the [sentence](...)
          - Categories and tags: use the format #TAG and #CATEGORY to denote tags and categories

          ## Structure:

          - Key statistics: Specify date range, call out important stats like number of new topics and posts
          - Overview: Briefly state trends within period.
          - Highlighted content: 5 paragraphs highlighting important topics people should know about. If possible have each paragraph link to multiple related topics.
          - Key insights and trends linking to a selection of posts that back them
        TEXT
      end

      # Convenience wrapper: build and run in one call.
      def self.run!(**args)
        new(**args).run!
      end

      # All keyword args arrive as raw automation field values (strings),
      # hence the .to_i coercions below.
      def initialize(
        sender_username:,
        receivers:,
        title:,
        model:,
        category_ids:,
        tags:,
        allow_secure_categories:,
        debug_mode:,
        sample_size:,
        instructions:,
        days:,
        offset:,
        priority_group_id:,
        tokens_per_post:
      )
        @sender = User.find_by(username: sender_username)
        @receivers = User.where(username: receivers)
        @title = title

        @model = model
        @llm = DiscourseAi::Completions::Llm.proxy(model)
        @category_ids = category_ids
        @tags = tags
        @allow_secure_categories = allow_secure_categories
        @debug_mode = debug_mode
        # Clamp to a useful minimum; tiny samples make meaningless reports.
        @sample_size = sample_size.to_i < 10 ? 10 : sample_size.to_i
        @instructions = instructions
        @days = days.to_i
        @offset = offset.to_i
        @priority_group_id = priority_group_id
        @tokens_per_post = tokens_per_post.to_i
      end

      def run!
        # offset shifts the window back in time (useful for historical testing).
        start_date = (@offset + @days).days.ago
        prioritized_group_ids = [@priority_group_id] if @priority_group_id.present?
        context =
          DiscourseAi::Automation::ReportContextGenerator.generate(
            start_date: start_date,
            duration: @days.days,
            max_posts: @sample_size,
            tags: @tags,
            category_ids: @category_ids,
            prioritized_group_ids: prioritized_group_ids,
            allow_secure_categories: @allow_secure_categories,
            tokens_per_post: @tokens_per_post,
            tokenizer: @llm.tokenizer,
          )
        # Instructions are repeated after the context: long-context models
        # follow directions better when they appear close to the end.
        input = <<~INPUT
          #{@instructions}

          <context>
          #{context}
          </context>

          #{@instructions}
        INPUT

        prompt = {
          insts: "You are a helpful bot specializing in summarizing activity Discourse sites",
          input: input,
          final_insts: "Here is the report I generated for you",
          params: {
            @model => {
              temperature: 0,
            },
          },
        }

        result = +""

        puts if Rails.env.development? && @debug_mode

        # Stream the completion, accumulating (and echoing in dev debug mode).
        @llm.completion!(prompt, Discourse.system_user) do |response|
          print response if Rails.env.development? && @debug_mode
          result << response
        end

        post =
          PostCreator.create!(
            @sender,
            raw: result,
            title: @title,
            archetype: Archetype.private_message,
            target_usernames: @receivers.map(&:username).join(","),
            skip_validations: true,
          )

        if @debug_mode
          # Follow up with the raw LLM input so operators can inspect it.
          input = input.split("\n").map { |line| " #{line}" }.join("\n")
          raw = <<~RAW
            ```
            start_date: #{start_date},
            duration: #{@days.days},
            max_posts: #{@sample_size},
            tags: #{@tags},
            category_ids: #{@category_ids},
            priority_group: #{@priority_group_id}
            LLM context was:
            ```

            #{input}
          RAW
          PostCreator.create!(@sender, raw: raw, topic_id: post.topic_id, skip_validations: true)
        end
      end
    end
  end
end
|
|
@ -6,7 +6,14 @@ module DiscourseAi
|
||||||
class ChatGpt < Dialect
|
class ChatGpt < Dialect
|
||||||
class << self
|
class << self
|
||||||
def can_translate?(model_name)
|
def can_translate?(model_name)
|
||||||
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
|
%w[
|
||||||
|
gpt-3.5-turbo
|
||||||
|
gpt-4
|
||||||
|
gpt-3.5-turbo-16k
|
||||||
|
gpt-4-32k
|
||||||
|
gpt-4-1106-preview
|
||||||
|
gpt-4-turbo
|
||||||
|
].include?(model_name)
|
||||||
end
|
end
|
||||||
|
|
||||||
def tokenizer
|
def tokenizer
|
||||||
|
|
|
@ -27,7 +27,9 @@ module DiscourseAi
|
||||||
|
|
||||||
claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
|
claude_prompt << "#{prompt[:post_insts]}\n" if prompt[:post_insts]
|
||||||
|
|
||||||
claude_prompt << "Assistant:\n"
|
claude_prompt << "Assistant:"
|
||||||
|
claude_prompt << " #{prompt[:final_insts]}:" if prompt[:final_insts]
|
||||||
|
claude_prompt << "\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
def max_prompt_tokens
|
def max_prompt_tokens
|
||||||
|
|
|
@ -17,9 +17,10 @@ module DiscourseAi
|
||||||
DiscourseAi::Completions::Dialects::OrcaStyle,
|
DiscourseAi::Completions::Dialects::OrcaStyle,
|
||||||
DiscourseAi::Completions::Dialects::Gemini,
|
DiscourseAi::Completions::Dialects::Gemini,
|
||||||
]
|
]
|
||||||
dialects.detect(-> { raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL }) do |d|
|
|
||||||
d.can_translate?(model_name)
|
dialect = dialects.find { |d| d.can_translate?(model_name) }
|
||||||
end
|
raise DiscourseAi::Completions::Llm::UNKNOWN_MODEL if !dialect
|
||||||
|
dialect
|
||||||
end
|
end
|
||||||
|
|
||||||
def tokenizer
|
def tokenizer
|
||||||
|
|
|
@ -5,11 +5,18 @@ module DiscourseAi
|
||||||
module Endpoints
|
module Endpoints
|
||||||
class OpenAi < Base
|
class OpenAi < Base
|
||||||
def self.can_contact?(model_name)
|
def self.can_contact?(model_name)
|
||||||
%w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
|
%w[
|
||||||
|
gpt-3.5-turbo
|
||||||
|
gpt-4
|
||||||
|
gpt-3.5-turbo-16k
|
||||||
|
gpt-4-32k
|
||||||
|
gpt-4-1106-preview
|
||||||
|
gpt-4-turbo
|
||||||
|
].include?(model_name)
|
||||||
end
|
end
|
||||||
|
|
||||||
def default_options
|
def default_options
|
||||||
{ model: model }
|
{ model: model == "gpt-4-turbo" ? "gpt-4-1106-preview" : model }
|
||||||
end
|
end
|
||||||
|
|
||||||
def provider_id
|
def provider_id
|
||||||
|
@ -24,7 +31,11 @@ module DiscourseAi
|
||||||
if model.include?("32k")
|
if model.include?("32k")
|
||||||
SiteSetting.ai_openai_gpt4_32k_url
|
SiteSetting.ai_openai_gpt4_32k_url
|
||||||
else
|
else
|
||||||
SiteSetting.ai_openai_gpt4_url
|
if model.include?("1106") || model.include?("turbo")
|
||||||
|
SiteSetting.ai_openai_gpt4_turbo_url
|
||||||
|
else
|
||||||
|
SiteSetting.ai_openai_gpt4_url
|
||||||
|
end
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
if model.include?("16k")
|
if model.include?("16k")
|
||||||
|
|
|
@ -7,6 +7,7 @@ module DiscourseAi
|
||||||
foldable_models = [
|
foldable_models = [
|
||||||
Models::OpenAi.new("gpt-4", max_tokens: 8192),
|
Models::OpenAi.new("gpt-4", max_tokens: 8192),
|
||||||
Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
|
Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
|
||||||
|
Models::OpenAi.new("gpt-4-1106-preview", max_tokens: 100_000),
|
||||||
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
|
Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
|
||||||
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
|
Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
|
||||||
Models::Anthropic.new("claude-2", max_tokens: 100_000),
|
Models::Anthropic.new("claude-2", max_tokens: 100_000),
|
||||||
|
|
|
@ -40,6 +40,7 @@ register_svg_icon "meh"
|
||||||
after_initialize do
|
after_initialize do
|
||||||
# do not autoload this cause we may have no namespace
|
# do not autoload this cause we may have no namespace
|
||||||
require_relative "discourse_automation/llm_triage"
|
require_relative "discourse_automation/llm_triage"
|
||||||
|
require_relative "discourse_automation/llm_report"
|
||||||
|
|
||||||
add_admin_route "discourse_ai.title", "discourse-ai"
|
add_admin_route "discourse_ai.title", "discourse-ai"
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
return if !defined?(DiscourseAutomation)
|
||||||
|
|
||||||
|
describe DiscourseAutomation do
|
||||||
|
let(:automation) { Fabricate(:automation, script: "llm_report", enabled: true) }
|
||||||
|
|
||||||
|
def add_automation_field(name, value, type: "text")
|
||||||
|
automation.fields.create!(
|
||||||
|
component: type,
|
||||||
|
name: name,
|
||||||
|
metadata: {
|
||||||
|
value: value,
|
||||||
|
},
|
||||||
|
target: "script",
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can trigger via automation" do
|
||||||
|
user = Fabricate(:user)
|
||||||
|
|
||||||
|
add_automation_field("sender", user.username, type: "user")
|
||||||
|
add_automation_field("receivers", [user.username], type: "users")
|
||||||
|
add_automation_field("model", "gpt-4-turbo")
|
||||||
|
add_automation_field("title", "Weekly report")
|
||||||
|
|
||||||
|
DiscourseAi::Completions::Llm.with_prepared_responses(["An Amazing Report!!!"]) do
|
||||||
|
automation.trigger!
|
||||||
|
end
|
||||||
|
|
||||||
|
pm = Topic.where(title: "Weekly report").first
|
||||||
|
expect(pm.posts.first.raw).to eq("An Amazing Report!!!")
|
||||||
|
end
|
||||||
|
end
|
|
@ -2,106 +2,9 @@
|
||||||
|
|
||||||
return if !defined?(DiscourseAutomation)
|
return if !defined?(DiscourseAutomation)
|
||||||
|
|
||||||
describe DiscourseAutomation::LlmTriage do
|
describe DiscourseAi::Automation::LlmTriage do
|
||||||
fab!(:post) { Fabricate(:post) }
|
fab!(:post) { Fabricate(:post) }
|
||||||
|
|
||||||
def triage(**args)
|
|
||||||
DiscourseAutomation::LlmTriage.handle(**args)
|
|
||||||
end
|
|
||||||
|
|
||||||
it "does nothing if it does not pass triage" do
|
|
||||||
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
|
|
||||||
status: 200,
|
|
||||||
body: { choices: [{ message: { content: "good" } }] }.to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
triage(
|
|
||||||
post: post,
|
|
||||||
model: "gpt-4",
|
|
||||||
hide_topic: true,
|
|
||||||
system_prompt: "test %%POST%%",
|
|
||||||
search_for_text: "bad",
|
|
||||||
)
|
|
||||||
|
|
||||||
expect(post.topic.reload.visible).to eq(true)
|
|
||||||
end
|
|
||||||
|
|
||||||
it "can hide topics on triage with claude" do
|
|
||||||
stub_request(:post, "https://api.anthropic.com/v1/complete").to_return(
|
|
||||||
status: 200,
|
|
||||||
body: { completion: "bad" }.to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
triage(
|
|
||||||
post: post,
|
|
||||||
model: "claude-2",
|
|
||||||
hide_topic: true,
|
|
||||||
system_prompt: "test %%POST%%",
|
|
||||||
search_for_text: "bad",
|
|
||||||
)
|
|
||||||
|
|
||||||
expect(post.topic.reload.visible).to eq(false)
|
|
||||||
end
|
|
||||||
|
|
||||||
it "can hide topics on triage with claude" do
|
|
||||||
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
|
|
||||||
status: 200,
|
|
||||||
body: { choices: [{ message: { content: "bad" } }] }.to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
triage(
|
|
||||||
post: post,
|
|
||||||
model: "gpt-4",
|
|
||||||
hide_topic: true,
|
|
||||||
system_prompt: "test %%POST%%",
|
|
||||||
search_for_text: "bad",
|
|
||||||
)
|
|
||||||
|
|
||||||
expect(post.topic.reload.visible).to eq(false)
|
|
||||||
end
|
|
||||||
|
|
||||||
it "can categorize topics on triage" do
|
|
||||||
category = Fabricate(:category)
|
|
||||||
|
|
||||||
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
|
|
||||||
status: 200,
|
|
||||||
body: { choices: [{ message: { content: "bad" } }] }.to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
triage(
|
|
||||||
post: post,
|
|
||||||
model: "gpt-4",
|
|
||||||
category_id: category.id,
|
|
||||||
system_prompt: "test %%POST%%",
|
|
||||||
search_for_text: "bad",
|
|
||||||
)
|
|
||||||
|
|
||||||
expect(post.topic.reload.category_id).to eq(category.id)
|
|
||||||
end
|
|
||||||
|
|
||||||
it "can reply to topics on triage" do
|
|
||||||
user = Fabricate(:user)
|
|
||||||
|
|
||||||
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
|
|
||||||
status: 200,
|
|
||||||
body: { choices: [{ message: { content: "bad" } }] }.to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
triage(
|
|
||||||
post: post,
|
|
||||||
model: "gpt-4",
|
|
||||||
system_prompt: "test %%POST%%",
|
|
||||||
search_for_text: "bad",
|
|
||||||
canned_reply: "test canned reply 123",
|
|
||||||
canned_reply_user: user.username,
|
|
||||||
)
|
|
||||||
|
|
||||||
reply = post.topic.posts.order(:post_number).last
|
|
||||||
|
|
||||||
expect(reply.raw).to eq("test canned reply 123")
|
|
||||||
expect(reply.user.id).to eq(user.id)
|
|
||||||
end
|
|
||||||
|
|
||||||
let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }
|
let(:automation) { Fabricate(:automation, script: "llm_triage", enabled: true) }
|
||||||
|
|
||||||
def add_automation_field(name, value, type: "text")
|
def add_automation_field(name, value, type: "text")
|
||||||
|
@ -130,13 +33,10 @@ describe DiscourseAutomation::LlmTriage do
|
||||||
add_automation_field("canned_reply", "Yo this is a reply")
|
add_automation_field("canned_reply", "Yo this is a reply")
|
||||||
add_automation_field("canned_reply_user", user.username, type: "user")
|
add_automation_field("canned_reply_user", user.username, type: "user")
|
||||||
|
|
||||||
stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
|
DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
|
||||||
status: 200,
|
automation.running_in_background!
|
||||||
body: { choices: [{ message: { content: "bad" } }] }.to_json,
|
automation.trigger!({ "post" => post })
|
||||||
)
|
end
|
||||||
|
|
||||||
automation.running_in_background!
|
|
||||||
automation.trigger!({ "post" => post })
|
|
||||||
|
|
||||||
topic = post.topic.reload
|
topic = post.topic.reload
|
||||||
expect(topic.category_id).to eq(category.id)
|
expect(topic.category_id).to eq(category.id)
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
describe DiscourseAi::Automation::LlmTriage do
|
||||||
|
fab!(:post) { Fabricate(:post) }
|
||||||
|
|
||||||
|
def triage(**args)
|
||||||
|
DiscourseAi::Automation::LlmTriage.handle(**args)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "does nothing if it does not pass triage" do
|
||||||
|
DiscourseAi::Completions::Llm.with_prepared_responses(["good"]) do
|
||||||
|
triage(
|
||||||
|
post: post,
|
||||||
|
model: "gpt-4",
|
||||||
|
hide_topic: true,
|
||||||
|
system_prompt: "test %%POST%%",
|
||||||
|
search_for_text: "bad",
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(post.topic.reload.visible).to eq(true)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can hide topics on triage with claude" do
|
||||||
|
DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
|
||||||
|
triage(
|
||||||
|
post: post,
|
||||||
|
model: "claude-2",
|
||||||
|
hide_topic: true,
|
||||||
|
system_prompt: "test %%POST%%",
|
||||||
|
search_for_text: "bad",
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(post.topic.reload.visible).to eq(false)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can hide topics on triage with claude" do
|
||||||
|
DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
|
||||||
|
triage(
|
||||||
|
post: post,
|
||||||
|
model: "gpt-4",
|
||||||
|
hide_topic: true,
|
||||||
|
system_prompt: "test %%POST%%",
|
||||||
|
search_for_text: "bad",
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(post.topic.reload.visible).to eq(false)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can categorize topics on triage" do
|
||||||
|
category = Fabricate(:category)
|
||||||
|
|
||||||
|
DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
|
||||||
|
triage(
|
||||||
|
post: post,
|
||||||
|
model: "gpt-4",
|
||||||
|
category_id: category.id,
|
||||||
|
system_prompt: "test %%POST%%",
|
||||||
|
search_for_text: "bad",
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
expect(post.topic.reload.category_id).to eq(category.id)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can reply to topics on triage" do
|
||||||
|
user = Fabricate(:user)
|
||||||
|
DiscourseAi::Completions::Llm.with_prepared_responses(["bad"]) do
|
||||||
|
triage(
|
||||||
|
post: post,
|
||||||
|
model: "gpt-4",
|
||||||
|
system_prompt: "test %%POST%%",
|
||||||
|
search_for_text: "bad",
|
||||||
|
canned_reply: "test canned reply 123",
|
||||||
|
canned_reply_user: user.username,
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
reply = post.topic.posts.order(:post_number).last
|
||||||
|
|
||||||
|
expect(reply.raw).to eq("test canned reply 123")
|
||||||
|
expect(reply.user.id).to eq(user.id)
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,152 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "rails_helper"
|
||||||
|
|
||||||
|
module DiscourseAi
|
||||||
|
module Automation
|
||||||
|
describe ReportContextGenerator do
|
||||||
|
describe ".generate" do
|
||||||
|
fab!(:private_message_post)
|
||||||
|
fab!(:post_in_other_category) { Fabricate(:post) }
|
||||||
|
|
||||||
|
fab!(:category)
|
||||||
|
fab!(:topic) { Fabricate(:topic, category: category) }
|
||||||
|
fab!(:post_in_category) { Fabricate(:post, topic: topic) }
|
||||||
|
fab!(:reply_in_category) { Fabricate(:post, topic: topic, reply_to_post_number: 1) }
|
||||||
|
|
||||||
|
fab!(:group)
|
||||||
|
fab!(:private_category) { Fabricate(:private_category, group: group) }
|
||||||
|
fab!(:secure_topic) do
|
||||||
|
Fabricate(:topic, title: "category in secure category", category: private_category)
|
||||||
|
end
|
||||||
|
fab!(:user_in_group) { Fabricate(:user, groups: [group]) }
|
||||||
|
fab!(:post_in_private_category) do
|
||||||
|
Fabricate(:post, user: user_in_group, topic: secure_topic)
|
||||||
|
end
|
||||||
|
|
||||||
|
fab!(:tag)
|
||||||
|
fab!(:tag2) { Fabricate(:tag) }
|
||||||
|
fab!(:topic_with_tag) { Fabricate(:topic, tags: [tag, tag2]) }
|
||||||
|
fab!(:post_with_tag) { Fabricate(:post, topic: topic_with_tag) }
|
||||||
|
|
||||||
|
fab!(:long_post) do
|
||||||
|
Fabricate(
|
||||||
|
:post,
|
||||||
|
raw: (1..100).map { |i| "testing#{i}" }.join(" "),
|
||||||
|
topic: Fabricate(:topic, category: category),
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
fab!(:topic_with_likes) { Fabricate(:topic, like_count: 10) }
|
||||||
|
|
||||||
|
fab!(:post_with_likes) { Fabricate(:post, topic: topic_with_likes, like_count: 10) }
|
||||||
|
|
||||||
|
fab!(:post_with_likes2) { Fabricate(:post, topic: topic_with_likes, like_count: 5) }
|
||||||
|
|
||||||
|
fab!(:post_with_likes3) { Fabricate(:post, topic: topic_with_likes, like_count: 3) }
|
||||||
|
|
||||||
|
if defined?(::DiscourseSolved)
|
||||||
|
it "will correctly denote solved topics" do
|
||||||
|
topic_with_likes.custom_fields[
|
||||||
|
::DiscourseSolved::ACCEPTED_ANSWER_POST_ID_CUSTOM_FIELD
|
||||||
|
] = post_with_likes2.id
|
||||||
|
topic_with_likes.save_custom_fields
|
||||||
|
|
||||||
|
context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
|
||||||
|
|
||||||
|
expect(context).to include("solved: true")
|
||||||
|
expect(context).to include("solution: true")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
it "always includes info from last posts on topic" do
|
||||||
|
context =
|
||||||
|
ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day, max_posts: 1)
|
||||||
|
|
||||||
|
expect(context).to include("...")
|
||||||
|
expect(context).to include("post_number: 3")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "includes a summary" do
|
||||||
|
context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
|
||||||
|
|
||||||
|
expect(context).to include("New posts: 8")
|
||||||
|
expect(context).to include("New topics: 5")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "orders so most liked are first" do
|
||||||
|
context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
|
||||||
|
|
||||||
|
regex = "topic_id: #{topic_with_likes.id}.*topic_id: #{long_post.topic.id}"
|
||||||
|
expect(context).to match(Regexp.new(regex, Regexp::MULTILINE))
|
||||||
|
end
|
||||||
|
|
||||||
|
it "allows you to prioritize groups" do
|
||||||
|
context =
|
||||||
|
ReportContextGenerator.generate(
|
||||||
|
start_date: 1.day.ago,
|
||||||
|
duration: 2.day,
|
||||||
|
prioritized_group_ids: [group.id],
|
||||||
|
allow_secure_categories: true,
|
||||||
|
max_posts: 1,
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(context).to include(post_in_private_category.topic.title)
|
||||||
|
expect(context).not_to include(post_in_other_category.topic.title)
|
||||||
|
expect(context).to include(group.name)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can generate context (excluding PMs)" do
|
||||||
|
context = ReportContextGenerator.generate(start_date: 1.day.ago, duration: 2.day)
|
||||||
|
|
||||||
|
expect(context).to include(post_in_other_category.topic.title)
|
||||||
|
expect(context).to include(topic.title)
|
||||||
|
expect(context).not_to include(private_message_post.topic.title)
|
||||||
|
expect(context).not_to include(secure_topic.title)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can filter on tag" do
|
||||||
|
context =
|
||||||
|
ReportContextGenerator.generate(
|
||||||
|
start_date: 1.day.ago,
|
||||||
|
duration: 2.day,
|
||||||
|
tags: [tag.name],
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(context).not_to include(post_in_other_category.topic.title)
|
||||||
|
expect(context).not_to include(topic.title)
|
||||||
|
expect(context).not_to include(private_message_post.topic.title)
|
||||||
|
expect(context).not_to include(secure_topic.title)
|
||||||
|
expect(context).to include(post_with_tag.topic.title)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can optionally include secure categories" do
|
||||||
|
context =
|
||||||
|
ReportContextGenerator.generate(
|
||||||
|
start_date: 1.day.ago,
|
||||||
|
duration: 2.day,
|
||||||
|
allow_secure_categories: true,
|
||||||
|
)
|
||||||
|
expect(context).to include(post_in_other_category.topic.title)
|
||||||
|
expect(context).to include(topic.title)
|
||||||
|
expect(context).not_to include(private_message_post.topic.title)
|
||||||
|
expect(context).to include(secure_topic.title)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "can filter to a categories" do
|
||||||
|
context =
|
||||||
|
ReportContextGenerator.generate(
|
||||||
|
start_date: 1.day.ago,
|
||||||
|
duration: 2.day,
|
||||||
|
category_ids: [category.id],
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(context).not_to include(post_in_other_category.topic.title)
|
||||||
|
expect(context).to include(topic.title)
|
||||||
|
expect(context).not_to include(private_message_post.topic.title)
|
||||||
|
expect(context).not_to include(secure_topic.title)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,47 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "rails_helper"
|
||||||
|
|
||||||
|
module DiscourseAi
|
||||||
|
module Automation
|
||||||
|
describe ReportRunner do
|
||||||
|
fab!(:user)
|
||||||
|
fab!(:reciever) { Fabricate(:user) }
|
||||||
|
fab!(:post) { Fabricate(:post, user: user) }
|
||||||
|
fab!(:group)
|
||||||
|
fab!(:secure_category) { Fabricate(:private_category, group: group) }
|
||||||
|
fab!(:secure_topic) { Fabricate(:topic, category: secure_category) }
|
||||||
|
fab!(:secure_post) { Fabricate(:post, raw: "Top secret date !!!!", topic: secure_topic) }
|
||||||
|
|
||||||
|
describe "#run!" do
|
||||||
|
it "generates correctly respects the params" do
|
||||||
|
DiscourseAi::Completions::Llm.with_prepared_responses(["magical report"]) do
|
||||||
|
ReportRunner.run!(
|
||||||
|
sender_username: user.username,
|
||||||
|
receivers: [reciever.username],
|
||||||
|
title: "test report",
|
||||||
|
model: "gpt-4",
|
||||||
|
category_ids: nil,
|
||||||
|
tags: nil,
|
||||||
|
allow_secure_categories: false,
|
||||||
|
debug_mode: true,
|
||||||
|
sample_size: 100,
|
||||||
|
instructions: "make a magic report",
|
||||||
|
days: 7,
|
||||||
|
offset: 0,
|
||||||
|
priority_group_id: nil,
|
||||||
|
tokens_per_post: 150,
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
report = Topic.where(title: "test report").first
|
||||||
|
expect(report.ordered_posts.first.raw).to eq("magical report")
|
||||||
|
debugging = report.ordered_posts.last.raw
|
||||||
|
|
||||||
|
expect(debugging).to include(post.raw)
|
||||||
|
expect(debugging).not_to include(secure_post.raw)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in New Issue