discourse-ai/evals/lib/llm.rb

# frozen_string_literal: true

# Wraps one of the predefined LLM configurations used by the eval scripts.
#
# An instance looks up an entry in CONFIGS, reads the API key from the
# environment variable named by that entry's :api_key_env, builds an LlmModel
# from the remaining attributes and keeps the completion proxy created from it.
class DiscourseAi::Evals::Llm
  # Known configurations, keyed by the short name accepted by .choose and .new.
  #
  # :api_key_env is not passed to LlmModel.new: #initialize removes it and
  # supplies :api_key (the value of that environment variable) instead.
  CONFIGS = {
    "gpt-4o" => {
      display_name: "GPT-4o",
      name: "gpt-4o",
      tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",
      api_key_env: "OPENAI_API_KEY",
      provider: "open_ai",
      url: "https://api.openai.com/v1/chat/completions",
      max_prompt_tokens: 131_072,
      vision_enabled: true,
    },
    "gpt-4o-mini" => {
      display_name: "GPT-4o-mini",
      name: "gpt-4o-mini",
      tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",
      api_key_env: "OPENAI_API_KEY",
      provider: "open_ai",
      url: "https://api.openai.com/v1/chat/completions",
      max_prompt_tokens: 131_072,
      vision_enabled: true,
    },
    "claude-3.5-haiku" => {
      display_name: "Claude 3.5 Haiku",
      name: "claude-3-5-haiku-latest",
      tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
      api_key_env: "ANTHROPIC_API_KEY",
      provider: "anthropic",
      url: "https://api.anthropic.com/v1/messages",
      max_prompt_tokens: 200_000,
      vision_enabled: false,
    },
    "claude-3.5-sonnet" => {
      display_name: "Claude 3.5 Sonnet",
      name: "claude-3-5-sonnet-latest",
      tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",
      api_key_env: "ANTHROPIC_API_KEY",
      provider: "anthropic",
      url: "https://api.anthropic.com/v1/messages",
      max_prompt_tokens: 200_000,
      vision_enabled: true,
    },
    "gemini-2.0-flash" => {
      display_name: "Gemini 2.0 Flash",
      name: "gemini-2-0-flash",
      tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
      api_key_env: "GEMINI_API_KEY",
      provider: "google",
      url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",
      max_prompt_tokens: 1_000_000,
      vision_enabled: true,
    },
    "gemini-2.0-pro-exp" => {
      display_name: "Gemini 2.0 pro",
      name: "gemini-2-0-pro-exp",
      tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",
      api_key_env: "GEMINI_API_KEY",
      provider: "google",
      url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp",
      max_prompt_tokens: 1_000_000,
      vision_enabled: true,
    },
  }

  # Prints one "config_name: display name (provider)" line for every
  # configuration that can be instantiated. Configurations whose
  # initialization raises (for example a missing API key) are skipped silently.
  def self.print
    instantiate_all.each { |llm| puts "#{llm.config_name}: #{llm.name} (#{llm.provider})" }
  end

  # Resolves the LLMs an eval run should use.
  #
  # @param config_name [String, nil] a key of CONFIGS, or nil for "all"
  # @return [Array<DiscourseAi::Evals::Llm>] every configuration that can be
  #   instantiated when config_name is nil (failures are reported on stdout and
  #   dropped), otherwise a one-element array for the named configuration
  # @raise [RuntimeError] when config_name is given but is not a key of CONFIGS,
  #   or when the named configuration's API key is not set
  def self.choose(config_name)
    # Only an absent name means "all". Testing CONFIGS[config_name].nil? here
    # would also swallow unknown names and make the check below unreachable.
    return instantiate_all(report_errors: true) if config_name.nil?

    unless CONFIGS.key?(config_name)
      raise "Invalid llm: #{config_name.inspect} (valid: #{CONFIGS.keys.join(", ")})"
    end

    [new(config_name)]
  end

  # Builds an instance for every CONFIGS key, dropping those whose
  # initialization raises a StandardError.
  #
  # @param report_errors [Boolean] when true, each failure is written to stdout
  # @return [Array<DiscourseAi::Evals::Llm>]
  def self.instantiate_all(report_errors: false)
    CONFIGS
      .keys
      .map do |name|
        begin
          new(name)
        rescue StandardError => e
          puts "Error initializing #{name}: #{e}" if report_errors
          nil
        end
      end
      .compact
  end
  private_class_method :instantiate_all

  attr_reader :llm_model
  attr_reader :llm_proxy
  attr_reader :config_name

  # @param config_name [String] a key of CONFIGS
  # @raise [KeyError] when config_name is not a key of CONFIGS
  # @raise [RuntimeError] when the environment variable named by the
  #   configuration's :api_key_env is not set
  def initialize(config_name)
    # dup so that removing :api_key_env does not alter the shared CONFIGS entry.
    config = CONFIGS.fetch(config_name).dup
    api_key_env = config.delete(:api_key_env)
    api_key = ENV[api_key_env]
    unless api_key
      raise "Missing API key for #{config_name}, should be set via #{api_key_env}"
    end

    config[:api_key] = api_key
    @llm_model = LlmModel.new(config)
    @llm_proxy = DiscourseAi::Completions::Llm.proxy(@llm_model)
    @config_name = config_name
  end

  # @return the provider of the underlying LlmModel
  def provider
    @llm_model.provider
  end

  # @return the display name of the underlying LlmModel (not its API model name)
  def name
    @llm_model.display_name
  end

  # @return the vision_enabled flag of the underlying LlmModel
  def vision?
    @llm_model.vision_enabled
  end
end
FEATURE: PDF support for rag pipeline (#1118) This PR introduces several enhancements and refactorings to the AI Persona and RAG (Retrieval-Augmented Generation) functionalities within the discourse-ai plugin. Here's a breakdown of the changes: 1. LLM Model Association for RAG and Personas: - New Database Columns: Adds `rag_llm_model_id` to both `ai_personas` and `ai_tools` tables. This allows specifying a dedicated LLM for RAG indexing, separate from the persona's primary LLM. Adds `default_llm_id` and `question_consolidator_llm_id` to `ai_personas`. - Migration: Includes a migration (`20250210032345_migrate_persona_to_llm_model_id.rb`) to populate the new `default_llm_id` and `question_consolidator_llm_id` columns in `ai_personas` based on the existing `default_llm` and `question_consolidator_llm` string columns, and a post migration to remove the latter. - Model Changes: The `AiPersona` and `AiTool` models now `belong_to` an `LlmModel` via `rag_llm_model_id`. The `LlmModel.proxy` method now accepts an `LlmModel` instance instead of just an identifier. `AiPersona` now has `default_llm_id` and `question_consolidator_llm_id` attributes. - UI Updates: The AI Persona and AI Tool editors in the admin panel now allow selecting an LLM for RAG indexing (if PDF/image support is enabled). The RAG options component displays an LLM selector. - Serialization: The serializers (`AiCustomToolSerializer`, `AiCustomToolListSerializer`, `LocalizedAiPersonaSerializer`) have been updated to include the new `rag_llm_model_id`, `default_llm_id` and `question_consolidator_llm_id` attributes. 2. PDF and Image Support for RAG: - Site Setting: Introduces a new hidden site setting, `ai_rag_pdf_images_enabled`, to control whether PDF and image files can be indexed for RAG. This defaults to `false`. - File Upload Validation: The `RagDocumentFragmentsController` now checks the `ai_rag_pdf_images_enabled` setting and allows PDF, PNG, JPG, and JPEG files if enabled. 
Error handling is included for cases where PDF/image indexing is attempted with the setting disabled. - PDF Processing: Adds a new utility class, `DiscourseAi::Utils::PdfToImages`, which uses ImageMagick (`magick`) to convert PDF pages into individual PNG images. A maximum PDF size and conversion timeout are enforced. - Image Processing: A new utility class, `DiscourseAi::Utils::ImageToText`, is included to handle OCR for the images and PDFs. - RAG Digestion Job: The `DigestRagUpload` job now handles PDF and image uploads. It uses `PdfToImages` and `ImageToText` to extract text and create document fragments. - UI Updates: The RAG uploader component now accepts PDF and image file types if `ai_rag_pdf_images_enabled` is true. The UI text is adjusted to indicate supported file types. 3. Refactoring and Improvements: - LLM Enumeration: The `DiscourseAi::Configuration::LlmEnumerator` now provides a `values_for_serialization` method, which returns a simplified array of LLM data (id, name, vision_enabled) suitable for use in serializers. This avoids exposing unnecessary details to the frontend. - AI Helper: The `AiHelper::Assistant` now takes optional `helper_llm` and `image_caption_llm` parameters in its constructor, allowing for greater flexibility. - Bot and Persona Updates: Several updates were made across the codebase, replacing the string-based association to an LLM with the new model-based association. - Audit Logs: The `DiscourseAi::Completions::Endpoints::Base` now formats raw request payloads as pretty JSON for easier auditing. - Eval Script: An evaluation script is included. 4. Testing: - The PR introduces a new eval system for LLMs; this allows us to test how functionality works across various LLM providers. It lives in `/evals`. 2025-02-14 12:15:07 +11:00			`# frozen_string_literal: true`

			`class DiscourseAi::Evals::Llm`
			`CONFIGS = {`
			`"gpt-4o" => {`
			`display_name: "GPT-4o",`
			`name: "gpt-4o",`
			`tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",`
			`api_key_env: "OPENAI_API_KEY",`
			`provider: "open_ai",`
			`url: "https://api.openai.com/v1/chat/completions",`
			`max_prompt_tokens: 131_072,`
			`vision_enabled: true,`
			`},`
			`"gpt-4o-mini" => {`
			`display_name: "GPT-4o-mini",`
			`name: "gpt-4o-mini",`
			`tokenizer: "DiscourseAi::Tokenizer::OpenAiTokenizer",`
			`api_key_env: "OPENAI_API_KEY",`
			`provider: "open_ai",`
			`url: "https://api.openai.com/v1/chat/completions",`
			`max_prompt_tokens: 131_072,`
			`vision_enabled: true,`
			`},`
			`"claude-3.5-haiku" => {`
			`display_name: "Claude 3.5 Haiku",`
			`name: "claude-3-5-haiku-latest",`
			`tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",`
			`api_key_env: "ANTHROPIC_API_KEY",`
			`provider: "anthropic",`
			`url: "https://api.anthropic.com/v1/messages",`
			`max_prompt_tokens: 200_000,`
			`vision_enabled: false,`
			`},`
			`"claude-3.5-sonnet" => {`
			`display_name: "Claude 3.5 Sonnet",`
			`name: "claude-3-5-sonnet-latest",`
			`tokenizer: "DiscourseAi::Tokenizer::AnthropicTokenizer",`
			`api_key_env: "ANTHROPIC_API_KEY",`
			`provider: "anthropic",`
			`url: "https://api.anthropic.com/v1/messages",`
			`max_prompt_tokens: 200_000,`
			`vision_enabled: true,`
			`},`
			`"gemini-2.0-flash" => {`
			`display_name: "Gemini 2.0 Flash",`
			`name: "gemini-2-0-flash",`
			`tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",`
			`api_key_env: "GEMINI_API_KEY",`
			`provider: "google",`
			`url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash",`
			`max_prompt_tokens: 1_000_000,`
			`vision_enabled: true,`
			`},`
			`"gemini-2.0-pro-exp" => {`
			`display_name: "Gemini 2.0 pro",`
			`name: "gemini-2-0-pro-exp",`
			`tokenizer: "DiscourseAi::Tokenizer::GeminiTokenizer",`
			`api_key_env: "GEMINI_API_KEY",`
			`provider: "google",`
			`url: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp",`
			`max_prompt_tokens: 1_000_000,`
			`vision_enabled: true,`
			`},`
			`}`

			`def self.print`
			`CONFIGS`
			`.keys`
			`.map do \|config_name\|`
			`begin`
			`new(config_name)`
			`rescue StandardError`
			`nil`
			`end`
			`end`
			`.compact`
			`.each { \|llm\| puts "#{llm.config_name}: #{llm.name} (#{llm.provider})" }`
			`end`

			`def self.choose(config_name)`
			`if CONFIGS[config_name].nil?`
			`CONFIGS`
			`.keys`
			`.map do \|config_name\|`
			`begin`
			`new(config_name)`
			`rescue => e`
			`puts "Error initializing #{config_name}: #{e}"`
			`nil`
			`end`
			`end`
			`.compact`
			`elsif !CONFIGS.include?(config_name)`
			`raise "Invalid llm"`
			`else`
			`[new(config_name)]`
			`end`
			`end`

			`attr_reader :llm_model`
			`attr_reader :llm_proxy`
			`attr_reader :config_name`

			`def initialize(config_name)`
			`config = CONFIGS[config_name].dup`
			`api_key_env = config.delete(:api_key_env)`
			`if !ENV[api_key_env]`
			`raise "Missing API key for #{config_name}, should be set via #{api_key_env}"`
			`end`

			`config[:api_key] = ENV[api_key_env]`
			`@llm_model = LlmModel.new(config)`
			`@llm_proxy = DiscourseAi::Completions::Llm.proxy(@llm_model)`
			`@config_name = config_name`
			`end`

			`def provider`
			`@llm_model.provider`
			`end`

			`def name`
			`@llm_model.display_name`
			`end`

			`def vision?`
			`@llm_model.vision_enabled`
			`end`
			`end`