discourse-ai/app/models/llm_quota_usage.rb

# frozen_string_literal: true

# Per-user consumption record for a single LlmQuota.
#
# Each row counts the requests (`usages`) and the input/output tokens a user
# has spent against one quota during the current window, which runs from
# `started_at` to `reset_at`. Once `reset_at` has passed, the counters are
# zeroed and a fresh window is opened the next time the record is checked or
# incremented.
class LlmQuotaUsage < ActiveRecord::Base
  self.table_name = "llm_quota_usages"

  # Raised by #check_quota! when the token or usage limit has been exceeded.
  QuotaExceededError = Class.new(StandardError)

  belongs_to :user
  belongs_to :llm_quota

  validates :user_id, presence: true
  validates :llm_quota_id, presence: true
  validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :started_at, presence: true
  validates :reset_at, presence: true

  # Returns the usage row for the given user/quota pair, creating it with
  # zeroed counters and a window starting now when none exists yet.
  #
  # @param user [User]
  # @param llm_quota [LlmQuota] must respond to `duration_seconds`
  # @return [LlmQuotaUsage] a persisted record
  def self.find_or_create_for(user:, llm_quota:)
    usage = find_or_initialize_by(user: user, llm_quota: llm_quota)
    return usage unless usage.new_record?

    now = Time.current
    usage.assign_attributes(
      started_at: now,
      reset_at: now + llm_quota.duration_seconds.seconds,
      input_tokens_used: 0,
      output_tokens_used: 0,
      usages: 0,
    )

    begin
      # Two concurrent callers can both miss the lookup above; the unique index
      # on (user_id, llm_quota_id) then rejects the slower insert. The nested
      # transaction (savepoint) keeps that failure from poisoning any
      # transaction the caller already has open.
      transaction(requires_new: true) { usage.save! }
    rescue ActiveRecord::RecordNotUnique
      usage = find_by!(user: user, llm_quota: llm_quota)
    end

    usage
  end

  # Zeroes every counter and opens a new window when the current one has
  # expired; does nothing while `reset_at` is still in the future.
  def reset_if_needed!
    # Read the clock once so the expiry check and the new window agree.
    now = Time.current
    return if now < reset_at

    update!(
      input_tokens_used: 0,
      output_tokens_used: 0,
      usages: 0,
      started_at: now,
      reset_at: now + llm_quota.duration_seconds.seconds,
    )
  end

  # Records one request and its token counts against the current window,
  # resetting the window first if it has expired.
  #
  # @param input_tokens [Integer]
  # @param output_tokens [Integer]
  # @return [LlmQuotaUsage] self
  def increment_usage!(input_tokens:, output_tokens:)
    reset_if_needed!

    deltas = { usages: 1, input_tokens_used: input_tokens, output_tokens_used: output_tokens }

    # Mirror what `increment!` does per column, but flush all three deltas in a
    # single UPDATE so the request count and its tokens are persisted together.
    deltas.each { |column, by| increment(column, by) }
    self.class.update_counters(id, **deltas)
    clear_attribute_changes(deltas.keys)

    self
  end

  # Raises when the user is over quota for the current window, resetting the
  # window first if it has expired.
  #
  # @raise [QuotaExceededError] with a localized message stating when the
  #   quota resets
  def check_quota!
    reset_if_needed!
    return unless quota_exceeded?

    raise QuotaExceededError,
          I18n.t(
            "discourse_ai.errors.quota_exceeded",
            relative_time: AgeWords.distance_of_time_in_words(reset_at, Time.current),
          )
  end

  # True when either configured limit has been exceeded. The comparison is
  # strict: sitting exactly at a limit does not count as exceeded. A missing
  # quota or an unset limit never counts as exceeded.
  def quota_exceeded?
    return false if !llm_quota

    (llm_quota.max_tokens.present? && total_tokens_used > llm_quota.max_tokens) ||
      (llm_quota.max_usages.present? && usages > llm_quota.max_usages)
  end

  # Input plus output tokens consumed in the current window.
  def total_tokens_used
    input_tokens_used + output_tokens_used
  end

  # Tokens left before the token limit is reached, never negative.
  # Returns nil when the quota has no token limit.
  def remaining_tokens
    return nil if llm_quota.max_tokens.nil?
    [0, llm_quota.max_tokens - total_tokens_used].max
  end

  # Requests left before the usage limit is reached, never negative.
  # Returns nil when the quota has no usage limit.
  def remaining_usages
    return nil if llm_quota.max_usages.nil?
    [0, llm_quota.max_usages - usages].max
  end

  # Share of the token limit consumed, as a rounded integer capped at 100.
  # Returns 0 when the limit is unset or zero.
  def percentage_tokens_used
    return 0 if llm_quota.max_tokens.nil? || llm_quota.max_tokens.zero?
    [(total_tokens_used.to_f / llm_quota.max_tokens * 100).round, 100].min
  end

  # Share of the usage limit consumed, as a rounded integer capped at 100.
  # Returns 0 when the limit is unset or zero.
  def percentage_usages_used
    return 0 if llm_quota.max_usages.nil? || llm_quota.max_usages.zero?
    [(usages.to_f / llm_quota.max_usages * 100).round, 100].min
  end
end

# == Schema Information
#
# Table name: llm_quota_usages
#
#  id                 :bigint           not null, primary key
#  user_id            :bigint           not null
#  llm_quota_id       :bigint           not null
#  input_tokens_used  :integer          not null
#  output_tokens_used :integer          not null
#  usages             :integer          not null
#  started_at         :datetime         not null
#  reset_at           :datetime         not null
#  created_at         :datetime         not null
#  updated_at         :datetime         not null
#
# Indexes
#
#  index_llm_quota_usages_on_llm_quota_id              (llm_quota_id)
#  index_llm_quota_usages_on_user_id_and_llm_quota_id  (user_id,llm_quota_id) UNIQUE
#
FEATURE: llm quotas (#1047)

Adds a comprehensive quota management system for LLM models that allows:

- Setting per-group (applied per user in the group) token and usage limits with configurable durations
- Tracking and enforcing token/usage limits across user groups
- Quota reset periods (hourly, daily, weekly, or custom)
- Admin UI for managing quotas with real-time updates

This system provides granular control over LLM API usage by allowing admins to define limits on both total tokens and number of requests per group. Supports multiple concurrent quotas per model and automatically handles quota resets.

Co-authored-by: Keegan George <kgeorge13@gmail.com>
2025-01-14 15:54:09 +11:00

			`# frozen_string_literal: true`

			`class LlmQuotaUsage < ActiveRecord::Base`
			`self.table_name = "llm_quota_usages"`

			`QuotaExceededError = Class.new(StandardError)`

			`belongs_to :user`
			`belongs_to :llm_quota`

			`validates :user_id, presence: true`
			`validates :llm_quota_id, presence: true`
			`validates :input_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }`
			`validates :output_tokens_used, presence: true, numericality: { greater_than_or_equal_to: 0 }`
			`validates :usages, presence: true, numericality: { greater_than_or_equal_to: 0 }`
			`validates :started_at, presence: true`
			`validates :reset_at, presence: true`

			`def self.find_or_create_for(user:, llm_quota:)`
			`usage = find_or_initialize_by(user: user, llm_quota: llm_quota)`

			`if usage.new_record?`
			`now = Time.current`
			`usage.started_at = now`
			`usage.reset_at = now + llm_quota.duration_seconds.seconds`
			`usage.input_tokens_used = 0`
			`usage.output_tokens_used = 0`
			`usage.usages = 0`
			`usage.save!`
			`end`

			`usage`
			`end`

			`def reset_if_needed!`
			`return if Time.current < reset_at`

			`now = Time.current`
			`update!(`
			`input_tokens_used: 0,`
			`output_tokens_used: 0,`
			`usages: 0,`
			`started_at: now,`
			`reset_at: now + llm_quota.duration_seconds.seconds,`
			`)`
			`end`

			`def increment_usage!(input_tokens:, output_tokens:)`
			`reset_if_needed!`

			`increment!(:usages)`
			`increment!(:input_tokens_used, input_tokens)`
			`increment!(:output_tokens_used, output_tokens)`
			`end`

			`def check_quota!`
			`reset_if_needed!`

			`if quota_exceeded?`
			`raise QuotaExceededError.new(`
			`I18n.t(`
			`"discourse_ai.errors.quota_exceeded",`
			`relative_time: AgeWords.distance_of_time_in_words(reset_at, Time.now),`
			`),`
			`)`
			`end`
			`end`

			`def quota_exceeded?`
			`return false if !llm_quota`

			`(llm_quota.max_tokens.present? && total_tokens_used > llm_quota.max_tokens) \|\|`
			`(llm_quota.max_usages.present? && usages > llm_quota.max_usages)`
			`end`

			`def total_tokens_used`
			`input_tokens_used + output_tokens_used`
			`end`

			`def remaining_tokens`
			`return nil if llm_quota.max_tokens.nil?`
			`[0, llm_quota.max_tokens - total_tokens_used].max`
			`end`

			`def remaining_usages`
			`return nil if llm_quota.max_usages.nil?`
			`[0, llm_quota.max_usages - usages].max`
			`end`

			`def percentage_tokens_used`
			`return 0 if llm_quota.max_tokens.nil? \|\| llm_quota.max_tokens.zero?`
			`[(total_tokens_used.to_f / llm_quota.max_tokens * 100).round, 100].min`
			`end`

			`def percentage_usages_used`
			`return 0 if llm_quota.max_usages.nil? \|\| llm_quota.max_usages.zero?`
			`[(usages.to_f / llm_quota.max_usages * 100).round, 100].min`
			`end`
			`end`

			`# == Schema Information`
			`#`
			`# Table name: llm_quota_usages`
			`#`
			`# id :bigint not null, primary key`
			`# user_id :bigint not null`
			`# llm_quota_id :bigint not null`
			`# input_tokens_used :integer not null`
			`# output_tokens_used :integer not null`
			`# usages :integer not null`
			`# started_at :datetime not null`
			`# reset_at :datetime not null`
			`# created_at :datetime not null`
			`# updated_at :datetime not null`
			`#`
			`# Indexes`
			`#`
			`# index_llm_quota_usages_on_llm_quota_id (llm_quota_id)`
			`# index_llm_quota_usages_on_user_id_and_llm_quota_id (user_id,llm_quota_id) UNIQUE`
			`#`