<:content>
-
+
+
{{#each this.metrics as |metric|}}
{{i18n "discourse_ai.usage.feature"}}
{{i18n "discourse_ai.usage.usage_count"}} |
{{i18n "discourse_ai.usage.total_tokens"}} |
+ {{i18n "discourse_ai.usage.total_spending"}} |
@@ -438,6 +473,13 @@ export default class AiUsage extends Component {
class="ai-usage__features-cell"
title={{feature.total_tokens}}
>{{number feature.total_tokens}}
+
+ {{this.totalSpending
+ feature.input_spending
+ feature.cached_input_spending
+ feature.output_spending
+ }}
+ |
{{/each}}
@@ -464,6 +506,8 @@ export default class AiUsage extends Component {
{{i18n "discourse_ai.usage.model"}} |
{{i18n "discourse_ai.usage.usage_count"}} |
{{i18n "discourse_ai.usage.total_tokens"}} |
+ {{i18n "discourse_ai.usage.total_spending"}} |
+
@@ -478,6 +522,13 @@ export default class AiUsage extends Component {
class="ai-usage__models-cell"
title={{model.total_tokens}}
>{{number model.total_tokens}}
+
+ {{this.totalSpending
+ model.input_spending
+ model.cached_input_spending
+ model.output_spending
+ }}
+ |
{{/each}}
@@ -511,6 +562,7 @@ export default class AiUsage extends Component {
}}
{{i18n "discourse_ai.usage.usage_count"}} |
{{i18n "discourse_ai.usage.total_tokens"}} |
+ {{i18n "discourse_ai.usage.total_spending"}} |
@@ -535,6 +587,13 @@ export default class AiUsage extends Component {
class="ai-usage__users-cell"
title={{user.total_tokens}}
>{{number user.total_tokens}}
+
+ {{this.totalSpending
+ user.input_spending
+ user.cached_input_spending
+ user.output_spending
+ }}
+ |
{{/each}}
diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml
index f50dda7f..1811de61 100644
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@@ -239,6 +239,7 @@ en:
net_request_tokens: "Net request tokens"
cached_tokens: "Cached tokens"
cached_request_tokens: "Cached request tokens"
+ total_spending: "Estimated cost"
no_users: "No user usage data found"
no_models: "No model usage data found"
no_features: "No feature usage data found"
@@ -249,6 +250,7 @@ en:
request_tokens: "Tokens used when the LLM tries to understand what you are saying"
response_tokens: "Tokens used when the LLM responds to your prompt"
cached_tokens: "Previously processed request tokens that the LLM reuses to optimize performance and cost"
+ total_spending: "Cumulative cost of all tokens used by the LLMs based on specified cost metrics added to LLM configuration settings"
periods:
last_day: "Last 24 hours"
last_week: "Last week"
@@ -404,6 +406,10 @@ en:
enabled_chat_bot: "Allow AI bot selector"
vision_enabled: "Vision enabled"
ai_bot_user: "AI bot User"
+ cost_input: "Input cost"
+ cost_cached_input: "Cached input cost"
+ cost_output: "Output cost"
+
save: "Save"
edit: "Edit"
saved: "LLM model saved"
@@ -487,6 +493,10 @@ en:
name: "We include this in the API call to specify which model we'll use"
vision_enabled: "If enabled, the AI will attempt to understand images. It depends on the model being used supporting vision. Supported by latest models from Anthropic, Google, and OpenAI."
enabled_chat_bot: "If enabled, users can select this model when creating PMs with the AI bot"
+ cost_input: "The input cost per 1M tokens for this model"
+ cost_cached_input: "The cached input cost per 1M tokens for this model"
+ cost_output: "The output cost per 1M tokens for this model"
+ cost_measure: "$/1M tokens"
providers:
aws_bedrock: "AWS Bedrock"
anthropic: "Anthropic"
diff --git a/db/migrate/20250416215039_add_cost_metrics_to_llm_model.rb b/db/migrate/20250416215039_add_cost_metrics_to_llm_model.rb
new file mode 100644
index 00000000..1df3ccac
--- /dev/null
+++ b/db/migrate/20250416215039_add_cost_metrics_to_llm_model.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class AddCostMetricsToLlmModel < ActiveRecord::Migration[7.2]
+ def change
+ add_column :llm_models, :input_cost, :float
+ add_column :llm_models, :cached_input_cost, :float
+ add_column :llm_models, :output_cost, :float
+ end
+end
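
A note on the migration above: the three nullable float columns store dollar costs
per 1M tokens, matching the "$/1M tokens" unit the locale strings introduce. A
minimal Rails-console sketch of setting them (the "gpt-4.1" row looked up here is
a hypothetical example record, not part of the patch):

    # Hypothetical console session; the record looked up is an assumption.
    model = LlmModel.find_by(name: "gpt-4.1")
    model.update!(input_cost: 2.0, cached_input_cost: 0.5, output_cost: 8.0)
    model.input_cost # => 2.0 ($ per 1M input tokens)
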
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
index a9b67c83..5f82667e 100644
--- a/lib/completions/llm.rb
+++ b/lib/completions/llm.rb
@@ -30,9 +30,26 @@ module DiscourseAi
name: "claude-3-7-sonnet",
tokens: 200_000,
display_name: "Claude 3.7 Sonnet",
+ input_cost: 3,
+ cached_input_cost: 0.30,
+ output_cost: 15,
+ },
+ {
+ name: "claude-3-5-haiku",
+ tokens: 200_000,
+ display_name: "Claude 3.5 Haiku",
+ input_cost: 0.80,
+ cached_input_cost: 0.08,
+ output_cost: 4,
+ },
+ {
+ name: "claude-3-opus",
+ tokens: 200_000,
+ display_name: "Claude 3 Opus",
+ input_cost: 15,
+ cached_input_cost: 1.50,
+ output_cost: 75,
},
- { name: "claude-3-5-haiku", tokens: 200_000, display_name: "Claude 3.5 Haiku" },
- { name: "claude-3-opus", tokens: 200_000, display_name: "Claude 3 Opus" },
],
tokenizer: DiscourseAi::Tokenizer::AnthropicTokenizer,
endpoint: "https://api.anthropic.com/v1/messages",
@@ -61,6 +78,8 @@ module DiscourseAi
endpoint:
"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite",
display_name: "Gemini 2.0 Flash Lite",
+ input_cost: 0.075,
+ output_cost: 0.30,
},
],
tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer,
@@ -69,11 +88,46 @@ module DiscourseAi
{
id: "open_ai",
models: [
- { name: "o3-mini", tokens: 200_000, display_name: "o3 Mini" },
- { name: "o1", tokens: 200_000, display_name: "o1" },
- { name: "gpt-4.1", tokens: 800_000, display_name: "GPT-4.1" },
- { name: "gpt-4.1-mini", tokens: 800_000, display_name: "GPT-4.1 Mini" },
- { name: "gpt-4.1-nano", tokens: 800_000, display_name: "GPT-4.1 Nano" },
+ {
+ name: "o3-mini",
+ tokens: 200_000,
+ display_name: "o3 Mini",
+ input_cost: 1.10,
+ cached_input_cost: 0.55,
+ output_cost: 4.40,
+ },
+ {
+ name: "o1",
+ tokens: 200_000,
+ display_name: "o1",
+ input_cost: 15,
+ cached_input_cost: 7.50,
+ output_cost: 60,
+ },
+ {
+ name: "gpt-4.1",
+ tokens: 800_000,
+ display_name: "GPT-4.1",
+ input_cost: 2,
+ cached_input_cost: 0.5,
+ output_cost: 8,
+ },
+ {
+ name: "gpt-4.1-mini",
+ tokens: 800_000,
+ display_name: "GPT-4.1 Mini",
+ input_cost: 0.40,
+ cached_input_cost: 0.10,
+ output_cost: 1.60,
+ },
+ {
+ name: "gpt-4.1-nano",
+ tokens: 800_000,
+ display_name: "GPT-4.1 Nano",
+ input_cost: 0.10,
+ cached_input_cost: 0.025,
+ output_cost: 0.40,
+ },
],
tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer,
endpoint: "https://api.openai.com/v1/chat/completions",
@@ -86,11 +140,15 @@ module DiscourseAi
name: "Meta-Llama-3.3-70B-Instruct",
tokens: 131_072,
display_name: "Llama 3.3 70B",
+ input_cost: 0.60,
+ output_cost: 1.20,
},
{
name: "Meta-Llama-3.1-8B-Instruct",
tokens: 16_384,
display_name: "Llama 3.1 8B",
+ input_cost: 0.1,
+ output_cost: 0.20,
},
],
tokenizer: DiscourseAi::Tokenizer::Llama3Tokenizer,
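
The preset costs above are dollars per 1M tokens. As a worked example of how they
price a single call (the token counts below are illustrative, not from this patch),
the Claude 3.7 Sonnet preset comes out to roughly five cents:

    # Illustrative arithmetic only; the token counts are made up.
    input_cost, cached_input_cost, output_cost = 3.0, 0.30, 15.0 # $ per 1M tokens
    request_tokens, cached_tokens, response_tokens = 12_000, 3_000, 800
    spend =
      request_tokens * input_cost / 1_000_000.0 +
      cached_tokens * cached_input_cost / 1_000_000.0 +
      response_tokens * output_cost / 1_000_000.0
    # => 0.0489 (about $0.05)
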
diff --git a/lib/completions/report.rb b/lib/completions/report.rb
index 6982b9a7..7290a6d3 100644
--- a/lib/completions/report.rb
+++ b/lib/completions/report.rb
@@ -33,6 +33,27 @@ module DiscourseAi
stats.total_requests || 0
end
+ def total_spending
+ total = total_input_spending + total_output_spending + total_cached_input_spending
+ total.round(2)
+ end
+
+ def total_input_spending
+ model_costs.sum { |row| row.input_cost.to_f * row.total_request_tokens.to_i / 1_000_000.0 }
+ end
+
+ def total_output_spending
+ model_costs.sum do |row|
+ row.output_cost.to_f * row.total_response_tokens.to_i / 1_000_000.0
+ end
+ end
+
+ def total_cached_input_spending
+ model_costs.sum do |row|
+ row.cached_input_cost.to_f * row.total_cached_tokens.to_i / 1_000_000.0
+ end
+ end
+
def stats
@stats ||=
base_query.select(
@@ -46,6 +67,24 @@ module DiscourseAi
]
end
+ def model_costs
+ @model_costs ||=
+ base_query
+ .joins("LEFT JOIN llm_models ON llm_models.name = language_model")
+ .group(
+ "llm_models.name, llm_models.input_cost, llm_models.output_cost, llm_models.cached_input_cost",
+ )
+ .select(
+ "llm_models.name",
+ "llm_models.input_cost",
+ "llm_models.output_cost",
+ "llm_models.cached_input_cost",
+ "SUM(COALESCE(request_tokens, 0)) as total_request_tokens",
+ "SUM(COALESCE(response_tokens, 0)) as total_response_tokens",
+ "SUM(COALESCE(cached_tokens, 0)) as total_cached_tokens",
+ )
+ end
+
def guess_period(period = nil)
period = nil if %i[day month hour].include?(period)
period ||
@@ -76,7 +115,15 @@ module DiscourseAi
def user_breakdown
base_query
.joins(:user)
- .group(:user_id, "users.username", "users.uploaded_avatar_id")
+ .joins("LEFT JOIN llm_models ON llm_models.name = language_model")
+ .group(
+ :user_id,
+ "users.username",
+ "users.uploaded_avatar_id",
+ "llm_models.input_cost",
+ "llm_models.output_cost",
+ "llm_models.cached_input_cost",
+ )
.order("usage_count DESC")
.limit(USER_LIMIT)
.select(
@@ -87,12 +134,21 @@ module DiscourseAi
"SUM(COALESCE(cached_tokens,0)) as total_cached_tokens",
"SUM(COALESCE(request_tokens,0)) as total_request_tokens",
"SUM(COALESCE(response_tokens,0)) as total_response_tokens",
+ "SUM(COALESCE(request_tokens, 0)) * COALESCE(llm_models.input_cost, 0) / 1000000.0 as input_spending",
+ "SUM(COALESCE(response_tokens, 0)) * COALESCE(llm_models.output_cost, 0) / 1000000.0 as output_spending",
+ "SUM(COALESCE(cached_tokens, 0)) * COALESCE(llm_models.cached_input_cost, 0) / 1000000.0 as cached_input_spending",
)
end
def feature_breakdown
base_query
- .group(:feature_name)
+ .joins("LEFT JOIN llm_models ON llm_models.name = language_model")
+ .group(
+ :feature_name,
+ "llm_models.input_cost",
+ "llm_models.output_cost",
+ "llm_models.cached_input_cost",
+ )
.order("usage_count DESC")
.select(
"case when coalesce(feature_name, '') = '' then '#{UNKNOWN_FEATURE}' else feature_name end as feature_name",
@@ -101,12 +157,21 @@ module DiscourseAi
"SUM(COALESCE(cached_tokens,0)) as total_cached_tokens",
"SUM(COALESCE(request_tokens,0)) as total_request_tokens",
"SUM(COALESCE(response_tokens,0)) as total_response_tokens",
+ "SUM(COALESCE(request_tokens, 0)) * COALESCE(llm_models.input_cost, 0) / 1000000.0 as input_spending",
+ "SUM(COALESCE(response_tokens, 0)) * COALESCE(llm_models.output_cost, 0) / 1000000.0 as output_spending",
+ "SUM(COALESCE(cached_tokens, 0)) * COALESCE(llm_models.cached_input_cost, 0) / 1000000.0 as cached_input_spending",
)
end
def model_breakdown
base_query
- .group(:language_model)
+ .joins("LEFT JOIN llm_models ON llm_models.name = language_model")
+ .group(
+ :language_model,
+ "llm_models.input_cost",
+ "llm_models.output_cost",
+ "llm_models.cached_input_cost",
+ )
.order("usage_count DESC")
.select(
"language_model as llm",
@@ -115,6 +180,9 @@ module DiscourseAi
"SUM(COALESCE(cached_tokens,0)) as total_cached_tokens",
"SUM(COALESCE(request_tokens,0)) as total_request_tokens",
"SUM(COALESCE(response_tokens,0)) as total_response_tokens",
+ "SUM(COALESCE(request_tokens, 0)) * COALESCE(llm_models.input_cost, 0) / 1000000.0 as input_spending",
+ "SUM(COALESCE(response_tokens, 0)) * COALESCE(llm_models.output_cost, 0) / 1000000.0 as output_spending",
+ "SUM(COALESCE(cached_tokens, 0)) * COALESCE(llm_models.cached_input_cost, 0) / 1000000.0 as cached_input_spending",
)
end
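
Two details of these queries are easy to miss: the LEFT JOIN keeps audit rows whose
language_model has no matching llm_models entry, and COALESCE(..., 0) prices those
rows at zero instead of dropping them from the breakdowns. A plain-Ruby sketch of
the same behavior, with a hash lookup standing in for the join (data illustrative):

    # Stand-in for LEFT JOIN + COALESCE: unknown models price at 0 but still count.
    costs = { "gpt-4.1" => 2.0 } # $ per 1M input tokens; only one model configured
    logs = [
      { model: "gpt-4.1", request_tokens: 500_000 },
      { model: "unknown-llm", request_tokens: 900_000 }, # no cost row
    ]
    input_spending =
      logs.sum { |l| l[:request_tokens] * costs.fetch(l[:model], 0.0) / 1_000_000.0 }
    # => 1.0 (the unknown model adds nothing, though its tokens still feed the token sums)
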
diff --git a/spec/fabricators/llm_model_fabricator.rb b/spec/fabricators/llm_model_fabricator.rb
index 87b1ef25..d61bb915 100644
--- a/spec/fabricators/llm_model_fabricator.rb
+++ b/spec/fabricators/llm_model_fabricator.rb
@@ -8,6 +8,9 @@ Fabricator(:llm_model) do
api_key "123"
url "https://api.openai.com/v1/chat/completions"
max_prompt_tokens 131_072
+ input_cost 10
+ cached_input_cost 2.5
+ output_cost 40
end
Fabricator(:anthropic_model, from: :llm_model) do
diff --git a/spec/requests/admin/ai_usage_controller_spec.rb b/spec/requests/admin/ai_usage_controller_spec.rb
index eae9285b..f411ed41 100644
--- a/spec/requests/admin/ai_usage_controller_spec.rb
+++ b/spec/requests/admin/ai_usage_controller_spec.rb
@@ -5,6 +5,7 @@ require "rails_helper"
RSpec.describe DiscourseAi::Admin::AiUsageController do
fab!(:admin)
fab!(:user)
+ fab!(:llm_model)
let(:usage_report_path) { "/admin/plugins/discourse-ai/ai-usage-report.json" }
before { SiteSetting.discourse_ai_enabled = true }
@@ -35,6 +36,18 @@ RSpec.describe DiscourseAi::Admin::AiUsageController do
)
end
+ fab!(:log3) do
+ AiApiAuditLog.create!(
+ provider_id: 1,
+ feature_name: "ai_helper",
+ language_model: llm_model.name,
+ request_tokens: 300,
+ response_tokens: 150,
+ cached_tokens: 50,
+ created_at: 3.days.ago,
+ )
+ end
+
it "returns correct data structure" do
get usage_report_path
@@ -55,7 +68,7 @@ RSpec.describe DiscourseAi::Admin::AiUsageController do
}
json = response.parsed_body
- expect(json["summary"]["total_tokens"]).to eq(450) # sum of all tokens
+ expect(json["summary"]["total_tokens"]).to eq(900) # sum of all tokens
end
it "filters by feature" do
@@ -79,6 +92,26 @@ RSpec.describe DiscourseAi::Admin::AiUsageController do
expect(models.first["total_tokens"]).to eq(300)
end
+ it "shows an estimated cost" do
+ get usage_report_path, params: { model: llm_model.name }
+
+ json = response.parsed_body
+ summary = json["summary"]
+ feature = json["features"].find { |f| f["feature_name"] == "ai_helper" }
+
+ expected_input_spending = llm_model.input_cost * log3.request_tokens / 1_000_000.0
+ expected_cached_input_spending =
+ llm_model.cached_input_cost * log3.cached_tokens / 1_000_000.0
+ expected_output_spending = llm_model.output_cost * log3.response_tokens / 1_000_000.0
+ expected_total_spending =
+ expected_input_spending + expected_cached_input_spending + expected_output_spending
+
+ expect(feature["input_spending"].to_s).to eq(expected_input_spending.to_s)
+ expect(feature["output_spending"].to_s).to eq(expected_output_spending.to_s)
+ expect(feature["cached_input_spending"].to_s).to eq(expected_cached_input_spending.to_s)
+ expect(summary["total_spending"].to_s).to eq(expected_total_spending.round(2).to_s)
+ end
+
it "handles different period groupings" do
get usage_report_path, params: { period: "hour" }
expect(response.status).to eq(200)
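
For reference, the arithmetic the new spec exercises, written out with the concrete
values from the fabricator (10 / 2.5 / 40 dollars per 1M tokens) and log3
(300 request, 50 cached, 150 response tokens):

    # The same computation the spec performs, with the numbers inlined.
    input_spending        = 10.0 * 300 / 1_000_000.0 # => 0.003
    cached_input_spending = 2.5 * 50 / 1_000_000.0   # => 0.000125
    output_spending       = 40.0 * 150 / 1_000_000.0 # => 0.006
    (input_spending + cached_input_spending + output_spending).round(2)
    # => 0.01, the summary["total_spending"] value asserted above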