diff --git a/app/models/llm_model.rb b/app/models/llm_model.rb index 3613183e..937c5373 100644 --- a/app/models/llm_model.rb +++ b/app/models/llm_model.rb @@ -65,6 +65,8 @@ class LlmModel < ActiveRecord::Base google: { disable_native_tools: :checkbox, enable_thinking: :checkbox, + disable_temperature: :checkbox, + disable_top_p: :checkbox, thinking_tokens: :number, }, azure: { diff --git a/config/eval-llms.yml b/config/eval-llms.yml index dd90f5b9..0c43dcc6 100644 --- a/config/eval-llms.yml +++ b/config/eval-llms.yml @@ -1,4 +1,27 @@ llms: + o3: + display_name: O3 + name: o3 + tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer + api_key_env: OPENAI_API_KEY + provider: open_ai + url: https://api.openai.com/v1/chat/completions + max_prompt_tokens: 131072 + vision_enabled: true + provider_params: + disable_top_p: true + disable_temperature: true + + gpt-41: + display_name: GPT-4.1 + name: gpt-4.1 + tokenizer: DiscourseAi::Tokenizer::OpenAiTokenizer + api_key_env: OPENAI_API_KEY + provider: open_ai + url: https://api.openai.com/v1/chat/completions + max_prompt_tokens: 131072 + vision_enabled: true + gpt-4o: display_name: GPT-4o name: gpt-4o @@ -74,12 +97,25 @@ llms: max_prompt_tokens: 1000000 vision_enabled: true - gemini-2.0-pro-exp: - display_name: Gemini 2.0 pro - name: gemini-2-0-pro-exp + gemini-2.5-flash: + display_name: Gemini 2.5 Flash + name: gemini-2-5-flash tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer api_key_env: GEMINI_API_KEY provider: google - url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro-exp + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash + max_prompt_tokens: 1000000 + vision_enabled: true + provider_params: + disable_top_p: true + disable_temperature: true + + gemini-2.0-pro: + display_name: Gemini 2.0 pro + name: gemini-2-0-pro + tokenizer: DiscourseAi::Tokenizer::GeminiTokenizer + api_key_env: GEMINI_API_KEY + provider: google + url: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-pro max_prompt_tokens: 1000000 vision_enabled: true diff --git a/config/locales/client.en.yml b/config/locales/client.en.yml index 62386a7e..bd774cd1 100644 --- a/config/locales/client.en.yml +++ b/config/locales/client.en.yml @@ -249,6 +249,7 @@ en: markdown_tables: "Generate Markdown table" custom_prompt: "Custom prompt" image_caption: "Caption images" + translator: "Translator" translation: name: "Translation" @@ -257,7 +258,7 @@ en: post_raw_translator: "Post raw translator" topic_title_translator: "Topic title translator" short_text_translator: "Short text translator" - + spam: name: "Spam" description: "Identifies potential spam using the selected LLM and flags it for site moderators to inspect in the review queue" diff --git a/evals/lib/eval.rb b/evals/lib/eval.rb index 2a7bf681..d322bed8 100644 --- a/evals/lib/eval.rb +++ b/evals/lib/eval.rb @@ -200,12 +200,7 @@ class DiscourseAi::Evals::Eval user.admin = true end result = - helper.generate_and_send_prompt( - name, - input, - current_user = user, - _force_default_locale = false, - ) + helper.generate_and_send_prompt(name, input, current_user = user, force_default_locale: false) result[:suggestions].first end diff --git a/lib/ai_helper/assistant.rb b/lib/ai_helper/assistant.rb index 3bf19838..eaa87834 100644 --- a/lib/ai_helper/assistant.rb +++ b/lib/ai_helper/assistant.rb @@ -82,7 +82,7 @@ module DiscourseAi context.user_language = "#{locale_hash["name"]}" if user - timezone = user.user_option.timezone || "UTC" + timezone = user&.user_option&.timezone || "UTC" current_time = Time.now.in_time_zone(timezone) temporal_context = { @@ -126,21 +126,29 @@ module DiscourseAi ) context = attach_user_context(context, user, force_default_locale: force_default_locale) - helper_response = +"" + bad_json = false + json_summary_schema_key = bot.persona.response_format&.first.to_h + + schema_key = json_summary_schema_key["key"]&.to_sym + schema_type = json_summary_schema_key["type"] + + if schema_type == "array" + helper_response = [] + else + helper_response = +"" + end buffer_blk = Proc.new do |partial, _, type| - json_summary_schema_key = bot.persona.response_format&.first.to_h - helper_response = [] if json_summary_schema_key["type"] == "array" - if type == :structured_output - helper_chunk = partial.read_buffered_property(json_summary_schema_key["key"]&.to_sym) + if type == :structured_output && schema_type + helper_chunk = partial.read_buffered_property(schema_key) if !helper_chunk.nil? && !helper_chunk.empty? - if json_summary_schema_key["type"] != "array" - helper_response = helper_chunk - else + if schema_type == "string" || schema_type == "array" helper_response << helper_chunk + else + helper_response = helper_chunk end - block.call(helper_chunk) if block + block.call(helper_chunk) if block && !bad_json end elsif type.blank? # Assume response is a regular completion. @@ -255,7 +263,7 @@ module DiscourseAi Proc.new do |partial, _, type| if type == :structured_output structured_output = partial - json_summary_schema_key = bot.persona.response_format&.first.to_h + bot.persona.response_format&.first.to_h end end @@ -287,6 +295,11 @@ module DiscourseAi end def find_ai_helper_model(helper_mode, persona_klass) + if helper_mode == IMAGE_CAPTION && @image_caption_llm.is_a?(LlmModel) + return @image_caption_llm + end + + return @helper_llm if helper_mode != IMAGE_CAPTION && @helper_llm.is_a?(LlmModel) self.class.find_ai_helper_model(helper_mode, persona_klass) end @@ -299,9 +312,9 @@ module DiscourseAi if !model_id if helper_mode == IMAGE_CAPTION - model_id = @helper_llm || SiteSetting.ai_helper_image_caption_model&.split(":")&.last + model_id = SiteSetting.ai_helper_image_caption_model&.split(":")&.last else - model_id = @image_caption_llm || SiteSetting.ai_helper_model&.split(":")&.last + model_id = SiteSetting.ai_helper_model&.split(":")&.last end end diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb index e0f74025..abacdeec 100644 --- a/lib/completions/endpoints/base.rb +++ b/lib/completions/endpoints/base.rb @@ -187,10 +187,10 @@ module DiscourseAi blk = lambda do |partial| if partial.is_a?(String) - partial = xml_stripper << partial if xml_stripper + partial = xml_stripper << partial if xml_stripper && !partial.empty? if structured_output.present? - structured_output << partial + structured_output << partial if !partial.empty? partial = structured_output end end @@ -252,6 +252,15 @@ module DiscourseAi end xml_tool_processor.finish.each { |partial| blk.call(partial) } if xml_tool_processor decode_chunk_finish.each { |partial| blk.call(partial) } + + if structured_output + structured_output.finish + if structured_output.broken? + # signal last partial output which will get parsed + # by best effort json parser + blk.call("") + end + end return response_data ensure if log @@ -448,6 +457,7 @@ module DiscourseAi if structured_output.present? response_data.each { |data| structured_output << data if data.is_a?(String) } + structured_output.finish return structured_output end diff --git a/lib/completions/endpoints/gemini.rb b/lib/completions/endpoints/gemini.rb index 7f12c3f3..f5b1cb19 100644 --- a/lib/completions/endpoints/gemini.rb +++ b/lib/completions/endpoints/gemini.rb @@ -33,7 +33,8 @@ module DiscourseAi model_params[:topP] = model_params.delete(:top_p) if model_params[:top_p] - # temperature already supported + model_params.delete(:temperature) if llm_model.lookup_custom_param("disable_temperature") + model_params.delete(:topP) if llm_model.lookup_custom_param("disable_top_p") model_params end diff --git a/lib/completions/json_streaming_parser.rb b/lib/completions/json_streaming_parser.rb index c8466c82..f3288ab6 100644 --- a/lib/completions/json_streaming_parser.rb +++ b/lib/completions/json_streaming_parser.rb @@ -53,6 +53,7 @@ module DiscourseAi # # Returns a UTF-8 encoded String. def <<(data) + data = data.dup if data.frozen? # Avoid state machine for complete UTF-8. if @buffer.empty? data.force_encoding(Encoding::UTF_8) diff --git a/lib/completions/structured_output.rb b/lib/completions/structured_output.rb index 74a41f65..b2e39b8f 100644 --- a/lib/completions/structured_output.rb +++ b/lib/completions/structured_output.rb @@ -17,23 +17,48 @@ module DiscourseAi @raw_cursor = 0 @partial_json_tracker = JsonStreamingTracker.new(self) + + @type_map = {} + json_schema_properties.each { |name, prop| @type_map[name.to_sym] = prop[:type].to_sym } + + @done = false + end + + def to_s + # we may want to also normalize the JSON here for the broken case + @raw_response end attr_reader :last_chunk_buffer def <<(raw) + raise "Cannot append to a completed StructuredOutput" if @done @raw_response << raw @partial_json_tracker << raw end + def finish + @done = true + end + + def broken? + @partial_json_tracker.broken? + end + def read_buffered_property(prop_name) - # Safeguard: If the model is misbehaving and generating something that's not a JSON, - # treat response as a normal string. - # This is a best-effort to recover from an unexpected scenario. if @partial_json_tracker.broken? - unread_chunk = @raw_response[@raw_cursor..] - @raw_cursor = @raw_response.length - return unread_chunk + if @done + return nil if @type_map[prop_name.to_sym].nil? + return( + DiscourseAi::Utils::BestEffortJsonParser.extract_key( + @raw_response, + @type_map[prop_name.to_sym], + prop_name, + ) + ) + else + return nil + end end # Maybe we haven't read that part of the JSON yet. diff --git a/lib/configuration/feature.rb b/lib/configuration/feature.rb index 86ff9b5b..ee62e8c3 100644 --- a/lib/configuration/feature.rb +++ b/lib/configuration/feature.rb @@ -103,6 +103,12 @@ module DiscourseAi DiscourseAi::Configuration::Module::AI_HELPER_ID, DiscourseAi::Configuration::Module::AI_HELPER, ), + new( + "translator", + "ai_helper_translator_persona", + DiscourseAi::Configuration::Module::AI_HELPER_ID, + DiscourseAi::Configuration::Module::AI_HELPER, + ), new( "custom_prompt", "ai_helper_custom_prompt_persona", diff --git a/lib/personas/translator.rb b/lib/personas/translator.rb index 15f7084c..faf00005 100644 --- a/lib/personas/translator.rb +++ b/lib/personas/translator.rb @@ -19,11 +19,12 @@ module DiscourseAi Format your response as a JSON object with a single key named "output", which has the translation as the value. Your output should be in the following format: - - {"output": "xx"} - + + {"output": "xx"} Where "xx" is replaced by the translation. + + reply with valid JSON only PROMPT end diff --git a/lib/utils/best_effort_json_parser.rb b/lib/utils/best_effort_json_parser.rb new file mode 100644 index 00000000..c2f2f2bf --- /dev/null +++ b/lib/utils/best_effort_json_parser.rb @@ -0,0 +1,137 @@ +# frozen_string_literal: true + +require "json" + +module DiscourseAi + module Utils + class BestEffortJsonParser + class << self + def extract_key(helper_response, schema_type, schema_key) + return helper_response unless helper_response.is_a?(String) + + schema_type = schema_type.to_sym + schema_key = schema_key&.to_sym + cleaned = remove_markdown_fences(helper_response.strip) + + parsed = + try_parse(cleaned) || try_parse(fix_common_issues(cleaned)) || + manual_extract(cleaned, schema_key, schema_type) + + value = parsed.is_a?(Hash) ? parsed[schema_key.to_s] : parsed + + cast_value(value, schema_type) + end + + private + + def remove_markdown_fences(text) + return text unless text.match?(/^```(?:json)?\s*\n/i) + + text.gsub(/^```(?:json)?\s*\n/i, "").gsub(/\n```\s*$/, "") + end + + def fix_common_issues(text) + text.gsub(/(\w+):/, '"\1":').gsub(/'/, "\"") + end + + def try_parse(text) + JSON.parse(text) + rescue JSON::ParserError + nil + end + + def manual_extract(text, key, schema_type) + return default_for(schema_type) unless key + + case schema_type + when :object + extract_object(text, key.to_s) + when :array, :string + extract_scalar(text, key.to_s, schema_type) + else + default_for(schema_type) + end + end + + def extract_scalar(text, key, schema_type) + patterns = + if schema_type == :array + [ + /"#{key}"\s*:\s*\[([^\]]+)\]/, + /'#{key}'\s*:\s*\[([^\]]+)\]/, + /#{key}\s*:\s*\[([^\]]+)\]/, + ] + else + [ + /"#{key}"\s*:\s*"([^"]+)"/, + /'#{key}'\s*:\s*'([^']+)'/, + /#{key}\s*:\s*"([^"]+)"/, + /#{key}\s*:\s*'([^']+)'/, + ] + end + + patterns.each do |pattern| + match = text.match(pattern) + next unless match + + value = match[1] + return schema_type == :array ? parse_array(value) : value + end + + default_for(schema_type) + end + + def parse_array(value) + JSON.parse("[#{value}]") + rescue JSON::ParserError + value.split(",").map { |item| item.strip.gsub(/^['"]|['"]$/, "") } + end + + def extract_object(text, key) + pattern = /("#{key}"|'#{key}'|#{key})\s*:\s*\{/ + match = text.match(pattern) or return {} + + start = match.end(0) - 1 + return {} unless text[start] == "{" + + end_pos = find_matching_brace(text, start) + return {} unless end_pos + + obj_str = text[start..end_pos] + try_parse(obj_str) || try_parse(fix_common_issues(obj_str)) || {} + end + + def find_matching_brace(text, start_pos) + brace_count = 0 + + text[start_pos..-1].each_char.with_index do |char, idx| + brace_count += 1 if char == "{" + if char == "}" + brace_count -= 1 + return start_pos + idx if brace_count.zero? + end + end + nil + end + + def cast_value(value, schema_type) + case schema_type + when :array + value.is_a?(Array) ? value : [] + when :object + value.is_a?(Hash) ? value : {} + when :boolean + return value if [true, false, nil].include?(value) + value.to_s.downcase == "true" + else + value.to_s + end + end + + def default_for(schema_type) + schema_type == :array ? [] : schema_type == :object ? {} : "" + end + end + end + end +end diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb index c5b5a456..cd95476d 100644 --- a/spec/lib/completions/endpoints/open_ai_spec.rb +++ b/spec/lib/completions/endpoints/open_ai_spec.rb @@ -59,7 +59,7 @@ class OpenAiMock < EndpointMock stub.to_return(status: 200, body: chunks) end - def stub_streamed_response(prompt, deltas, tool_call: false) + def stub_streamed_response(prompt, deltas, tool_call: false, skip_body_check: false) chunks = deltas.each_with_index.map do |_, index| if index == (deltas.length - 1) @@ -71,10 +71,13 @@ class OpenAiMock < EndpointMock chunks = (chunks.join("\n\n") << "data: [DONE]").split("") - WebMock - .stub_request(:post, "https://api.openai.com/v1/chat/completions") - .with(body: request_body(prompt, stream: true, tool_call: tool_call)) - .to_return(status: 200, body: chunks) + mock = WebMock.stub_request(:post, "https://api.openai.com/v1/chat/completions") + + if !skip_body_check + mock = mock.with(body: request_body(prompt, stream: true, tool_call: tool_call)) + end + + mock.to_return(status: 200, body: chunks) yield if block_given? end @@ -401,6 +404,41 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do end end + describe "structured outputs" do + it "falls back to best-effort parsing on broken JSON responses" do + prompt = compliance.generic_prompt + deltas = ["```json\n{ message: 'hel", "lo' }"] + + model_params = { + response_format: { + json_schema: { + schema: { + properties: { + message: { + type: "string", + }, + }, + }, + }, + }, + } + + read_properties = [] + open_ai_mock.with_chunk_array_support do + # skip body check cause of response format + open_ai_mock.stub_streamed_response(prompt, deltas, skip_body_check: true) + + dialect = compliance.dialect(prompt: prompt) + + endpoint.perform_completion!(dialect, user, model_params) do |partial| + read_properties << partial.read_buffered_property(:message) + end + end + + expect(read_properties.join).to eq("hello") + end + end + describe "disabled tool use" do it "can properly disable tool use with :none" do llm = DiscourseAi::Completions::Llm.proxy("custom:#{model.id}") diff --git a/spec/lib/completions/structured_output_spec.rb b/spec/lib/completions/structured_output_spec.rb index c4c5d882..05322567 100644 --- a/spec/lib/completions/structured_output_spec.rb +++ b/spec/lib/completions/structured_output_spec.rb @@ -127,13 +127,31 @@ RSpec.describe DiscourseAi::Completions::StructuredOutput do chunks = [+"I'm not", +"a", +"JSON :)"] structured_output << chunks[0] - expect(structured_output.read_buffered_property(nil)).to eq("I'm not") + expect(structured_output.read_buffered_property(:bob)).to eq(nil) structured_output << chunks[1] - expect(structured_output.read_buffered_property(nil)).to eq("a") + expect(structured_output.read_buffered_property(:bob)).to eq(nil) structured_output << chunks[2] - expect(structured_output.read_buffered_property(nil)).to eq("JSON :)") + + structured_output.finish + expect(structured_output.read_buffered_property(:bob)).to eq(nil) + end + + it "can handle broken JSON" do + broken_json = <<~JSON + ```json + { + "message": "This is a broken JSON", + bool: true + } + JSON + + structured_output << broken_json + structured_output.finish + + expect(structured_output.read_buffered_property(:message)).to eq("This is a broken JSON") + expect(structured_output.read_buffered_property(:bool)).to eq(true) end end end diff --git a/spec/lib/utils/best_effort_json_parser_spec.rb b/spec/lib/utils/best_effort_json_parser_spec.rb new file mode 100644 index 00000000..6fb85cf8 --- /dev/null +++ b/spec/lib/utils/best_effort_json_parser_spec.rb @@ -0,0 +1,190 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Utils::BestEffortJsonParser do + describe ".extract_key" do + context "with string type schema" do + let(:schema_type) { "string" } + let(:schema_key) { :output } + + it "handles JSON wrapped in markdown fences" do + input = <<~JSON + ```json + {"output": "Hello world"} + ``` + JSON + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq("Hello world") + end + + it "handles JSON with backticks but no language identifier" do + input = <<~JSON + ``` + {"output": "Test message"} + ``` + JSON + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq("Test message") + end + + it "extracts value from malformed JSON with single quotes" do + input = "{'output': 'Single quoted value'}" + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq("Single quoted value") + end + + it "extracts value from JSON with unquoted keys" do + input = "{output: \"Unquoted key value\"}" + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq("Unquoted key value") + end + + it "handles JSON with extra text before and after" do + input = <<~TEXT + Here is the result: + {"output": "Extracted value"} + That's all! + TEXT + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq("Extracted value") + end + + it "handles nested JSON structures" do + input = <<~JSON + { + "data": { + "nested": true + }, + "output": "Found me!" + } + JSON + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq("Found me!") + end + + it "handles strings with escaped quotes" do + input = '{"output": "She said \"Hello\" to me"}' + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq("She said \"Hello\" to me") + end + + it "accepts string keys as well as symbols" do + input = '{"output": "String key test"}' + result = described_class.extract_key(input, schema_type, "output") + expect(result).to eq("String key test") + end + end + + context "with array type schema" do + let(:schema_type) { "array" } + let(:schema_key) { :output } + + it "handles array wrapped in markdown fences" do + input = <<~JSON + ```json + {"output": ["item1", "item2", "item3"]} + ``` + JSON + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq(%w[item1 item2 item3]) + end + + it "extracts array from malformed JSON" do + input = "{output: ['value1', 'value2']}" + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq(%w[value1 value2]) + end + + it "handles empty arrays" do + input = <<~JSON + ```json + {"output": []} + ``` + JSON + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq([]) + end + + it "handles arrays with mixed quotes" do + input = '{output: ["item1", \'item2\']}' + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq(%w[item1 item2]) + end + + it "accepts string keys" do + input = '{"items": ["a", "b"]}' + result = described_class.extract_key(input, "array", "items") + expect(result).to eq(%w[a b]) + end + end + + context "with object type schema" do + let(:schema_type) { "object" } + let(:schema_key) { :data } + + it "extracts object from markdown fenced JSON" do + input = <<~JSON + ```json + { + "data": { + "name": "Test", + "value": 123 + } + } + ``` + JSON + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq({ "name" => "Test", "value" => 123 }) + end + + it "handles malformed object JSON" do + input = "{data: {name: 'Test', value: 123}}" + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq({ "name" => "Test", "value" => 123 }) + end + + it "handles nested objects" do + input = <<~JSON + { + "data": { + "user": { + "name": "John", + "age": 30 + }, + "active": true + } + } + JSON + result = described_class.extract_key(input, schema_type, schema_key) + expect(result).to eq({ "user" => { "name" => "John", "age" => 30 }, "active" => true }) + end + end + + context "when very broken JSON is entered" do + it "returns empty string when no valid JSON can be extracted for string type" do + input = "This is just plain text with no JSON" + result = described_class.extract_key(input, "string", :output) + expect(result).to eq("") + end + + it "returns empty array when array extraction fails" do + input = "No array here" + result = described_class.extract_key(input, "array", :output) + expect(result).to eq([]) + end + + it "returns empty hash when object extraction fails" do + input = "No object here" + result = described_class.extract_key(input, "object", :data) + expect(result).to eq({}) + end + + it "returns input as-is when it's not a string" do + expect(described_class.extract_key(123, "string", :output)).to eq(123) + expect(described_class.extract_key(["existing"], "array", :output)).to eq(["existing"]) + expect(described_class.extract_key({ existing: true }, "object", :output)).to eq( + { existing: true }, + ) + end + end + end +end