discourse-ai/lib/completions/json_streaming_tracker.rb
Roman Rizzi d72ad84f8f
FIX: Retry parsing escaped inner JSON to handle control chars. (#1357)
The structured output JSON comes embedded inside the API response, which is itself JSON. Since we have to parse the response to process it, any escaped control characters inside the structured output are unescaped into literal control characters, leaving the inner JSON invalid and breaking the parser. This change adds a retry mechanism that escapes
the string again if parsing fails, preventing the parser from breaking on malformed input and working around this issue.

For example:

```
  original = '{ "a": "{\\"key\\":\\"value with \\n newline\\"}" }'
  JSON.parse(original) => { "a" => "{\"key\":\"value with \n newline\"}" }
  # At this point, the inner JSON string contains an actual newline.
```
2025-05-21 11:25:59 -03:00

72 lines
2.1 KiB
Ruby

# frozen_string_literal: true
require "json"

module DiscourseAi
  module Completions
    # Feeds raw JSON fragments into a JsonStreamingParser and reports progress
    # to a consumer while the document is still being streamed.
    #
    # The consumer is notified through `notify_progress(key, value)`:
    # * once with the final value when the parser emits a value for the
    #   current key, and
    # * after each chunk that leaves the parser in the `:start_string` state,
    #   with the parser's current buffer as the value.
    #
    # If a chunk cannot be parsed, even after re-escaping it, the tracker is
    # flagged as broken and silently ignores all further input.
    class JsonStreamingTracker
      attr_reader :current_key, :current_value, :stream_consumer

      # @param stream_consumer [#notify_progress] object that receives
      #   `notify_progress(key, value)` calls as values become available
      def initialize(stream_consumer)
        @stream_consumer = stream_consumer
        @current_key = nil
        @current_value = nil
        # Start from an explicit false so `broken?` always answers with a boolean.
        @broken = false
        @parser = DiscourseAi::Completions::JsonStreamingParser.new

        @parser.key do |k|
          @current_key = k
          @current_value = nil
        end

        @parser.value do |v|
          if @current_key
            stream_consumer.notify_progress(@current_key, v)
            @current_key = nil
          end
        end
      end

      # @return [Boolean] true once a chunk failed to parse both as-is and
      #   after being re-escaped
      def broken?
        @broken
      end

      # Feeds one chunk of raw JSON to the parser.
      #
      # @param raw_json [String] the next fragment of the streamed document
      # @return [nil]
      def <<(raw_json)
        # llm could send broken json
        # in that case just deal with it later
        # don't stream
        return if @broken

        begin
          @parser << raw_json
        rescue DiscourseAi::Completions::ParserError
          # Note: We're parsing JSON content that was itself embedded as a string inside another JSON object.
          # During the outer JSON.parse, any escaped control characters (like "\\n") are unescaped to real
          # characters ("\n"), which corrupts the inner JSON structure when passed to the parser here.
          # To handle this, we retry parsing with the chunk JSON-escaped again if the first attempt fails.
          try_escape_and_parse(raw_json)
          return if @broken
        end

        if @parser.state == :start_string && @current_key
          # A string value is in progress: this is worth notifying
          stream_consumer.notify_progress(@current_key, @parser.buf)
        end

        @current_key = nil if @parser.state == :end_value
      end

      private

      # Second attempt for a chunk the parser rejected: escapes the chunk as
      # the contents of a JSON string and feeds it again. Marks the tracker as
      # broken when the chunk is unusable or the retry fails as well.
      #
      # NOTE(review): the retry re-sends the entire chunk; this assumes the
      # parser did not keep the part it consumed before raising. Confirm
      # against JsonStreamingParser.
      def try_escape_and_parse(raw_json)
        # Only empty chunks are rejected. A whitespace-only chunk such as
        # "\n\n" is exactly the kind of input that needs re-escaping.
        if !raw_json.is_a?(String) || raw_json.empty?
          @broken = true
          return
        end

        # Escape with JSON rules and drop the surrounding quotes. String#dump
        # is not suitable here: it emits Ruby-only escapes (\#{, \e, \xNN,
        # \u{1F600}) that are not valid inside a JSON string.
        escaped_json = JSON.generate(raw_json)[1..-2]
        @parser << escaped_json
      rescue DiscourseAi::Completions::ParserError, JSON::GeneratorError, EncodingError
        # Either the escaped chunk is still unparseable, or the chunk is not
        # valid text that can be JSON-encoded at all.
        @broken = true
      end
    end
  end
end