# frozen_string_literal: true

# This code is copied from the MIT licensed json-stream
# see: https://github.com/dgraham/json-stream
#
# It was copied to avoid the dependency and to allow us to make some small
# changes; in particular, we need better access to internal state when parsing.

module DiscourseAi
  module Completions
    # Raised on any invalid JSON text, including malformed UTF-8 input.
    #
    # Errors raised through JsonStreamingParser#error carry a message that ends
    # with ": char N", where N is the parser's character position counter at
    # the moment the problem was detected.
    ParserError = Class.new(RuntimeError)

    # A streaming JSON parser that generates SAX-like events for state changes.
    # Use the json gem for small documents. Use this for huge documents that
    # won't fit in memory.
    #
    # Examples
    #
    #   parser = DiscourseAi::Completions::JsonStreamingParser.new
    #   parser.key { |key| puts key }
    #   parser.value { |value| puts value }
    #   parser << '{"answer":'
    #   parser << ' 42}'
    class JsonStreamingParser
      # our changes: expose internal parse state to callers.
      #
      #   state - The Symbol naming the current state-machine state
      #           (:start_document before any input is consumed).
      #   buf   - The String accumulated so far for the string, number or
      #           keyword currently being parsed.
      #   pos   - The Integer zero-based index of the last character consumed,
      #           counted across all chunks (-1 before any input).
      attr_reader :state, :buf, :pos

      # A character buffer that expects a UTF-8 encoded stream of bytes.
      # This handles truncated multi-byte characters properly so we can just
      # feed it binary data and receive a properly formatted UTF-8 String as
      # output.
      #
      # More UTF-8 parsing details are available at:
      #
      #   http://en.wikipedia.org/wiki/UTF-8
      #   http://tools.ietf.org/html/rfc3629#section-3
      class Buffer
        def initialize
          @state = :start # :start or :multi_byte (inside a multi-byte character)
          @buffer = [] # bytes of the multi-byte character still being assembled
          @need = 0 # total byte length of the character being assembled
        end

        # Fill the buffer with a String of binary UTF-8 encoded bytes. Returns
        # as much of the data in a UTF-8 String as we have. Truncated multi-byte
        # characters are saved in the buffer until the next call to this method
        # where we expect to receive the rest of the multi-byte character.
        #
        # The caller's String is never modified, so frozen Strings and Strings
        # tagged with a non UTF-8 encoding (e.g. binary socket reads) are safe
        # to pass in.
        #
        # data - The partial binary encoded String data.
        #
        # Raises ParserError if the UTF-8 byte sequence is malformed.
        #
        # Returns a UTF-8 encoded String. When data is already a valid UTF-8
        # String and no partial character is pending, data itself is returned.
        def <<(data)
          # Avoid state machine for complete UTF-8. Only re-tag a copy:
          # String#force_encoding mutates its receiver and raises FrozenError
          # on frozen input.
          if @buffer.empty?
            text = data.encoding == Encoding::UTF_8 ? data : data.dup.force_encoding(Encoding::UTF_8)
            return text if text.valid_encoding?
          end

          bytes = []
          data.each_byte do |byte|
            case @state
            when :start
              if byte < 128
                # Single byte (ASCII) character.
                bytes << byte
              elsif byte >= 192
                # Lead byte of a multi-byte character.
                @state = :multi_byte
                @buffer << byte
                @need = sequence_length(byte)
              else
                error("Expected start of multi-byte or single byte char")
              end
            when :multi_byte
              if byte > 127 && byte < 192
                @buffer << byte
                if @buffer.size == @need
                  # Character complete: move its bytes to the output.
                  bytes.concat(@buffer)
                  @buffer.clear
                  @state = :start
                end
              else
                error("Expected continuation byte")
              end
            end
          end

          # Build UTF-8 encoded string from completed codepoints. The final
          # validity check rejects sequences that are well-formed by length
          # but still not valid UTF-8.
          bytes
            .pack("C*")
            .force_encoding(Encoding::UTF_8)
            .tap { |text| error("Invalid UTF-8 byte sequence") unless text.valid_encoding? }
        end

        # Determine if the buffer contains partial UTF-8 continuation bytes that
        # are waiting on subsequent completion bytes before a full codepoint is
        # formed.
        #
        # Examples
        #
        #   bytes = "é".bytes
        #
        #   buffer << bytes[0]
        #   buffer.empty?
        #   # => false
        #
        #   buffer << bytes[1]
        #   buffer.empty?
        #   # => true
        #
        # Returns true if the buffer is empty.
        def empty?
          @buffer.empty?
        end

        private

        # Total number of bytes in the character introduced by a lead byte.
        #
        # lead - The Integer lead byte (callers only pass values >= 192).
        #
        # Returns 2, 3 or 4.
        def sequence_length(lead)
          if lead >= 240
            4
          elsif lead >= 224
            3
          else
            2
          end
        end

        def error(message)
          raise ParserError, message
        end
      end

      # Not referenced by any code visible in this class.
      BUF_SIZE = 4096

      # Character classes. Each is matched against a single character at a
      # time (mostly through case/when).
      CONTROL = /[\x00-\x1F]/ # raw control characters, rejected inside strings
      WS = /[ \n\t\r]/ # whitespace skipped between tokens
      HEX = /[0-9a-fA-F]/ # digits of a \uXXXX escape
      DIGIT = /[0-9]/
      DIGIT_1_9 = /[1-9]/ # first digit of a non-zero integer
      DIGIT_END = /\d$/ # checks that a buffered exponent ends with a digit

      # Characters accepted after the first letter of each keyword. The
      # complete spelling is verified against the *_KEYWORD strings once
      # enough characters have been buffered.
      TRUE_RE = /[rue]/
      FALSE_RE = /[alse]/
      NULL_RE = /[ul]/
      TRUE_KEYWORD = "true"
      FALSE_KEYWORD = "false"
      NULL_KEYWORD = "null"

      # Structural and punctuation characters.
      LEFT_BRACE = "{"
      RIGHT_BRACE = "}"
      LEFT_BRACKET = "["
      RIGHT_BRACKET = "]"
      BACKSLASH = '\\'
      SLASH = "/"
      QUOTE = '"'
      COMMA = ","
      COLON = ":"

      # Number syntax.
      ZERO = "0"
      MINUS = "-"
      PLUS = "+"
      POINT = "."
      EXPONENT = /[eE]/

      # Letters that may follow a backslash in a string escape. F, N and T are
      # also used to recognise the first letter of false, null and true.
      B, F, N, R, T, U = %w[b f n r t u]

      # Build a parser. An optional block is evaluated in the context of the
      # new parser (via instance_eval), which makes it a convenient place to
      # register event callbacks.
      #
      # Examples
      #
      #   parser = DiscourseAi::Completions::JsonStreamingParser.new do
      #     start_document { puts "start document" }
      #     end_document   { puts "end document" }
      #     start_object   { puts "start object" }
      #     end_object     { puts "end object" }
      #     start_array    { puts "start array" }
      #     end_array      { puts "end array" }
      #     key            { |k| puts "key: #{k}" }
      #     value          { |v| puts "value: #{v}" }
      #   end
      def initialize(&block)
        # One, initially empty, callback list per event type.
        events = %i[start_document end_document start_object end_object start_array end_array key value]
        @listeners = events.each_with_object({}) { |event, table| table[event] = [] }

        @utf8 = Buffer.new
        @state = :start_document

        # Parse stack plus the scratch buffers used while scanning tokens.
        @stack = []
        @buf = +""
        @unicode = +""
        @pos = -1

        # Let the block register observers.
        instance_eval(&block) if block
      end

      # Register a callback for the start_document event.
      #
      # Returns the Array of callbacks registered for this event.
      def start_document(&block)
        @listeners[:start_document].push(block)
      end

      # Register a callback for the end_document event.
      #
      # Returns the Array of callbacks registered for this event.
      def end_document(&block)
        @listeners[:end_document].push(block)
      end

      # Register a callback for the start_object event.
      #
      # Returns the Array of callbacks registered for this event.
      def start_object(&block)
        @listeners[:start_object].push(block)
      end

      # Register a callback for the end_object event.
      #
      # Returns the Array of callbacks registered for this event.
      def end_object(&block)
        @listeners[:end_object].push(block)
      end

      # Register a callback for the start_array event.
      #
      # Returns the Array of callbacks registered for this event.
      def start_array(&block)
        @listeners[:start_array].push(block)
      end

      # Register a callback for the end_array event.
      #
      # Returns the Array of callbacks registered for this event.
      def end_array(&block)
        @listeners[:end_array].push(block)
      end

      # Register a callback for the key event. The callback is called with
      # the object key String each time a key has been parsed.
      #
      # Returns the Array of callbacks registered for this event.
      def key(&block)
        @listeners[:key].push(block)
      end

      # Register a callback for the value event. The callback is called with
      # each parsed string, number or keyword (true, false, nil) value.
      #
      # Returns the Array of callbacks registered for this event.
      def value(&block)
        @listeners[:value].push(block)
      end

      # Pass data into the parser to advance the state machine and
      # generate callback events. This is well suited for an EventMachine
      # receive_data loop. Parser state carries over between calls, so the
      # document may be split across chunks.
      #
      # data - The String of partial JSON data to parse.
      #
      # Raises ParserError if the JSON data is malformed.
      #
      # Returns nothing meaningful.
      def <<(data)
        (@utf8 << data).each_char do |ch|
          @pos += 1
          case @state
          when :start_document
            start_value(ch)
          when :start_object
            case ch
            when QUOTE
              @state = :start_string
              @stack.push(:key)
            when RIGHT_BRACE
              end_container(:object)
            when WS
              # ignore
            else
              error("Expected object key start")
            end
          when :start_string
            case ch
            when QUOTE
              # The stack top says whether this string was a value or a key.
              if @stack.pop == :string
                end_value(@buf)
              else # :key
                @state = :end_key
                notify(:key, @buf)
              end
              @buf = +""
            when BACKSLASH
              @state = :start_escape
            when CONTROL
              error("Control characters must be escaped")
            else
              @buf << ch
            end
          when :start_escape
            case ch
            when QUOTE, BACKSLASH, SLASH
              @buf << ch
              @state = :start_string
            when B
              @buf << "\b"
              @state = :start_string
            when F
              @buf << "\f"
              @state = :start_string
            when N
              @buf << "\n"
              @state = :start_string
            when R
              @buf << "\r"
              @state = :start_string
            when T
              @buf << "\t"
              @state = :start_string
            when U
              @state = :unicode_escape
            else
              error("Expected escaped character")
            end
          when :unicode_escape
            case ch
            when HEX
              @unicode << ch
              if @unicode.size == 4
                codepoint = @unicode.slice!(0, 4).hex
                # A high surrogate waiting for its low half sits on top of the
                # stack as an Integer.
                pending_high = @stack[-1].is_a?(Integer)
                if codepoint >= 0xD800 && codepoint <= 0xDBFF
                  error("Expected low surrogate pair half") if pending_high
                  @state = :start_surrogate_pair
                  @stack.push(codepoint)
                elsif codepoint >= 0xDC00 && codepoint <= 0xDFFF
                  high = @stack.pop
                  error("Expected high surrogate pair half") unless high.is_a?(Integer)
                  pair = ((high - 0xD800) * 0x400) + (codepoint - 0xDC00) + 0x10000
                  @buf << pair
                  @state = :start_string
                else
                  # A high surrogate must be completed by a low surrogate.
                  # Accepting any other escape here would leave the Integer on
                  # the stack and corrupt the string/key bookkeeping.
                  error("Expected low surrogate pair half") if pending_high
                  @buf << codepoint
                  @state = :start_string
                end
              end
            else
              error("Expected unicode escape hex digit")
            end
          when :start_surrogate_pair
            case ch
            when BACKSLASH
              @state = :start_surrogate_pair_u
            else
              error("Expected low surrogate pair half")
            end
          when :start_surrogate_pair_u
            case ch
            when U
              @state = :unicode_escape
            else
              error("Expected low surrogate pair half")
            end
          when :start_negative_number
            case ch
            when ZERO
              @state = :start_zero
              @buf << ch
            when DIGIT_1_9
              @state = :start_int
              @buf << ch
            else
              error("Expected 0-9 digit")
            end
          when :start_zero
            case ch
            when POINT
              @state = :start_float
              @buf << ch
            when EXPONENT
              @state = :start_exponent
              @buf << ch
            else
              # The number ended on the previous character. Emit it, then
              # re-run this character in the new state; @pos is rewound
              # because the redo increments it again.
              end_value(@buf.to_i)
              @buf = +""
              @pos -= 1
              redo
            end
          when :start_float
            case ch
            when DIGIT
              @state = :in_float
              @buf << ch
            else
              error("Expected 0-9 digit")
            end
          when :in_float
            case ch
            when DIGIT
              @buf << ch
            when EXPONENT
              @state = :start_exponent
              @buf << ch
            else
              # Number complete: emit and reprocess this character.
              end_value(@buf.to_f)
              @buf = +""
              @pos -= 1
              redo
            end
          when :start_exponent
            case ch
            when MINUS, PLUS, DIGIT
              @state = :in_exponent
              @buf << ch
            else
              error("Expected +, -, or 0-9 digit")
            end
          when :in_exponent
            case ch
            when DIGIT
              @buf << ch
            else
              # Rejects an exponent that has a sign but no digits.
              error("Expected 0-9 digit") unless @buf =~ DIGIT_END
              end_value(@buf.to_f)
              @buf = +""
              @pos -= 1
              redo
            end
          when :start_int
            case ch
            when DIGIT
              @buf << ch
            when POINT
              @state = :start_float
              @buf << ch
            when EXPONENT
              @state = :start_exponent
              @buf << ch
            else
              # Number complete: emit and reprocess this character.
              end_value(@buf.to_i)
              @buf = +""
              @pos -= 1
              redo
            end
          when :start_true
            keyword(TRUE_KEYWORD, true, TRUE_RE, ch)
          when :start_false
            keyword(FALSE_KEYWORD, false, FALSE_RE, ch)
          when :start_null
            keyword(NULL_KEYWORD, nil, NULL_RE, ch)
          when :end_key
            case ch
            when COLON
              @state = :key_sep
            when WS
              # ignore
            else
              error("Expected colon key separator")
            end
          when :key_sep
            start_value(ch)
          when :start_array
            case ch
            when RIGHT_BRACKET
              end_container(:array)
            when WS
              # ignore
            else
              start_value(ch)
            end
          when :end_value
            case ch
            when COMMA
              @state = :value_sep
            when RIGHT_BRACE
              end_container(:object)
            when RIGHT_BRACKET
              end_container(:array)
            when WS
              # ignore
            else
              error("Expected comma or object or array close")
            end
          when :value_sep
            # After a comma: objects require another key, arrays another value.
            if @stack[-1] == :object
              case ch
              when QUOTE
                @state = :start_string
                @stack.push(:key)
              when WS
                # ignore
              else
                error("Expected object key start")
              end
            else
              start_value(ch)
            end
          when :end_document
            error("Unexpected data") unless ch =~ WS
          end
        end
      end

      # Signal that no more input will arrive and complete the parse.
      #
      # Calling this is only necessary when the document is a single numeric
      # value (integer or float): nothing in the text marks the end of a
      # number, so the parser cannot emit it until told the input is over.
      #
      # Object and array documents complete as soon as their final closing
      # `]` or `}` is scanned, so `finish` is not needed for them.
      #
      # Raises ParserError if the input ended in the middle of a multi-byte
      # character, a container, a string or any other incomplete token.
      #
      # Returns nothing.
      def finish
        # A partial multi-byte character, or an open array, object or string.
        error("Unexpected end-of-file") if !@utf8.empty? || !@stack.empty?

        # Nothing left to do once the document has already ended.
        return if @state == :end_document

        case @state
        when :start_zero, :start_int
          end_value(@buf.to_i)
        when :in_float
          end_value(@buf.to_f)
        when :in_exponent
          # The exponent must end with a digit (e.g. not "1e+").
          error("Unexpected end-of-file") unless @buf =~ DIGIT_END
          end_value(@buf.to_f)
        else
          error("Unexpected end-of-file")
        end
      end

      private

      # Call every observer registered for an event, in registration order.
      #
      # type - The Symbol listener name.
      # args - The argument list handed to each observer proc.
      #
      # Examples
      #
      #    # broadcast events for {"answer": 42}
      #    notify(:start_object)
      #    notify(:key, "answer")
      #    notify(:value, 42)
      #    notify(:end_object)
      #
      # Returns nothing.
      def notify(type, *args)
        @listeners[type].each do |listener|
          listener.call(*args)
        end
      end

      # Close the innermost open container and announce it to observers.
      #
      # type - The Symbol, :object or :array, the closing character implies.
      #
      # Raises ParserError if the innermost open container is not of the
      #   expected type.
      #
      # Returns nothing.
      def end_container(type)
        @state = :end_value

        opened = @stack.pop
        error("Expected end of #{type}") unless opened == type

        closing_event = { object: :end_object, array: :end_array }[type]
        notify(closing_event) if closing_event

        # Closing the outermost container finishes the document.
        notify_end_document if @stack.empty?
      end

      # Mark the parse as finished and tell observers about it. Called once a
      # complete JSON value document (object, array, number, string, true,
      # false, null) has been parsed; `end_document` is the final event
      # observers receive.
      #
      # Returns nothing.
      def notify_end_document
        final_state = :end_document
        @state = final_state
        notify(final_state)
      end

      # Consume one more character of a keyword: true, false or null.
      #
      # The character is buffered if it is one of the letters allowed for the
      # keyword. Once as many characters as the keyword has are buffered, the
      # buffer must spell the keyword exactly; the value is then emitted and
      # the buffer reset.
      #
      # word  - The String keyword ('true', 'false', 'null').
      # value - The Ruby value (true, false, nil).
      # re    - The Regexp of allowed keyword characters.
      # ch    - The current String character being parsed.
      #
      # Raises ParserError if the character does not belong in the expected
      #   keyword, or the buffered text does not spell it.
      #
      # Returns nothing.
      def keyword(word, value, re, ch)
        error("Expected #{word} keyword") unless re.match?(ch)
        @buf << ch

        # Keep collecting until the buffer is as long as the keyword.
        return if @buf.size != word.size

        error("Expected #{word} keyword") unless @buf == word
        @buf = +""
        end_value(value)
      end

      # Handle the first character of a JSON value: object, array, string,
      # true, false, null or number. Whitespace before the value is skipped.
      #
      # Containers are pushed on the parse stack and announced immediately
      # (preceded by `start_document` when they are the outermost value).
      # Scalars only switch state and start filling the buffer.
      #
      # ch - The current character String.
      #
      # Raises ParserError if the character does not signal the start of a
      #   value.
      #
      # Returns nothing.
      def start_value(ch)
        case ch
        when WS
          nil # leading whitespace is ignored
        when LEFT_BRACE, LEFT_BRACKET
          kind, opened = ch == LEFT_BRACE ? %i[object start_object] : %i[array start_array]
          notify(:start_document) if @stack.empty?
          @state = opened
          @stack.push(kind)
          notify(opened)
        when QUOTE
          @state = :start_string
          @stack.push(:string)
        else
          scalar_state =
            case ch
            when T then :start_true
            when F then :start_false
            when N then :start_null
            when MINUS then :start_negative_number
            when ZERO then :start_zero
            when DIGIT_1_9 then :start_int
            end
          error("Expected value") if scalar_state.nil?

          # The first character is part of the keyword or number text.
          @state = scalar_state
          @buf << ch
        end
      end

      # Report a completed string, number or keyword (true, false, null)
      # value to `value` observers and move the state machine on. When the
      # value is the whole document (the parse stack is empty) it is wrapped
      # in `start_document` / `end_document` events.
      #
      # value - The object to broadcast to observers.
      #
      # Returns nothing.
      def end_value(value)
        @state = :end_value
        notify(:start_document) if @stack.empty?
        notify(:value, value)
        return unless @stack.empty?

        notify_end_document
      end

      # Abort parsing.
      #
      # message - The String describing what was expected or went wrong.
      #
      # Raises ParserError with the message followed by the current
      #   character position.
      def error(message)
        detail = "#{message}: char #{@pos}"
        raise ParserError, detail
      end
    end
  end
end