From dbb9c802b1b9a11ce135cabea65bd4046191a4d4 Mon Sep 17 00:00:00 2001
From: David Kyle
Date: Thu, 9 Jul 2020 13:43:46 +0100
Subject: [PATCH] Better error message when the model cannot be parsed due to
 its size (#59166) (#59209)

The actual cause can be lost in a long list of parse exceptions; this
change surfaces the cause when the problem is the size of the model.
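
For illustration, a minimal, self-contained sketch of the cause-unwrapping
technique the change relies on. This is plain Java, not the Elasticsearch
classes: UnwrapSketch and BoundExceededException are hypothetical names,
and the real ExceptionsHelper.unwrap returns a plain Throwable, which is
why the hunk below casts its result.

    import java.io.IOException;

    public class UnwrapSketch {
        // Hypothetical stand-in for SimpleBoundedInputStream.StreamSizeExceededException.
        static class BoundExceededException extends IOException {
            final long maxBytes;
            BoundExceededException(long maxBytes) {
                super("exceeded [" + maxBytes + "] bytes");
                this.maxBytes = maxBytes;
            }
        }

        // Walk the cause chain looking for a specific exception type,
        // as ExceptionsHelper.unwrap does in the change below.
        static <T extends Throwable> T unwrap(Throwable t, Class<T> clazz) {
            while (t != null) {
                if (clazz.isInstance(t)) {
                    return clazz.cast(t);
                }
                t = t.getCause();
            }
            return null;
        }

        public static void main(String[] args) {
            Exception parseFailure = new Exception("parse failed",
                new RuntimeException("wrapped", new BoundExceededException(1024)));
            BoundExceededException cause = unwrap(parseFailure, BoundExceededException.class);
            // Recovers the size limit from deep in the cause chain: prints 1024.
            System.out.println(cause.maxBytes);
        }
    }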
---
 .../InferenceToXContentCompressor.java        | 23 ++++++++++++++++++-
 .../utils/SimpleBoundedInputStream.java       | 18 +++++++++++++--
 .../InferenceToXContentCompressorTests.java   | 23 ++++++++++++++++++-
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressor.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressor.java
index b0b9c850e7a..47af238f711 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressor.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressor.java
@@ -6,6 +6,7 @@
 
 package org.elasticsearch.xpack.core.ml.inference;
 
+import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.CheckedFunction;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.bytes.BytesReference;
@@ -16,6 +17,7 @@ import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.ToXContentObject;
 import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.common.xcontent.XContentParseException;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.common.xcontent.json.JsonXContent;
@@ -51,10 +53,29 @@ public final class InferenceToXContentCompressor {
     public static <T> T inflate(String compressedString,
                                 CheckedFunction<XContentParser, T, IOException> parserFunction,
                                 NamedXContentRegistry xContentRegistry) throws IOException {
+        return inflate(compressedString, parserFunction, xContentRegistry, MAX_INFLATED_BYTES);
+    }
+
+    static <T> T inflate(String compressedString,
+                         CheckedFunction<XContentParser, T, IOException> parserFunction,
+                         NamedXContentRegistry xContentRegistry,
+                         long maxBytes) throws IOException {
         try(XContentParser parser = JsonXContent.jsonXContent.createParser(xContentRegistry,
             LoggingDeprecationHandler.INSTANCE,
-            inflate(compressedString, MAX_INFLATED_BYTES))) {
+            inflate(compressedString, maxBytes))) {
             return parserFunction.apply(parser);
+        } catch (XContentParseException parseException) {
+            SimpleBoundedInputStream.StreamSizeExceededException streamSizeCause =
+                (SimpleBoundedInputStream.StreamSizeExceededException)
+                    ExceptionsHelper.unwrap(parseException, SimpleBoundedInputStream.StreamSizeExceededException.class);
+
+            if (streamSizeCause != null) {
+                // The root cause is that the model is too big.
+                throw new IOException("Cannot parse model definition as the content is larger than the maximum stream size of [" +
+                    streamSizeCause.getMaxBytes() + "] bytes. Max stream size is 10% of the JVM heap or 1GB whichever is smallest");
+            } else {
+                throw parseException;
+            }
         }
     }
 
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/utils/SimpleBoundedInputStream.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/utils/SimpleBoundedInputStream.java
index 1d845d3b332..766119e6138 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/utils/SimpleBoundedInputStream.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/utils/SimpleBoundedInputStream.java
@@ -20,6 +20,19 @@ public final class SimpleBoundedInputStream extends InputStream {
     private final long maxBytes;
     private long numBytes;
 
+    public static class StreamSizeExceededException extends IOException {
+        private final long maxBytes;
+
+        public StreamSizeExceededException(String message, long maxBytes) {
+            super(message);
+            this.maxBytes = maxBytes;
+        }
+
+        public long getMaxBytes() {
+            return maxBytes;
+        }
+    }
+
     public SimpleBoundedInputStream(InputStream inputStream, long maxBytes) {
         this.in = ExceptionsHelper.requireNonNull(inputStream, "inputStream");
         if (maxBytes < 0) {
@@ -31,13 +44,14 @@ public final class SimpleBoundedInputStream extends InputStream {
     /**
      * A simple wrapper around the injected input stream that restricts the total number of bytes able to be read.
      * @return The byte read.
-     * @throws IOException on failure or when byte limit is exceeded
+     * @throws StreamSizeExceededException when byte limit is exceeded
+     * @throws IOException on failure
      */
     @Override
     public int read() throws IOException {
         // We have reached the maximum, signal stream completion.
         if (numBytes >= maxBytes) {
-            throw new IOException("input stream exceeded maximum bytes of [" + maxBytes + "]");
+            throw new StreamSizeExceededException("input stream exceeded maximum bytes of [" + maxBytes + "]", maxBytes);
         }
         numBytes++;
         return in.read();
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressorTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressorTests.java
index 099c9ea4465..d8ca7130a3b 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressorTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/InferenceToXContentCompressorTests.java
@@ -46,13 +46,34 @@ public class InferenceToXContentCompressorTests extends ESTestCase {
         int max = firstDeflate.getBytes(StandardCharsets.UTF_8).length + 10;
         IOException ex = expectThrows(IOException.class,
             () -> Streams.readFully(InferenceToXContentCompressor.inflate(firstDeflate, max)));
-        assertThat(ex.getMessage(), equalTo("input stream exceeded maximum bytes of [" + max + "]"));
+        assertThat(ex.getMessage(), equalTo("" +
+            "input stream exceeded maximum bytes of [" + max + "]"));
     }
 
     public void testInflateGarbage() {
         expectThrows(IOException.class, () -> Streams.readFully(InferenceToXContentCompressor.inflate(randomAlphaOfLength(10), 100L)));
     }
 
+    public void testInflateParsingTooLargeStream() throws IOException {
+        TrainedModelDefinition definition = TrainedModelDefinitionTests.createRandomBuilder()
+            .setPreProcessors(Stream.generate(() -> randomFrom(FrequencyEncodingTests.createRandom(),
+                OneHotEncodingTests.createRandom(),
+                TargetMeanEncodingTests.createRandom()))
+                .limit(100)
+                .collect(Collectors.toList()))
+            .build();
+        String compressedString = InferenceToXContentCompressor.deflate(definition);
+        int max = compressedString.getBytes(StandardCharsets.UTF_8).length + 10;
+
+        IOException e = expectThrows(IOException.class, ()-> InferenceToXContentCompressor.inflate(compressedString,
+            parser -> TrainedModelDefinition.fromXContent(parser, true).build(),
+            xContentRegistry(),
+            max));
+
+        assertThat(e.getMessage(), equalTo("Cannot parse model definition as the content is larger than the maximum stream size of [" +
+            max + "] bytes. Max stream size is 10% of the JVM heap or 1GB whichever is smallest"));
+    }
+
     @Override
     protected NamedXContentRegistry xContentRegistry() {
         return new NamedXContentRegistry(new MlInferenceNamedXContentProvider().getNamedXContentParsers());
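
As a usage note, here is a minimal standalone sketch of the bounded-read
technique that SimpleBoundedInputStream implements and that this patch
extends with a typed exception. It is illustrative only: BoundedReadSketch,
BoundedStream and LimitException are hypothetical names, not the
Elasticsearch classes.

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    public class BoundedReadSketch {
        // Typed exception carrying the limit, like StreamSizeExceededException above.
        static class LimitException extends IOException {
            final long maxBytes;
            LimitException(long maxBytes) {
                super("input stream exceeded maximum bytes of [" + maxBytes + "]");
                this.maxBytes = maxBytes;
            }
        }

        // Wrapper that fails the read once the byte budget is spent,
        // mirroring SimpleBoundedInputStream.read() after this change.
        static class BoundedStream extends InputStream {
            private final InputStream in;
            private final long maxBytes;
            private long numBytes;

            BoundedStream(InputStream in, long maxBytes) {
                this.in = in;
                this.maxBytes = maxBytes;
            }

            @Override
            public int read() throws IOException {
                if (numBytes >= maxBytes) {
                    throw new LimitException(maxBytes);
                }
                numBytes++;
                return in.read();
            }
        }

        public static void main(String[] args) throws IOException {
            // 16 bytes of input against an 8-byte budget: the 9th read fails.
            InputStream bounded = new BoundedStream(new ByteArrayInputStream(new byte[16]), 8);
            try {
                while (bounded.read() != -1) { /* drain */ }
            } catch (LimitException e) {
                // A caller (or an unwrap helper) can recover the limit from the typed exception.
                System.out.println("limit was " + e.maxBytes);
            }
        }
    }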