From 9843355ddd8bf71c070b71d9832b12e66486ed1f Mon Sep 17 00:00:00 2001 From: Kashif Faraz Date: Mon, 29 Aug 2022 10:33:48 +0530 Subject: [PATCH] Throw parse exception for multi-valued numeric dims (#12953) During ingestion, if a row containing multiple values for a numeric dimension is encountered, the whole ingestion task fails. Ideally, this should just be registered as a parse exception. Changes: - Remove `instanceof List` check from `LongDimensionIndexer`, `FloatDimensionIndexer` and `DoubleDimensionIndexer`. Any invalid type, including list, throws a parse exception in `DimensionHandlerUtils.convertObjectToXXX` methods. `ParseException` is already handled in `OnHeapIncrementalIndex` and does not fail the entire task. --- .../druid/segment/DimensionHandlerUtils.java | 39 +++++++++- .../druid/segment/DoubleDimensionIndexer.java | 5 -- .../druid/segment/FloatDimensionIndexer.java | 5 -- .../druid/segment/LongDimensionIndexer.java | 5 -- .../incremental/IncrementalIndexTest.java | 73 +++++++++++++++++++ 5 files changed, 109 insertions(+), 18 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java index 309e14b3e2a..b509c7f058d 100644 --- a/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java +++ b/processing/src/main/java/org/apache/druid/segment/DimensionHandlerUtils.java @@ -327,8 +327,19 @@ public final class DimensionHandlerUtils throw new ParseException((String) valObj, "could not convert value [%s] to long", valObj); } return ret; + } else if (valObj instanceof List) { + throw new ParseException( + valObj.getClass().toString(), + "Could not ingest value %s as long. A long column cannot have multiple values in the same row.", + valObj + ); } else { - throw new ParseException(valObj.getClass().toString(), "Unknown type[%s]", valObj.getClass()); + throw new ParseException( + valObj.getClass().toString(), + "Could not convert value [%s] to long. Invalid type: [%s]", + valObj, + valObj.getClass() + ); } } @@ -355,8 +366,19 @@ public final class DimensionHandlerUtils throw new ParseException((String) valObj, "could not convert value [%s] to float", valObj); } return ret; + } else if (valObj instanceof List) { + throw new ParseException( + valObj.getClass().toString(), + "Could not ingest value %s as float. A float column cannot have multiple values in the same row.", + valObj + ); } else { - throw new ParseException(valObj.getClass().toString(), "Unknown type[%s]", valObj.getClass()); + throw new ParseException( + valObj.getClass().toString(), + "Could not convert value [%s] to float. Invalid type: [%s]", + valObj, + valObj.getClass() + ); } } @@ -537,8 +559,19 @@ public final class DimensionHandlerUtils throw new ParseException((String) valObj, "could not convert value [%s] to double", valObj); } return ret; + } else if (valObj instanceof List) { + throw new ParseException( + valObj.getClass().toString(), + "Could not ingest value %s as double. A double column cannot have multiple values in the same row.", + valObj + ); } else { - throw new ParseException(valObj.getClass().toString(), "Unknown type[%s]", valObj.getClass()); + throw new ParseException( + valObj.getClass().toString(), + "Could not convert value [%s] to double. Invalid type: [%s]", + valObj, + valObj.getClass() + ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java index 63be88a6d2f..f4e13cca565 100644 --- a/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/DoubleDimensionIndexer.java @@ -34,7 +34,6 @@ import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; import javax.annotation.Nullable; import java.util.Comparator; -import java.util.List; import java.util.Objects; public class DoubleDimensionIndexer implements DimensionIndexer @@ -46,10 +45,6 @@ public class DoubleDimensionIndexer implements DimensionIndexer processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { - if (dimValues instanceof List) { - throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); - } - Double d = DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions); if (d == null) { hasNulls = NullHandling.sqlCompatible(); diff --git a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java index 2da428f2873..be5e86b7bb6 100644 --- a/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/FloatDimensionIndexer.java @@ -34,7 +34,6 @@ import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; import javax.annotation.Nullable; import java.util.Comparator; -import java.util.List; import java.util.Objects; public class FloatDimensionIndexer implements DimensionIndexer @@ -46,10 +45,6 @@ public class FloatDimensionIndexer implements DimensionIndexer processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { - if (dimValues instanceof List) { - throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); - } - Float f = DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions); if (f == null) { hasNulls = NullHandling.sqlCompatible(); diff --git a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java index 20867302098..85ed29b9c28 100644 --- a/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/LongDimensionIndexer.java @@ -34,7 +34,6 @@ import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; import javax.annotation.Nullable; import java.util.Comparator; -import java.util.List; import java.util.Objects; public class LongDimensionIndexer implements DimensionIndexer @@ -46,10 +45,6 @@ public class LongDimensionIndexer implements DimensionIndexer @Override public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions) { - if (dimValues instanceof List) { - throw new UnsupportedOperationException("Numeric columns do not support multivalue rows."); - } - Long l = DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions); if (l == null) { hasNulls = NullHandling.sqlCompatible(); diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java index 17534653646..9c2523d29ed 100644 --- a/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java +++ b/processing/src/test/java/org/apache/druid/segment/incremental/IncrementalIndexTest.java @@ -232,6 +232,79 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest ); } + @Test + public void testMultiValuedNumericDimensions() throws IndexSizeExceededException + { + IncrementalIndex index = indexCreator.createIndex(); + + IncrementalIndexAddResult result; + result = index.add( + new MapBasedInputRow( + 0, + Lists.newArrayList("string", "float", "long", "double"), + ImmutableMap.of( + "string", "A", + "float", "19.0", + "long", Arrays.asList(10L, 5L), + "double", 21.0d + ) + ) + ); + Assert.assertEquals(UnparseableColumnsParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "{string=A, float=19.0, long=[10, 5], double=21.0}", + result.getParseException().getInput() + ); + Assert.assertEquals( + "Found unparseable columns in row: [{string=A, float=19.0, long=[10, 5], double=21.0}], exceptions: [Could not ingest value [10, 5] as long. A long column cannot have multiple values in the same row.]", + result.getParseException().getMessage() + ); + + result = index.add( + new MapBasedInputRow( + 0, + Lists.newArrayList("string", "float", "long", "double"), + ImmutableMap.of( + "string", "A", + "float", Arrays.asList(10.0f, 5.0f), + "long", 20, + "double", 21.0d + ) + ) + ); + Assert.assertEquals(UnparseableColumnsParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "{string=A, float=[10.0, 5.0], long=20, double=21.0}", + result.getParseException().getInput() + ); + Assert.assertEquals( + "Found unparseable columns in row: [{string=A, float=[10.0, 5.0], long=20, double=21.0}], exceptions: [Could not ingest value [10.0, 5.0] as float. A float column cannot have multiple values in the same row.]", + result.getParseException().getMessage() + ); + + result = index.add( + new MapBasedInputRow( + 0, + Lists.newArrayList("string", "float", "long", "double"), + ImmutableMap.of( + "string", "A", + "float", 19.0, + "long", 20, + "double", Arrays.asList(10.0D, 5.0D) + ) + ) + ); + Assert.assertEquals(UnparseableColumnsParseException.class, result.getParseException().getClass()); + Assert.assertEquals( + "{string=A, float=19.0, long=20, double=[10.0, 5.0]}", + result.getParseException().getInput() + ); + Assert.assertEquals( + "Found unparseable columns in row: [{string=A, float=19.0, long=20, double=[10.0, 5.0]}], exceptions: [Could not ingest value [10.0, 5.0] as double. A double column cannot have multiple values in the same row.]", + result.getParseException().getMessage() + ); + } + @Test public void sameRow() throws IndexSizeExceededException {