Throw parse exception for multi-valued numeric dims (#12953)

During ingestion, if a row containing multiple values for a numeric dimension is encountered,
the whole ingestion task fails. Ideally, this should just be registered as a parse exception.

Changes:
- Remove `instanceof List` check from `LongDimensionIndexer`, `FloatDimensionIndexer` and `DoubleDimensionIndexer`.

Any invalid type, including list, throws a parse exception in `DimensionHandlerUtils.convertObjectToXXX`
methods. `ParseException` is already handled in `OnHeapIncrementalIndex` and does not fail the entire task.
This commit is contained in:
Kashif Faraz 2022-08-29 10:33:48 +05:30 committed by GitHub
parent 16f5ac5bd5
commit 9843355ddd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 109 additions and 18 deletions

View File

@ -327,8 +327,19 @@ public final class DimensionHandlerUtils
throw new ParseException((String) valObj, "could not convert value [%s] to long", valObj);
}
return ret;
} else if (valObj instanceof List) {
throw new ParseException(
valObj.getClass().toString(),
"Could not ingest value %s as long. A long column cannot have multiple values in the same row.",
valObj
);
} else {
throw new ParseException(valObj.getClass().toString(), "Unknown type[%s]", valObj.getClass());
throw new ParseException(
valObj.getClass().toString(),
"Could not convert value [%s] to long. Invalid type: [%s]",
valObj,
valObj.getClass()
);
}
}
@ -355,8 +366,19 @@ public final class DimensionHandlerUtils
throw new ParseException((String) valObj, "could not convert value [%s] to float", valObj);
}
return ret;
} else if (valObj instanceof List) {
throw new ParseException(
valObj.getClass().toString(),
"Could not ingest value %s as float. A float column cannot have multiple values in the same row.",
valObj
);
} else {
throw new ParseException(valObj.getClass().toString(), "Unknown type[%s]", valObj.getClass());
throw new ParseException(
valObj.getClass().toString(),
"Could not convert value [%s] to float. Invalid type: [%s]",
valObj,
valObj.getClass()
);
}
}
@ -537,8 +559,19 @@ public final class DimensionHandlerUtils
throw new ParseException((String) valObj, "could not convert value [%s] to double", valObj);
}
return ret;
} else if (valObj instanceof List) {
throw new ParseException(
valObj.getClass().toString(),
"Could not ingest value %s as double. A double column cannot have multiple values in the same row.",
valObj
);
} else {
throw new ParseException(valObj.getClass().toString(), "Unknown type[%s]", valObj.getClass());
throw new ParseException(
valObj.getClass().toString(),
"Could not convert value [%s] to double. Invalid type: [%s]",
valObj,
valObj.getClass()
);
}
}

View File

@ -34,7 +34,6 @@ import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
import javax.annotation.Nullable;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double, Double>
@ -46,10 +45,6 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
@Override
public EncodedKeyComponent<Double> processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
if (dimValues instanceof List) {
throw new UnsupportedOperationException("Numeric columns do not support multivalue rows.");
}
Double d = DimensionHandlerUtils.convertObjectToDouble(dimValues, reportParseExceptions);
if (d == null) {
hasNulls = NullHandling.sqlCompatible();

View File

@ -34,7 +34,6 @@ import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
import javax.annotation.Nullable;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Float>
@ -46,10 +45,6 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
@Override
public EncodedKeyComponent<Float> processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
if (dimValues instanceof List) {
throw new UnsupportedOperationException("Numeric columns do not support multivalue rows.");
}
Float f = DimensionHandlerUtils.convertObjectToFloat(dimValues, reportParseExceptions);
if (f == null) {
hasNulls = NullHandling.sqlCompatible();

View File

@ -34,7 +34,6 @@ import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
import javax.annotation.Nullable;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
@ -46,10 +45,6 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
@Override
public EncodedKeyComponent<Long> processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
if (dimValues instanceof List) {
throw new UnsupportedOperationException("Numeric columns do not support multivalue rows.");
}
Long l = DimensionHandlerUtils.convertObjectToLong(dimValues, reportParseExceptions);
if (l == null) {
hasNulls = NullHandling.sqlCompatible();

View File

@ -232,6 +232,79 @@ public class IncrementalIndexTest extends InitializedNullHandlingTest
);
}
@Test
public void testMultiValuedNumericDimensions() throws IndexSizeExceededException
{
IncrementalIndex index = indexCreator.createIndex();
IncrementalIndexAddResult result;
result = index.add(
new MapBasedInputRow(
0,
Lists.newArrayList("string", "float", "long", "double"),
ImmutableMap.of(
"string", "A",
"float", "19.0",
"long", Arrays.asList(10L, 5L),
"double", 21.0d
)
)
);
Assert.assertEquals(UnparseableColumnsParseException.class, result.getParseException().getClass());
Assert.assertEquals(
"{string=A, float=19.0, long=[10, 5], double=21.0}",
result.getParseException().getInput()
);
Assert.assertEquals(
"Found unparseable columns in row: [{string=A, float=19.0, long=[10, 5], double=21.0}], exceptions: [Could not ingest value [10, 5] as long. A long column cannot have multiple values in the same row.]",
result.getParseException().getMessage()
);
result = index.add(
new MapBasedInputRow(
0,
Lists.newArrayList("string", "float", "long", "double"),
ImmutableMap.of(
"string", "A",
"float", Arrays.asList(10.0f, 5.0f),
"long", 20,
"double", 21.0d
)
)
);
Assert.assertEquals(UnparseableColumnsParseException.class, result.getParseException().getClass());
Assert.assertEquals(
"{string=A, float=[10.0, 5.0], long=20, double=21.0}",
result.getParseException().getInput()
);
Assert.assertEquals(
"Found unparseable columns in row: [{string=A, float=[10.0, 5.0], long=20, double=21.0}], exceptions: [Could not ingest value [10.0, 5.0] as float. A float column cannot have multiple values in the same row.]",
result.getParseException().getMessage()
);
result = index.add(
new MapBasedInputRow(
0,
Lists.newArrayList("string", "float", "long", "double"),
ImmutableMap.of(
"string", "A",
"float", 19.0,
"long", 20,
"double", Arrays.asList(10.0D, 5.0D)
)
)
);
Assert.assertEquals(UnparseableColumnsParseException.class, result.getParseException().getClass());
Assert.assertEquals(
"{string=A, float=19.0, long=20, double=[10.0, 5.0]}",
result.getParseException().getInput()
);
Assert.assertEquals(
"Found unparseable columns in row: [{string=A, float=19.0, long=20, double=[10.0, 5.0]}], exceptions: [Could not ingest value [10.0, 5.0] as double. A double column cannot have multiple values in the same row.]",
result.getParseException().getMessage()
);
}
@Test
public void sameRow() throws IndexSizeExceededException
{