Introduce natural comparator for types that don't have a StringComparator (#15145)

Fixes a bug when executing queries with the ordering of arrays
This commit is contained in:
Laksh Singla 2023-10-16 10:37:32 +05:30 committed by GitHub
parent 4b0d1b3488
commit dc8d2192c3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 380 additions and 122 deletions

View File

@ -311,8 +311,17 @@ public class GroupByQueryKit implements QueryKit<GroupByQuery>
}
}
/**
* Only allow ordering the queries from the MSQ engine, ignoring the comparator that is set in the query. This
* function checks if it is safe to do so, which is the case if the natural comparator is used for the dimension.
* Since MSQ executes the queries planned by the SQL layer, this is a sanity check as we always add the natural
* comparator for the dimensions there
*/
private static boolean isNaturalComparator(final ValueType type, final StringComparator comparator)
{
if (StringComparators.NATURAL.equals(comparator)) {
return true;
}
return ((type == ValueType.STRING && StringComparators.LEXICOGRAPHIC.equals(comparator))
|| (type.isNumeric() && StringComparators.NUMERIC.equals(comparator)))
&& !type.isArray();

View File

@ -30,9 +30,11 @@ import org.apache.druid.msq.indexing.MSQTuningConfig;
import org.apache.druid.msq.indexing.destination.TaskReportMSQDestination;
import org.apache.druid.msq.test.MSQTestBase;
import org.apache.druid.msq.util.MultiStageQueryContext;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.NestedDataTestUtils;
import org.apache.druid.query.Query;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
@ -43,6 +45,7 @@ import org.apache.druid.sql.calcite.planner.ColumnMappings;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.utils.CompressionUtils;
import org.hamcrest.CoreMatchers;
import org.junit.Before;
import org.junit.Test;
import org.junit.internal.matchers.ThrowableMessageMatcher;
import org.junit.runner.RunWith;
@ -67,6 +70,9 @@ import java.util.Map;
@RunWith(Parameterized.class)
public class MSQArraysTest extends MSQTestBase
{
private String dataFileNameJsonString;
private String dataFileSignatureJsonString;
private DataSource dataFileExternalDataSource;
@Parameterized.Parameters(name = "{index}:with context {0}")
public static Collection<Object[]> data()
@ -86,6 +92,40 @@ public class MSQArraysTest extends MSQTestBase
@Parameterized.Parameter(1)
public Map<String, Object> context;
@Before
public void setup() throws IOException
{
// Read the file and make the name available to the tests
File dataFile = temporaryFolder.newFile();
final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader()
.getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE);
final InputStream decompressing = CompressionUtils.decompress(
resourceStream,
NestedDataTestUtils.ARRAY_TYPES_DATA_FILE
);
Files.copy(decompressing, dataFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
decompressing.close();
dataFileNameJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFile);
RowSignature dataFileSignature = RowSignature.builder()
.add("timestamp", ColumnType.STRING)
.add("arrayString", ColumnType.STRING_ARRAY)
.add("arrayStringNulls", ColumnType.STRING_ARRAY)
.add("arrayLong", ColumnType.LONG_ARRAY)
.add("arrayLongNulls", ColumnType.LONG_ARRAY)
.add("arrayDouble", ColumnType.DOUBLE_ARRAY)
.add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY)
.build();
dataFileSignatureJsonString = queryFramework().queryJsonMapper().writeValueAsString(dataFileSignature);
dataFileExternalDataSource = new ExternalDataSource(
new LocalInputSource(null, null, ImmutableList.of(dataFile)),
new JsonInputFormat(null, null, null, null, null),
dataFileSignature
);
}
/**
* Tests the behaviour of INSERT query when arrayIngestMode is set to none (default) and the user tries to ingest
* string arrays
@ -135,7 +175,7 @@ public class MSQArraysTest extends MSQTestBase
* Tests the INSERT query when 'auto' type is set
*/
@Test
public void testInsertArraysAutoType() throws IOException
public void testInsertArraysAutoType()
{
List<Object[]> expectedRows = Arrays.asList(
new Object[]{1672531200000L, null, null, null},
@ -164,18 +204,6 @@ public class MSQArraysTest extends MSQTestBase
final Map<String, Object> adjustedContext = new HashMap<>(context);
adjustedContext.put(MultiStageQueryContext.CTX_USE_AUTO_SCHEMAS, true);
final File tmpFile = temporaryFolder.newFile();
final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader()
.getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE);
final InputStream decompressing = CompressionUtils.decompress(
resourceStream,
NestedDataTestUtils.ARRAY_TYPES_DATA_FILE
);
Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
decompressing.close();
final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile);
testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n"
+ " TIME_PARSE(\"timestamp\") as __time,\n"
+ " arrayString,\n"
@ -183,7 +211,7 @@ public class MSQArraysTest extends MSQTestBase
+ " arrayDouble\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n"
+ " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n"
+ " '{\"type\": \"json\"}',\n"
+ " '[{\"name\": \"timestamp\", \"type\": \"STRING\"}, {\"name\": \"arrayString\", \"type\": \"COMPLEX<json>\"}, {\"name\": \"arrayLong\", \"type\": \"COMPLEX<json>\"}, {\"name\": \"arrayDouble\", \"type\": \"COMPLEX<json>\"}]'\n"
+ " )\n"
@ -200,39 +228,11 @@ public class MSQArraysTest extends MSQTestBase
* types as well
*/
@Test
public void testInsertArraysWithStringArraysAsMVDs() throws IOException
public void testInsertArraysWithStringArraysAsMVDs()
{
RowSignature rowSignatureWithoutTimeAndStringColumns =
RowSignature.builder()
.add("arrayLong", ColumnType.LONG_ARRAY)
.add("arrayLongNulls", ColumnType.LONG_ARRAY)
.add("arrayDouble", ColumnType.DOUBLE_ARRAY)
.add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY)
.build();
RowSignature fileSignature = RowSignature.builder()
.add("timestamp", ColumnType.STRING)
.add("arrayString", ColumnType.STRING_ARRAY)
.add("arrayStringNulls", ColumnType.STRING_ARRAY)
.addAll(rowSignatureWithoutTimeAndStringColumns)
.build();
final Map<String, Object> adjustedContext = new HashMap<>(context);
adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, "mvd");
final File tmpFile = temporaryFolder.newFile();
final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader()
.getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE);
final InputStream decompressing = CompressionUtils.decompress(
resourceStream,
NestedDataTestUtils.ARRAY_TYPES_DATA_FILE
);
Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
decompressing.close();
final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile);
testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n"
+ " TIME_PARSE(\"timestamp\") as __time,\n"
+ " arrayString,\n"
@ -243,9 +243,9 @@ public class MSQArraysTest extends MSQTestBase
+ " arrayDoubleNulls\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n"
+ " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n"
+ " '{\"type\": \"json\"}',\n"
+ " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n"
+ " '" + dataFileSignatureJsonString + "'\n"
+ " )\n"
+ ") PARTITIONED BY ALL")
.setQueryContext(adjustedContext)
@ -262,7 +262,7 @@ public class MSQArraysTest extends MSQTestBase
* array types
*/
@Test
public void testInsertArraysAsArrays() throws IOException
public void testInsertArraysAsArrays()
{
final List<Object[]> expectedRows = Arrays.asList(
new Object[]{
@ -403,11 +403,6 @@ public class MSQArraysTest extends MSQTestBase
.add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY)
.build();
RowSignature fileSignature = RowSignature.builder()
.add("timestamp", ColumnType.STRING)
.addAll(rowSignatureWithoutTimeColumn)
.build();
RowSignature rowSignature = RowSignature.builder()
.add("__time", ColumnType.LONG)
.addAll(rowSignatureWithoutTimeColumn)
@ -416,18 +411,6 @@ public class MSQArraysTest extends MSQTestBase
final Map<String, Object> adjustedContext = new HashMap<>(context);
adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, "array");
final File tmpFile = temporaryFolder.newFile();
final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader()
.getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE);
final InputStream decompressing = CompressionUtils.decompress(
resourceStream,
NestedDataTestUtils.ARRAY_TYPES_DATA_FILE
);
Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
decompressing.close();
final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile);
testIngestQuery().setSql(" INSERT INTO foo1 SELECT\n"
+ " TIME_PARSE(\"timestamp\") as __time,\n"
+ " arrayString,\n"
@ -438,9 +421,9 @@ public class MSQArraysTest extends MSQTestBase
+ " arrayDoubleNulls\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n"
+ " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n"
+ " '{\"type\": \"json\"}',\n"
+ " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n"
+ " '" + dataFileSignatureJsonString + "'\n"
+ " )\n"
+ ") PARTITIONED BY ALL")
.setQueryContext(adjustedContext)
@ -451,26 +434,26 @@ public class MSQArraysTest extends MSQTestBase
}
@Test
public void testSelectOnArraysWithArrayIngestModeAsNone() throws IOException
public void testSelectOnArraysWithArrayIngestModeAsNone()
{
testSelectOnArrays("none");
}
@Test
public void testSelectOnArraysWithArrayIngestModeAsMVD() throws IOException
public void testSelectOnArraysWithArrayIngestModeAsMVD()
{
testSelectOnArrays("mvd");
}
@Test
public void testSelectOnArraysWithArrayIngestModeAsArray() throws IOException
public void testSelectOnArraysWithArrayIngestModeAsArray()
{
testSelectOnArrays("array");
}
// Tests the behaviour of the select with the given arrayIngestMode. The expectation should be the same, since the
// arrayIngestMode should only determine how the array gets ingested at the end.
public void testSelectOnArrays(String arrayIngestMode) throws IOException
public void testSelectOnArrays(String arrayIngestMode)
{
final List<Object[]> expectedRows = Arrays.asList(
new Object[]{
@ -611,11 +594,6 @@ public class MSQArraysTest extends MSQTestBase
.add("arrayDoubleNulls", ColumnType.DOUBLE_ARRAY)
.build();
RowSignature fileSignature = RowSignature.builder()
.add("timestamp", ColumnType.STRING)
.addAll(rowSignatureWithoutTimeColumn)
.build();
RowSignature rowSignature = RowSignature.builder()
.add("__time", ColumnType.LONG)
.addAll(rowSignatureWithoutTimeColumn)
@ -634,24 +612,8 @@ public class MSQArraysTest extends MSQTestBase
final Map<String, Object> adjustedContext = new HashMap<>(context);
adjustedContext.put(MultiStageQueryContext.CTX_ARRAY_INGEST_MODE, arrayIngestMode);
final File tmpFile = temporaryFolder.newFile();
final InputStream resourceStream = NestedDataTestUtils.class.getClassLoader()
.getResourceAsStream(NestedDataTestUtils.ARRAY_TYPES_DATA_FILE);
final InputStream decompressing = CompressionUtils.decompress(
resourceStream,
NestedDataTestUtils.ARRAY_TYPES_DATA_FILE
);
Files.copy(decompressing, tmpFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
decompressing.close();
final String toReadFileNameAsJson = queryFramework().queryJsonMapper().writeValueAsString(tmpFile);
Query<?> expectedQuery = newScanQueryBuilder()
.dataSource(new ExternalDataSource(
new LocalInputSource(null, null, ImmutableList.of(tmpFile)),
new JsonInputFormat(null, null, null, null, null),
fileSignature
))
.dataSource(dataFileExternalDataSource)
.intervals(querySegmentSpec(Filtration.eternity()))
.columns(
"arrayDouble",
@ -681,9 +643,9 @@ public class MSQArraysTest extends MSQTestBase
+ " arrayDoubleNulls\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{ \"files\": [" + toReadFileNameAsJson + "],\"type\":\"local\"}',\n"
+ " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n"
+ " '{\"type\": \"json\"}',\n"
+ " '" + queryFramework().queryJsonMapper().writeValueAsString(fileSignature) + "'\n"
+ " '" + dataFileSignatureJsonString + "'\n"
+ " )\n"
+ ")")
.setQueryContext(adjustedContext)
@ -708,6 +670,192 @@ public class MSQArraysTest extends MSQTestBase
.verifyResults();
}
@Test
public void testScanWithOrderByOnStringArray()
{
final List<Object[]> expectedRows = Arrays.asList(
new Object[]{Arrays.asList("d", "e")},
new Object[]{Arrays.asList("d", "e")},
new Object[]{Arrays.asList("b", "c")},
new Object[]{Arrays.asList("b", "c")},
new Object[]{Arrays.asList("a", "b", "c")},
new Object[]{Arrays.asList("a", "b", "c")},
new Object[]{Arrays.asList("a", "b")},
new Object[]{Arrays.asList("a", "b")},
new Object[]{Arrays.asList("a", "b")},
new Object[]{Arrays.asList("a", "b")},
new Object[]{null},
new Object[]{null},
new Object[]{null},
new Object[]{null}
);
RowSignature rowSignature = RowSignature.builder()
.add("arrayString", ColumnType.STRING_ARRAY)
.build();
RowSignature scanSignature = RowSignature.builder()
.add("arrayString", ColumnType.STRING_ARRAY)
.build();
Query<?> expectedQuery = newScanQueryBuilder()
.dataSource(dataFileExternalDataSource)
.intervals(querySegmentSpec(Filtration.eternity()))
.columns("arrayString")
.orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayString", ScanQuery.Order.DESCENDING)))
.context(defaultScanQueryContext(context, scanSignature))
.build();
testSelectQuery().setSql("SELECT\n"
+ " arrayString\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n"
+ " '{\"type\": \"json\"}',\n"
+ " '" + dataFileSignatureJsonString + "'\n"
+ " )\n"
+ ")\n"
+ "ORDER BY arrayString DESC")
.setQueryContext(context)
.setExpectedMSQSpec(MSQSpec
.builder()
.query(expectedQuery)
.columnMappings(new ColumnMappings(ImmutableList.of(
new ColumnMapping("arrayString", "arrayString")
)))
.tuningConfig(MSQTuningConfig.defaultConfig())
.destination(TaskReportMSQDestination.INSTANCE)
.build()
)
.setExpectedRowSignature(rowSignature)
.setExpectedResultRows(expectedRows)
.verifyResults();
}
@Test
public void testScanWithOrderByOnLongArray()
{
final List<Object[]> expectedRows = Arrays.asList(
new Object[]{null},
new Object[]{null},
new Object[]{null},
new Object[]{null},
new Object[]{Arrays.asList(1L, 2L, 3L)},
new Object[]{Arrays.asList(1L, 2L, 3L)},
new Object[]{Arrays.asList(1L, 2L, 3L)},
new Object[]{Arrays.asList(1L, 2L, 3L)},
new Object[]{Arrays.asList(1L, 2L, 3L, 4L)},
new Object[]{Arrays.asList(1L, 2L, 3L, 4L)},
new Object[]{Arrays.asList(1L, 4L)},
new Object[]{Arrays.asList(1L, 4L)},
new Object[]{Arrays.asList(2L, 3L)},
new Object[]{Arrays.asList(2L, 3L)}
);
RowSignature rowSignature = RowSignature.builder()
.add("arrayLong", ColumnType.LONG_ARRAY)
.build();
RowSignature scanSignature = RowSignature.builder()
.add("arrayLong", ColumnType.LONG_ARRAY)
.build();
Query<?> expectedQuery = newScanQueryBuilder()
.dataSource(dataFileExternalDataSource)
.intervals(querySegmentSpec(Filtration.eternity()))
.columns("arrayLong")
.orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayLong", ScanQuery.Order.ASCENDING)))
.context(defaultScanQueryContext(context, scanSignature))
.build();
testSelectQuery().setSql("SELECT\n"
+ " arrayLong\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n"
+ " '{\"type\": \"json\"}',\n"
+ " '" + dataFileSignatureJsonString + "'\n"
+ " )\n"
+ ")\n"
+ "ORDER BY arrayLong")
.setQueryContext(context)
.setExpectedMSQSpec(MSQSpec
.builder()
.query(expectedQuery)
.columnMappings(new ColumnMappings(ImmutableList.of(
new ColumnMapping("arrayLong", "arrayLong")
)))
.tuningConfig(MSQTuningConfig.defaultConfig())
.destination(TaskReportMSQDestination.INSTANCE)
.build()
)
.setExpectedRowSignature(rowSignature)
.setExpectedResultRows(expectedRows)
.verifyResults();
}
@Test
public void testScanWithOrderByOnDoubleArray()
{
final List<Object[]> expectedRows = Arrays.asList(
new Object[]{null},
new Object[]{null},
new Object[]{null},
new Object[]{null},
new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)},
new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)},
new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)},
new Object[]{Arrays.asList(1.1d, 2.2d, 3.3d)},
new Object[]{Arrays.asList(1.1d, 3.3d)},
new Object[]{Arrays.asList(1.1d, 3.3d)},
new Object[]{Arrays.asList(2.2d, 3.3d, 4.0d)},
new Object[]{Arrays.asList(2.2d, 3.3d, 4.0d)},
new Object[]{Arrays.asList(3.3d, 4.4d, 5.5d)},
new Object[]{Arrays.asList(3.3d, 4.4d, 5.5d)}
);
RowSignature rowSignature = RowSignature.builder()
.add("arrayDouble", ColumnType.DOUBLE_ARRAY)
.build();
RowSignature scanSignature = RowSignature.builder()
.add("arrayDouble", ColumnType.DOUBLE_ARRAY)
.build();
Query<?> expectedQuery = newScanQueryBuilder()
.dataSource(dataFileExternalDataSource)
.intervals(querySegmentSpec(Filtration.eternity()))
.columns("arrayDouble")
.orderBy(Collections.singletonList(new ScanQuery.OrderBy("arrayDouble", ScanQuery.Order.ASCENDING)))
.context(defaultScanQueryContext(context, scanSignature))
.build();
testSelectQuery().setSql("SELECT\n"
+ " arrayDouble\n"
+ "FROM TABLE(\n"
+ " EXTERN(\n"
+ " '{ \"files\": [" + dataFileNameJsonString + "],\"type\":\"local\"}',\n"
+ " '{\"type\": \"json\"}',\n"
+ " '" + dataFileSignatureJsonString + "'\n"
+ " )\n"
+ ")\n"
+ "ORDER BY arrayDouble")
.setQueryContext(context)
.setExpectedMSQSpec(MSQSpec
.builder()
.query(expectedQuery)
.columnMappings(new ColumnMappings(ImmutableList.of(
new ColumnMapping("arrayDouble", "arrayDouble")
)))
.tuningConfig(MSQTuningConfig.defaultConfig())
.destination(TaskReportMSQDestination.INSTANCE)
.build()
)
.setExpectedRowSignature(rowSignature)
.setExpectedResultRows(expectedRows)
.verifyResults();
}
private List<Object[]> expectedMultiValueFooRowsToArray()
{

View File

@ -89,7 +89,6 @@ import org.junit.runners.Parameterized;
import org.mockito.ArgumentMatchers;
import org.mockito.Mockito;
import javax.annotation.Nonnull;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
@ -2534,7 +2533,6 @@ public class MSQSelectTest extends MSQTestBase
.verifyResults();
}
@Nonnull
private List<Object[]> expectedMultiValueFooRowsGroup()
{
ArrayList<Object[]> expected = new ArrayList<>();
@ -2553,7 +2551,6 @@ public class MSQSelectTest extends MSQTestBase
return expected;
}
@Nonnull
private List<Object[]> expectedMultiValueFooRowsGroupByList()
{
ArrayList<Object[]> expected = new ArrayList<>();

View File

@ -37,7 +37,8 @@ public class StringComparatorModule extends SimpleModule
new NamedType(StringComparators.AlphanumericComparator.class, StringComparators.ALPHANUMERIC_NAME),
new NamedType(StringComparators.StrlenComparator.class, StringComparators.STRLEN_NAME),
new NamedType(StringComparators.NumericComparator.class, StringComparators.NUMERIC_NAME),
new NamedType(StringComparators.VersionComparator.class, StringComparators.VERSION_NAME)
new NamedType(StringComparators.VersionComparator.class, StringComparators.VERSION_NAME),
new NamedType(StringComparators.NaturalComparator.class, StringComparators.NATURAL_NAME)
);
}

View File

@ -100,7 +100,8 @@ public class BoundDimFilter extends AbstractOptimizableDimFilter implements DimF
boolean orderingIsAlphanumeric = this.ordering.equals(StringComparators.ALPHANUMERIC);
Preconditions.checkState(
alphaNumeric == orderingIsAlphanumeric,
"mismatch between alphanumeric and ordering property");
"mismatch between alphanumeric and ordering property"
);
}
}
this.extractionFn = extractionFn;

View File

@ -434,6 +434,11 @@ public class GrouperBufferComparatorUtils
private static boolean isPrimitiveComparable(boolean pushLimitDown, @Nullable StringComparator stringComparator)
{
return !pushLimitDown || stringComparator == null || stringComparator.equals(StringComparators.NUMERIC);
return !pushLimitDown
|| stringComparator == null
|| stringComparator.equals(StringComparators.NUMERIC)
// NATURAL isn't set for numeric types, however if it is, then that would mean that we are ordering the
// numeric type with its natural comparator (which is NUMERIC)
|| stringComparator.equals(StringComparators.NATURAL);
}
}

View File

@ -1116,7 +1116,8 @@ public class RowBasedGrouperHelper
final StringComparator comparator = comparators.get(i);
final ColumnType fieldType = fieldTypes.get(i);
if (fieldType.isNumeric() && comparator.equals(StringComparators.NUMERIC)) {
if (fieldType.isNumeric()
&& (comparator.equals(StringComparators.NUMERIC) || comparator.equals(StringComparators.NATURAL))) {
// use natural comparison
if (fieldType.is(ValueType.DOUBLE)) {
// sometimes doubles can become floats making the round trip from serde, make sure to coerce them both

View File

@ -41,6 +41,8 @@ public abstract class StringComparator implements Comparator<String>
return StringComparators.NUMERIC;
case StringComparators.VERSION_NAME:
return StringComparators.VERSION;
case StringComparators.NATURAL_NAME:
return StringComparators.NATURAL;
default:
throw new IAE("Unknown string comparator[%s]", type);
}

View File

@ -22,6 +22,8 @@ package org.apache.druid.query.ordering;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Ints;
import org.apache.druid.common.guava.GuavaUtils;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.maven.artifact.versioning.DefaultArtifactVersion;
import java.math.BigDecimal;
@ -34,25 +36,28 @@ public class StringComparators
public static final String NUMERIC_NAME = "numeric";
public static final String STRLEN_NAME = "strlen";
public static final String VERSION_NAME = "version";
public static final String NATURAL_NAME = "natural";
public static final StringComparator LEXICOGRAPHIC = new LexicographicComparator();
public static final StringComparator ALPHANUMERIC = new AlphanumericComparator();
public static final StringComparator NUMERIC = new NumericComparator();
public static final StringComparator STRLEN = new StrlenComparator();
public static final StringComparator VERSION = new VersionComparator();
public static final StringComparator NATURAL = new NaturalComparator();
public static final int LEXICOGRAPHIC_CACHE_ID = 0x01;
public static final int ALPHANUMERIC_CACHE_ID = 0x02;
public static final int NUMERIC_CACHE_ID = 0x03;
public static final int STRLEN_CACHE_ID = 0x04;
public static final int VERSION_CACHE_ID = 0x05;
public static final int NATURAL_CACHE_ID = 0x06;
/**
* Comparison using the natural comparator of {@link String}.
*
* Note that this is not equivalent to comparing UTF-8 byte arrays; see javadocs for
* {@link org.apache.druid.java.util.common.StringUtils#compareUnicode(String, String)} and
* {@link org.apache.druid.java.util.common.StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}.
* {@link StringUtils#compareUnicode(String, String)} and
* {@link StringUtils#compareUtf8UsingJavaStringOrdering(byte[], byte[])}.
*/
public static class LexicographicComparator extends StringComparator
{
@ -492,4 +497,51 @@ public class StringComparators
return new byte[]{(byte) VERSION_CACHE_ID};
}
}
/**
* NaturalComparator refers to the natural ordering of the type that it refers.
*
* For example, if the type is Long, the natural ordering would be numeric
* if the type is an array, the natural ordering would be lexicographic comparison of the natural ordering of the
* elements in the arrays.
*
* It is a sigil value for the dimension that we can handle in the execution layer, and don't need the comparator for.
* It is also a placeholder for dimensions that we don't have a comparator for (like arrays), but is a required for
* planning
*/
public static class NaturalComparator extends StringComparator
{
@Override
public int compare(String o1, String o2)
{
throw DruidException.defensive("compare() should not be called for the NaturalComparator");
}
@Override
public String toString()
{
return StringComparators.NATURAL_NAME;
}
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
return o != null && getClass() == o.getClass();
}
@Override
public int hashCode()
{
return 0;
}
@Override
public byte[] getCacheKey()
{
return new byte[]{(byte) NATURAL_CACHE_ID};
}
}
}

View File

@ -22,6 +22,7 @@ package org.apache.druid.query.ordering;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.druid.error.DruidException;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
@ -33,6 +34,8 @@ import java.util.List;
public class StringComparatorsTest
{
private static final ObjectMapper JSON_MAPPER = new DefaultObjectMapper();
private void commonTest(StringComparator comparator)
{
// equality test
@ -156,65 +159,83 @@ public class StringComparatorsTest
Assert.assertTrue(StringComparators.VERSION.compare("1.0-SNAPSHOT", "1.0-Final") < 0);
}
@Test
public void testNaturalComparator()
{
Assert.assertThrows(DruidException.class, () -> StringComparators.NATURAL.compare("str1", "str2"));
}
@Test
public void testLexicographicComparatorSerdeTest() throws IOException
{
ObjectMapper jsonMapper = new DefaultObjectMapper();
String expectJsonSpec = "{\"type\":\"lexicographic\"}";
String jsonSpec = jsonMapper.writeValueAsString(StringComparators.LEXICOGRAPHIC);
String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.LEXICOGRAPHIC);
Assert.assertEquals(expectJsonSpec, jsonSpec);
Assert.assertEquals(StringComparators.LEXICOGRAPHIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.LEXICOGRAPHIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class));
String makeFromJsonSpec = "\"lexicographic\"";
Assert.assertEquals(
StringComparators.LEXICOGRAPHIC,
jsonMapper.readValue(makeFromJsonSpec, StringComparator.class)
JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class)
);
}
@Test
public void testAlphanumericComparatorSerdeTest() throws IOException
{
ObjectMapper jsonMapper = new DefaultObjectMapper();
String expectJsonSpec = "{\"type\":\"alphanumeric\"}";
String jsonSpec = jsonMapper.writeValueAsString(StringComparators.ALPHANUMERIC);
String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.ALPHANUMERIC);
Assert.assertEquals(expectJsonSpec, jsonSpec);
Assert.assertEquals(StringComparators.ALPHANUMERIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.ALPHANUMERIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class));
String makeFromJsonSpec = "\"alphanumeric\"";
Assert.assertEquals(StringComparators.ALPHANUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.ALPHANUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class));
}
@Test
public void testStrlenComparatorSerdeTest() throws IOException
{
ObjectMapper jsonMapper = new DefaultObjectMapper();
String expectJsonSpec = "{\"type\":\"strlen\"}";
String jsonSpec = jsonMapper.writeValueAsString(StringComparators.STRLEN);
String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.STRLEN);
Assert.assertEquals(expectJsonSpec, jsonSpec);
Assert.assertEquals(StringComparators.STRLEN, jsonMapper.readValue(expectJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.STRLEN, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class));
String makeFromJsonSpec = "\"strlen\"";
Assert.assertEquals(StringComparators.STRLEN, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.STRLEN, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class));
}
@Test
public void testNumericComparatorSerdeTest() throws IOException
{
ObjectMapper jsonMapper = new DefaultObjectMapper();
String expectJsonSpec = "{\"type\":\"numeric\"}";
String jsonSpec = jsonMapper.writeValueAsString(StringComparators.NUMERIC);
String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.NUMERIC);
Assert.assertEquals(expectJsonSpec, jsonSpec);
Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(expectJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class));
String makeFromJsonSpec = "\"numeric\"";
Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class));
makeFromJsonSpec = "\"NuMeRiC\"";
Assert.assertEquals(StringComparators.NUMERIC, jsonMapper.readValue(makeFromJsonSpec, StringComparator.class));
Assert.assertEquals(StringComparators.NUMERIC, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class));
}
@Test
public void testNaturalComparatorSerdeTest() throws IOException
{
String expectJsonSpec = "{\"type\":\"natural\"}";
String jsonSpec = JSON_MAPPER.writeValueAsString(StringComparators.NATURAL);
Assert.assertEquals(expectJsonSpec, jsonSpec);
Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(expectJsonSpec, StringComparator.class));
String makeFromJsonSpec = "\"natural\"";
Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class));
makeFromJsonSpec = "\"NaTuRaL\"";
Assert.assertEquals(StringComparators.NATURAL, JSON_MAPPER.readValue(makeFromJsonSpec, StringComparator.class));
}
}

View File

@ -198,7 +198,7 @@ public class Bounds
public static BoundDimFilter interval(final BoundRefKey boundRefKey, final Interval interval)
{
if (!boundRefKey.getComparator().equals(StringComparators.NUMERIC)) {
// Interval comparison only works with NUMERIC comparator.
// Interval comparison only works with NUMERIC comparator
throw new ISE("Comparator must be NUMERIC but was[%s]", boundRefKey.getComparator());
}

View File

@ -40,7 +40,6 @@ import org.apache.calcite.util.TimeString;
import org.apache.calcite.util.TimestampString;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionProcessingConfig;
@ -208,12 +207,18 @@ public class Calcites
SqlTypeName.INT_TYPES.contains(sqlTypeName);
}
/**
* Returns the natural StringComparator associated with the RelDataType
*/
public static StringComparator getStringComparatorForRelDataType(RelDataType dataType)
{
final ColumnType valueType = getColumnTypeForRelDataType(dataType);
return getStringComparatorForValueType(valueType);
}
/**
* Returns the natural StringComparator associated with the given ColumnType
*/
public static StringComparator getStringComparatorForValueType(ColumnType valueType)
{
if (valueType.isNumeric()) {
@ -221,7 +226,7 @@ public class Calcites
} else if (valueType.is(ValueType.STRING)) {
return StringComparators.LEXICOGRAPHIC;
} else {
throw new ISE("Unrecognized valueType[%s]", valueType);
return StringComparators.NATURAL;
}
}

View File

@ -20,6 +20,8 @@
package org.apache.druid.sql.calcite.planner;
import com.google.common.collect.ImmutableSortedSet;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.sql.calcite.util.CalciteTestBase;
import org.junit.Assert;
import org.junit.Test;
@ -52,4 +54,18 @@ public class CalcitesTest extends CalciteTestBase
Assert.assertEquals("x", Calcites.findUnusedPrefixForDigits("x", ImmutableSortedSet.of("foo", "xa", "_x")));
Assert.assertEquals("__x", Calcites.findUnusedPrefixForDigits("x", ImmutableSortedSet.of("foo", "x1a", "_x90")));
}
@Test
public void testGetStringComparatorForColumnType()
{
Assert.assertEquals(StringComparators.LEXICOGRAPHIC, Calcites.getStringComparatorForValueType(ColumnType.STRING));
Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.LONG));
Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.FLOAT));
Assert.assertEquals(StringComparators.NUMERIC, Calcites.getStringComparatorForValueType(ColumnType.DOUBLE));
Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.STRING_ARRAY));
Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.LONG_ARRAY));
Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.DOUBLE_ARRAY));
Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.NESTED_DATA));
Assert.assertEquals(StringComparators.NATURAL, Calcites.getStringComparatorForValueType(ColumnType.UNKNOWN_COMPLEX));
}
}