sql auto limit wrapping fix (#9043)

* sql auto limit wrapping fix

* fix tests and style

* remove setImportance
Clint Wylie 2019-12-16 01:38:24 -08:00 committed by GitHub
parent 6881535b48
commit bc16ff5e7c
4 changed files with 268 additions and 6 deletions

DruidPlanner.java

@@ -258,24 +258,29 @@ public class DruidPlanner implements Closeable
     if (root.rel instanceof Sort) {
       Sort innerSort = (Sort) root.rel;
       final int offset = Calcites.getOffset(innerSort);
+      final int innerLimit = Calcites.getFetch(innerSort);
       final int fetch = Calcites.collapseFetch(
-          Calcites.getFetch(innerSort),
+          innerLimit,
           Ints.checkedCast(outerLimit),
           0
       );
 
+      if (fetch == innerLimit) {
+        // nothing to do, don't bother to make a new sort
+        return root.rel;
+      }
       return LogicalSort.create(
           innerSort.getInput(),
           innerSort.collation,
-          makeBigIntLiteral(offset),
+          offset > 0 ? makeBigIntLiteral(offset) : null,
           makeBigIntLiteral(fetch)
       );
     }
 
     return LogicalSort.create(
         root.rel,
         root.collation,
-        makeBigIntLiteral(0),
+        null,
         makeBigIntLiteral(outerLimit)
     );
   }
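
The hunk above leans on Calcites.collapseFetch to merge the inner sort's fetch with the outer limit being wrapped on. A minimal sketch of that arithmetic, assuming a negative fetch means "unlimited" (the method name and behavior here are inferred from the call site, not copied from Calcites):

  // Sketch only: merge an inner LIMIT with an outer LIMIT/OFFSET stacked on top of it.
  // Negative fetch values mean "unlimited"; names are illustrative, not Druid's exact code.
  static int collapseFetchSketch(final int innerFetch, final int outerFetch, final int outerOffset)
  {
    if (innerFetch < 0 && outerFetch < 0) {
      return -1;                 // neither layer limits the result
    } else if (innerFetch < 0) {
      return outerFetch;         // only the outer limit applies
    } else if (outerFetch < 0) {
      // only the inner limit applies, minus the rows skipped by the outer offset
      return Math.max(0, innerFetch - outerOffset);
    } else {
      // both apply: the outer layer can only ever shrink what the inner layer produces
      return Math.max(0, Math.min(innerFetch - outerOffset, outerFetch));
    }
  }

Under this reading, fetch == innerLimit means the outer limit cannot shrink the result, which is why the planner returns root.rel unchanged instead of building a new LogicalSort.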

DruidAvaticaHandlerTest.java

@@ -402,6 +402,13 @@ public class DruidAvaticaHandlerTest extends CalciteTestBase
             Pair.of("TABLE_NAME", CalciteTests.DATASOURCE4),
             Pair.of("TABLE_SCHEM", "druid"),
             Pair.of("TABLE_TYPE", "TABLE")
         ),
+        row(
+            Pair.of("TABLE_CAT", "druid"),
+            Pair.of("TABLE_NAME", CalciteTests.DATASOURCE5),
+            Pair.of("TABLE_SCHEM", "druid"),
+            Pair.of("TABLE_TYPE", "TABLE")
+        ),
         row(
             Pair.of("TABLE_CAT", "druid"),
@@ -448,6 +455,12 @@ public class DruidAvaticaHandlerTest extends CalciteTestBase
             Pair.of("TABLE_SCHEM", "druid"),
             Pair.of("TABLE_TYPE", "TABLE")
         ),
+        row(
+            Pair.of("TABLE_CAT", "druid"),
+            Pair.of("TABLE_NAME", CalciteTests.DATASOURCE5),
+            Pair.of("TABLE_SCHEM", "druid"),
+            Pair.of("TABLE_TYPE", "TABLE")
+        ),
         row(
             Pair.of("TABLE_CAT", "druid"),
             Pair.of("TABLE_NAME", CalciteTests.DATASOURCE3),

CalciteQueryTest.java

@@ -101,6 +101,8 @@ import java.util.Map;
 public class CalciteQueryTest extends BaseCalciteQueryTest
 {
+  private final boolean useDefault = NullHandling.replaceWithDefault();
+
   @Test
   public void testSelectConstantExpression() throws Exception
   {
@@ -329,6 +331,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
         .add(new Object[]{"druid", CalciteTests.DATASOURCE1, "TABLE"})
         .add(new Object[]{"druid", CalciteTests.DATASOURCE2, "TABLE"})
         .add(new Object[]{"druid", CalciteTests.DATASOURCE4, "TABLE"})
+        .add(new Object[]{"druid", CalciteTests.DATASOURCE5, "TABLE"})
         .add(new Object[]{"druid", CalciteTests.DATASOURCE3, "TABLE"})
         .add(new Object[]{"druid", "aview", "VIEW"})
         .add(new Object[]{"druid", "bview", "VIEW"})
@@ -355,6 +358,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
         .add(new Object[]{"druid", CalciteTests.DATASOURCE2, "TABLE"})
         .add(new Object[]{"druid", CalciteTests.DATASOURCE4, "TABLE"})
         .add(new Object[]{"druid", CalciteTests.FORBIDDEN_DATASOURCE, "TABLE"})
+        .add(new Object[]{"druid", CalciteTests.DATASOURCE5, "TABLE"})
         .add(new Object[]{"druid", CalciteTests.DATASOURCE3, "TABLE"})
         .add(new Object[]{"druid", "aview", "VIEW"})
         .add(new Object[]{"druid", "bview", "VIEW"})
@@ -812,6 +816,156 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
     );
   }
 
+  @Test
+  public void testSelectLimitWrappingAgainAkaIDontReallyQuiteUnderstandCalciteQueryPlanning() throws Exception
+  {
+    // This test is for a specific bug encountered where the 2nd query would not plan with auto limit wrapping, but
+    // if *any* column was removed from the select output (e.g. the 1st query in this test), then it planned and
+    // functioned correctly. Running the query with an explicit limit worked, and turning off auto limit also worked.
+    // The only difference between an explicit limit and the auto limit was that the auto limit's LogicalSort had an
+    // offset of 0 instead of null, so the resolution was to modify the planner to set offset on the sort only when
+    // the offset is non-zero. However, why the 1st query succeeded before this planner change and the 2nd did not is
+    // still a mystery...
+    testQuery(
+        "SELECT \"__time\", \"count\", \"dimHyperUnique\", \"dimMultivalEnumerated\", \"dimMultivalEnumerated2\","
+        + " \"dimMultivalSequentialWithNulls\", \"dimSequential\", \"dimSequentialHalfNull\", \"dimUniform\","
+        + " \"dimZipf\", \"metFloatNormal\", \"metFloatZipf\", \"metLongSequential\""
+        + " FROM druid.lotsocolumns"
+        + " WHERE __time >= CURRENT_TIMESTAMP - INTERVAL '10' YEAR",
+        OUTER_LIMIT_CONTEXT,
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE5)
+                .intervals(querySegmentSpec(Intervals.of("1990-01-01T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
+                .columns(
+                    ImmutableList.<String>builder()
+                        .add("__time")
+                        .add("count")
+                        .add("dimHyperUnique")
+                        .add("dimMultivalEnumerated")
+                        .add("dimMultivalEnumerated2")
+                        .add("dimMultivalSequentialWithNulls")
+                        .add("dimSequential")
+                        .add("dimSequentialHalfNull")
+                        .add("dimUniform")
+                        .add("dimZipf")
+                        .add("metFloatNormal")
+                        .add("metFloatZipf")
+                        .add("metLongSequential")
+                        .build()
+                )
+                .limit(2)
+                .order(ScanQuery.Order.DESCENDING)
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .context(OUTER_LIMIT_CONTEXT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{
+                1576306800000L,
+                1L,
+                "0",
+                "[\"Baz\",\"Baz\",\"Hello\",\"World\"]",
+                useDefault ? "[\"\",\"Apple\",\"Orange\"]" : "[null,\"Apple\",\"Orange\"]",
+                "[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\"]",
+                "0",
+                "0",
+                "74416",
+                "27",
+                "5000.0",
+                "147.0",
+                "0"
+            },
+            new Object[]{
+                1576306800000L,
+                1L,
+                "8",
+                "[\"Baz\",\"World\",\"World\",\"World\"]",
+                useDefault ? "[\"\",\"Corundum\",\"Xylophone\"]" : "[null,\"Corundum\",\"Xylophone\"]",
+                useDefault ? "" : null,
+                "8",
+                useDefault ? "" : null,
+                "50515",
+                "9",
+                "4999.0",
+                "25.0",
+                "8"
+            }
+        )
+    );
+
+    testQuery(
+        "SELECT \"__time\", \"count\", \"dimHyperUnique\", \"dimMultivalEnumerated\", \"dimMultivalEnumerated2\","
+        + " \"dimMultivalSequentialWithNulls\", \"dimSequential\", \"dimSequentialHalfNull\", \"dimUniform\","
+        + " \"dimZipf\", \"metFloatNormal\", \"metFloatZipf\", \"metLongSequential\", \"metLongUniform\""
+        + " FROM druid.lotsocolumns"
+        + " WHERE __time >= CURRENT_TIMESTAMP - INTERVAL '10' YEAR",
+        OUTER_LIMIT_CONTEXT,
+        ImmutableList.of(
+            newScanQueryBuilder()
+                .dataSource(CalciteTests.DATASOURCE5)
+                .intervals(querySegmentSpec(Intervals.of("1990-01-01T00:00:00.000Z/146140482-04-24T15:36:27.903Z")))
+                .columns(
+                    ImmutableList.<String>builder()
+                        .add("__time")
+                        .add("count")
+                        .add("dimHyperUnique")
+                        .add("dimMultivalEnumerated")
+                        .add("dimMultivalEnumerated2")
+                        .add("dimMultivalSequentialWithNulls")
+                        .add("dimSequential")
+                        .add("dimSequentialHalfNull")
+                        .add("dimUniform")
+                        .add("dimZipf")
+                        .add("metFloatNormal")
+                        .add("metFloatZipf")
+                        .add("metLongSequential")
+                        .add("metLongUniform")
+                        .build()
+                )
+                .limit(2)
+                .order(ScanQuery.Order.DESCENDING)
+                .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+                .context(OUTER_LIMIT_CONTEXT)
+                .build()
+        ),
+        ImmutableList.of(
+            new Object[]{
+                1576306800000L,
+                1L,
+                "0",
+                "[\"Baz\",\"Baz\",\"Hello\",\"World\"]",
+                useDefault ? "[\"\",\"Apple\",\"Orange\"]" : "[null,\"Apple\",\"Orange\"]",
+                "[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\"]",
+                "0",
+                "0",
+                "74416",
+                "27",
+                "5000.0",
+                "147.0",
+                "0",
+                "372"
+            },
+            new Object[]{
+                1576306800000L,
+                1L,
+                "8",
+                "[\"Baz\",\"World\",\"World\",\"World\"]",
+                useDefault ? "[\"\",\"Corundum\",\"Xylophone\"]" : "[null,\"Corundum\",\"Xylophone\"]",
+                useDefault ? "" : null,
+                "8",
+                useDefault ? "" : null,
+                "50515",
+                "9",
+                "4999.0",
+                "25.0",
+                "8",
+                "252"
+            }
+        )
+    );
+  }
+
   @Test
   public void testTopNLimitWrapping() throws Exception
   {
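
OUTER_LIMIT_CONTEXT, used throughout the new test, is defined in BaseCalciteQueryTest and does not appear in this diff. For the expected native queries to carry .limit(2), it must set the SQL outer-limit key in the query context, along these lines:

  // Hypothetical reconstruction, not the verbatim definition: a query context that asks
  // the SQL layer to wrap queries lacking their own LIMIT with an outer LIMIT of 2.
  // The key name is an assumption; the value 2 follows from the expected .limit(2) above.
  final Map<String, Object> outerLimitContext = ImmutableMap.of("sqlOuterLimit", 2);

The .context(OUTER_LIMIT_CONTEXT) calls in the expected scan queries reflect the same context being forwarded to the native query.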
@@ -2240,7 +2394,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   }
 
   @Test
-  public void testUnplannableQueries() throws Exception
+  public void testUnplannableQueries()
   {
     // All of these queries are unplannable because they rely on features Druid doesn't support.
     // This test is here to confirm that we don't fall back to Calcite's interpreter or enumerable implementation.
@@ -3616,7 +3770,7 @@ public class CalciteQueryTest extends BaseCalciteQueryTest
   }
 
   @Test
-  public void testCountStarWithTimeFilterUsingStringLiteralsInvalid() throws Exception
+  public void testCountStarWithTimeFilterUsingStringLiteralsInvalid()
   {
     // Strings are implicitly cast to timestamps. Test an invalid string.
     // This error message isn't ideal but it is at least better than silently ignoring the problem.
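
The hunk above is cut off before the query itself; illustratively, the scenario those comments describe looks like this (hypothetical query text, not taken from the diff):

  // A string literal compared against __time is implicitly cast to TIMESTAMP.
  // One that cannot be parsed as a timestamp, e.g. a stray leading character,
  // produces an error rather than being silently ignored.
  final String sql = "SELECT COUNT(*) FROM druid.foo WHERE __time >= 'z2000-01-01 00:00:00'";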

CalciteTests.java

@@ -132,6 +132,7 @@ import java.io.File;
 import java.lang.reflect.InvocationTargetException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -147,6 +148,7 @@ public class CalciteTests
   public static final String DATASOURCE2 = "foo2";
   public static final String DATASOURCE3 = "numfoo";
   public static final String DATASOURCE4 = "foo4";
+  public static final String DATASOURCE5 = "lotsocolumns";
   public static final String FORBIDDEN_DATASOURCE = "forbiddenDatasource";
   public static final String TEST_SUPERUSER_NAME = "testSuperuser";
@@ -259,6 +261,31 @@ public class CalciteTests
       )
   );
 
+  private static final InputRowParser<Map<String, Object>> PARSER_LOTS_OF_COLUMNS = new MapInputRowParser(
+      new TimeAndDimsParseSpec(
+          new TimestampSpec("timestamp", "millis", null),
+          new DimensionsSpec(
+              DimensionsSpec.getDefaultSchemas(
+                  ImmutableList.<String>builder().add("dimHyperUnique")
+                               .add("dimMultivalEnumerated")
+                               .add("dimMultivalEnumerated2")
+                               .add("dimMultivalSequentialWithNulls")
+                               .add("dimSequential")
+                               .add("dimSequentialHalfNull")
+                               .add("dimUniform")
+                               .add("dimZipf")
+                               .add("metFloatNormal")
+                               .add("metFloatZipf")
+                               .add("metLongSequential")
+                               .add("metLongUniform")
+                               .build()
+              ),
+              null,
+              null
+          )
+      )
+  );
+
   private static final IncrementalIndexSchema INDEX_SCHEMA = new IncrementalIndexSchema.Builder()
       .withMetrics(
           new CountAggregatorFactory("cnt"),
@@ -280,6 +307,14 @@ public class CalciteTests
       .withRollup(false)
       .build();
 
+  private static final IncrementalIndexSchema INDEX_SCHEMA_LOTS_O_COLUMNS = new IncrementalIndexSchema.Builder()
+      .withMetrics(
+          new CountAggregatorFactory("count")
+      )
+      .withDimensionsSpec(PARSER_LOTS_OF_COLUMNS)
+      .withRollup(false)
+      .build();
+
   public static final List<InputRow> ROWS1 = ImmutableList.of(
       createRow(
           ImmutableMap.<String, Object>builder()
@@ -451,6 +486,44 @@ public class CalciteTests
       createRow("2000-01-01", "forbidden", "abcd", 9999.0)
   );
 
+  // Hi, I'm Troy McClure. You may remember these rows from such benchmarks generator schemas as basic and expression
+  public static final List<InputRow> ROWS_LOTS_OF_COLUMNS = ImmutableList.of(
+      createRow(
+          ImmutableMap.<String, Object>builder()
+              .put("timestamp", 1576306800000L)
+              .put("metFloatZipf", 147.0)
+              .put("dimMultivalSequentialWithNulls", Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8"))
+              .put("dimMultivalEnumerated2", Arrays.asList(null, "Orange", "Apple"))
+              .put("metLongUniform", 372)
+              .put("metFloatNormal", 5000.0)
+              .put("dimZipf", "27")
+              .put("dimUniform", "74416")
+              .put("dimMultivalEnumerated", Arrays.asList("Baz", "World", "Hello", "Baz"))
+              .put("metLongSequential", 0)
+              .put("dimHyperUnique", "0")
+              .put("dimSequential", "0")
+              .put("dimSequentialHalfNull", "0")
+              .build(),
+          PARSER_LOTS_OF_COLUMNS
+      ),
+      createRow(
+          ImmutableMap.<String, Object>builder()
+              .put("timestamp", 1576306800000L)
+              .put("metFloatZipf", 25.0)
+              .put("dimMultivalEnumerated2", Arrays.asList("Xylophone", null, "Corundum"))
+              .put("metLongUniform", 252)
+              .put("metFloatNormal", 4999.0)
+              .put("dimZipf", "9")
+              .put("dimUniform", "50515")
+              .put("dimMultivalEnumerated", Arrays.asList("Baz", "World", "World", "World"))
+              .put("metLongSequential", 8)
+              .put("dimHyperUnique", "8")
+              .put("dimSequential", "8")
+              .build(),
+          PARSER_LOTS_OF_COLUMNS
+      )
+  );
+
   private CalciteTests()
   {
     // No instantiation.
@@ -641,6 +714,14 @@ public class CalciteTests
         .rows(ROWS1_WITH_FULL_TIMESTAMP)
         .buildMMappedIndex();
 
+    final QueryableIndex indexLotsOfColumns = IndexBuilder
+        .create()
+        .tmpDir(new File(tmpDir, "5"))
+        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
+        .schema(INDEX_SCHEMA_LOTS_O_COLUMNS)
+        .rows(ROWS_LOTS_OF_COLUMNS)
+        .buildMMappedIndex();
+
     return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
         DataSegment.builder()
@@ -687,6 +768,15 @@ public class CalciteTests
             .size(0)
             .build(),
         index4
+    ).add(
+        DataSegment.builder()
+                   .dataSource(DATASOURCE5)
+                   .interval(indexLotsOfColumns.getDataInterval())
+                   .version("1")
+                   .shardSpec(new LinearShardSpec(0))
+                   .size(0)
+                   .build(),
+        indexLotsOfColumns
     );
   }