From 5249155284fcdcd3cbcb4115ee267f23bc43c2e6 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 10 Apr 2020 18:11:05 -0700 Subject: [PATCH] Fix off-by-one in IndexedTableJoinMatcher.getCardinality. (#9674) * Fix off-by-one in IndexedTableJoinMatcher.getCardinality. It would report a cardinality that is one lower than the actual cardinality. The missing value is the phantom null that can be generated by outer joins. * Fix tests. --- .../table/IndexedTableDimensionSelector.java | 19 +++++++++++++++-- .../join/table/IndexedTableJoinable.java | 2 +- .../HashJoinSegmentStorageAdapterTest.java | 2 +- .../join/table/IndexedTableJoinableTest.java | 21 +++++++++++++++++++ .../join/InlineJoinableFactoryTest.java | 4 ++-- 5 files changed, 42 insertions(+), 6 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableDimensionSelector.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableDimensionSelector.java index 18936a3e8e1..544385884fd 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableDimensionSelector.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableDimensionSelector.java @@ -86,8 +86,7 @@ public class IndexedTableDimensionSelector implements DimensionSelector @Override public int getValueCardinality() { - // +1 for nulls. - return table.numRows() + 1; + return computeDimensionSelectorCardinality(table); } @Nullable @@ -141,4 +140,20 @@ public class IndexedTableDimensionSelector implements DimensionSelector inspector.visit("table", table); inspector.visit("extractionFn", extractionFn); } + + /** + * Returns the value that {@link #getValueCardinality()} would return for a particular {@link IndexedTable}. + * + * The value will be one higher than {@link IndexedTable#numRows()}, to account for the possibility of phantom nulls. + * + * @throws IllegalArgumentException if the table's row count is {@link Integer#MAX_VALUE} + */ + static int computeDimensionSelectorCardinality(final IndexedTable table) + { + if (table.numRows() == Integer.MAX_VALUE) { + throw new IllegalArgumentException("Table is too large"); + } + + return table.numRows() + 1; + } } diff --git a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java index bb25890b720..38cf5f89574 100644 --- a/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java +++ b/processing/src/main/java/org/apache/druid/segment/join/table/IndexedTableJoinable.java @@ -51,7 +51,7 @@ public class IndexedTableJoinable implements Joinable public int getCardinality(String columnName) { if (table.rowSignature().contains(columnName)) { - return table.numRows(); + return IndexedTableDimensionSelector.computeDimensionSelectorCardinality(table); } else { // NullDimensionSelector has cardinality = 1 (one null, nothing else). return 1; diff --git a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java index c8460fe3410..b2eaa148415 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapterTest.java @@ -103,7 +103,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag public void test_getDimensionCardinality_factToCountryJoinColumn() { Assert.assertEquals( - 18, + 19, makeFactToCountrySegment().getDimensionCardinality(FACT_TO_COUNTRY_ON_ISO_CODE_PREFIX + "countryName") ); } diff --git a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java index 28037b0fe75..e8e3be4e5d0 100644 --- a/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java +++ b/processing/src/test/java/org/apache/druid/segment/join/table/IndexedTableJoinableTest.java @@ -89,6 +89,27 @@ public class IndexedTableJoinableTest Assert.assertEquals(ImmutableList.of("str", "long"), joinable.getAvailableColumns()); } + @Test + public void test_getCardinality_string() + { + final IndexedTableJoinable joinable = new IndexedTableJoinable(indexedTable); + Assert.assertEquals(indexedTable.numRows() + 1, joinable.getCardinality("str")); + } + + @Test + public void test_getCardinality_long() + { + final IndexedTableJoinable joinable = new IndexedTableJoinable(indexedTable); + Assert.assertEquals(indexedTable.numRows() + 1, joinable.getCardinality("long")); + } + + @Test + public void test_getCardinality_nonexistent() + { + final IndexedTableJoinable joinable = new IndexedTableJoinable(indexedTable); + Assert.assertEquals(1, joinable.getCardinality("nonexistent")); + } + @Test public void test_getColumnCapabilities_string() { diff --git a/server/src/test/java/org/apache/druid/segment/join/InlineJoinableFactoryTest.java b/server/src/test/java/org/apache/druid/segment/join/InlineJoinableFactoryTest.java index f3d5eed0e7f..d1be6984043 100644 --- a/server/src/test/java/org/apache/druid/segment/join/InlineJoinableFactoryTest.java +++ b/server/src/test/java/org/apache/druid/segment/join/InlineJoinableFactoryTest.java @@ -76,8 +76,8 @@ public class InlineJoinableFactoryTest Assert.assertThat(joinable, CoreMatchers.instanceOf(IndexedTableJoinable.class)); Assert.assertEquals(ImmutableList.of("str", "long"), joinable.getAvailableColumns()); - Assert.assertEquals(2, joinable.getCardinality("str")); - Assert.assertEquals(2, joinable.getCardinality("long")); + Assert.assertEquals(3, joinable.getCardinality("str")); + Assert.assertEquals(3, joinable.getCardinality("long")); } private static JoinConditionAnalysis makeCondition(final String condition)