Fix ColumnSelectorColumnIndexSelector#getColumnCapabilities. (#15614)

* Fix ColumnSelectorColumnIndexSelector#getColumnCapabilities.

It was using virtualColumns.getColumnCapabilities, which only returns
capabilities for virtual columns, not regular columns. The effect of this
is that expression filters (and in some cases, arrayContainsElement filters)
would build value matchers rather than use indexes.

I think this has been like this since #12315, which added the
getColumnCapabilities method to BitmapIndexSelector, and included the same
implementation as exists in the code today.

This error is easy to make due to the design of virtualColumns.getColumnCapabilities,
so to help avoid it in the future, this patch renames the method to
getColumnCapabilitiesWithoutFallback to emphasize that it does not return
capabilities for regular columns.

* Make getColumnCapabilitiesWithoutFallback package-private.

* Fix expression filter bitmap usage.
This commit is contained in:
Gian Merlino 2024-01-02 21:09:18 -08:00 committed by GitHub
parent cfdea06857
commit b0e52c99bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 103 additions and 60 deletions

View File

@ -246,18 +246,15 @@ public class LazilyDecoratedRowsAndColumns implements RowsAndColumns
final RowSignature.Builder sigBob = RowSignature.builder();
for (String col : cols) {
ColumnCapabilities capabilities;
capabilities = columnSelectorFactory.getColumnCapabilities(col);
final ColumnCapabilities capabilities;
if (virtualColumns != null) {
capabilities = virtualColumns.getColumnCapabilitiesWithFallback(columnSelectorFactory, col);
} else {
capabilities = columnSelectorFactory.getColumnCapabilities(col);
}
if (capabilities != null) {
sigBob.add(col, capabilities.toColumnType());
continue;
}
if (virtualColumns != null) {
capabilities = virtualColumns.getColumnCapabilities(columnSelectorFactory, col);
if (capabilities != null) {
sigBob.add(col, capabilities.toColumnType());
continue;
}
}
}
final RowSignature signature = sigBob.build();

View File

@ -99,6 +99,6 @@ public class ColumnSelectorColumnIndexSelector implements ColumnIndexSelector
@Override
public ColumnCapabilities getColumnCapabilities(String column)
{
return virtualColumns.getColumnCapabilities(columnSelector, column);
return virtualColumns.getColumnCapabilitiesWithFallback(columnSelector, column);
}
}

View File

@ -178,10 +178,6 @@ public class QueryableIndexColumnSelectorFactory implements ColumnSelectorFactor
@Nullable
public ColumnCapabilities getColumnCapabilities(String columnName)
{
if (virtualColumns.exists(columnName)) {
return virtualColumns.getColumnCapabilities(columnCache, columnName);
}
return columnCache.getColumnCapabilities(columnName);
return virtualColumns.getColumnCapabilitiesWithFallback(columnCache, columnName);
}
}

View File

@ -394,8 +394,12 @@ public class VirtualColumns implements Cacheable
return virtualColumn.makeVectorObjectSelector(columnName, columnSelector, offset);
}
/**
* Get capabilities for the virtual column "columnName". If columnName is not a virtual column, returns null.
* Package-private since production callers want {@link #getColumnCapabilitiesWithFallback(ColumnInspector, String)}.
*/
@Nullable
public ColumnCapabilities getColumnCapabilities(ColumnInspector inspector, String columnName)
ColumnCapabilities getColumnCapabilitiesWithoutFallback(ColumnInspector inspector, String columnName)
{
final VirtualColumn virtualColumn = getVirtualColumn(columnName);
if (virtualColumn != null) {
@ -405,10 +409,14 @@ public class VirtualColumns implements Cacheable
}
}
/**
* Get capabilities for the column "columnName". If columnName is not a virtual column, delegates to the
* provided {@link ColumnInspector}.
*/
@Nullable
public ColumnCapabilities getColumnCapabilitiesWithFallback(ColumnInspector inspector, String columnName)
{
final ColumnCapabilities virtualColumnCapabilities = getColumnCapabilities(inspector, columnName);
final ColumnCapabilities virtualColumnCapabilities = getColumnCapabilitiesWithoutFallback(inspector, columnName);
if (virtualColumnCapabilities != null) {
return virtualColumnCapabilities;
} else {

View File

@ -344,6 +344,7 @@ public class ExpressionFilter implements Filter
*/
private DruidPredicateFactory getBitmapPredicateFactory(@Nullable ColumnCapabilities inputCapabilites)
{
final boolean isNullUnknown = expr.get().eval(InputBindings.nilBindings()).value() == null;
return new DruidPredicateFactory()
{
@Override
@ -431,6 +432,12 @@ public class ExpressionFilter implements Filter
).asBoolean();
}
@Override
public boolean isNullInputUnknown()
{
return isNullUnknown;
}
// The hashcode and equals are to make SubclassesMustOverrideEqualsAndHashCodeTest stop complaining..
// DruidPredicateFactory currently doesn't really need equals or hashcode since 'toString' method that is actually
// called when testing equality of DimensionPredicateFilter, so it's the truly required method, but that seems

View File

@ -127,12 +127,8 @@ class IncrementalIndexColumnSelectorFactory implements ColumnSelectorFactory, Ro
@Nullable
public ColumnCapabilities getColumnCapabilities(String columnName)
{
if (virtualColumns.exists(columnName)) {
return virtualColumns.getColumnCapabilities(adapter, columnName);
}
// Use adapter.getColumnCapabilities instead of index.getCapabilities (see note in IncrementalIndexStorageAdapater)
return adapter.getColumnCapabilities(columnName);
return virtualColumns.getColumnCapabilitiesWithFallback(adapter, columnName);
}
@Nullable

View File

@ -251,9 +251,6 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
@Override
public ColumnCapabilities getColumnCapabilities(final String columnName)
{
if (virtualColumns.exists(columnName)) {
return virtualColumns.getColumnCapabilities(columnCache, columnName);
}
return columnCache.getColumnCapabilities(columnName);
return virtualColumns.getColumnCapabilitiesWithFallback(columnCache, columnName);
}
}

View File

@ -22,11 +22,13 @@ package org.apache.druid.segment;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.ImmutableBitmap;
import org.apache.druid.query.DefaultBitmapResultFactory;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.StringUtf8DictionaryEncodedColumn;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.index.BitmapColumnIndex;
import org.apache.druid.segment.index.semantic.DictionaryEncodedStringValueIndex;
import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
@ -40,6 +42,7 @@ public class ColumnSelectorColumnIndexSelectorTest
{
private static final String STRING_DICTIONARY_COLUMN_NAME = "string";
private static final String NON_STRING_DICTIONARY_COLUMN_NAME = "not-string";
private static final String NONEXISTENT_COLUMN_NAME = "nonexistent";
BitmapFactory bitmapFactory;
VirtualColumns virtualColumns;
@ -52,13 +55,26 @@ public class ColumnSelectorColumnIndexSelectorTest
public void setup()
{
bitmapFactory = EasyMock.createMock(BitmapFactory.class);
virtualColumns = EasyMock.createMock(VirtualColumns.class);
virtualColumns = VirtualColumns.EMPTY;
index = EasyMock.createMock(ColumnSelector.class);
indexSelector = new ColumnSelectorColumnIndexSelector(bitmapFactory, virtualColumns, index);
indexSupplier = EasyMock.createMock(ColumnIndexSupplier.class);
EasyMock.expect(virtualColumns.getVirtualColumn(STRING_DICTIONARY_COLUMN_NAME)).andReturn(null).anyTimes();
EasyMock.expect(virtualColumns.getVirtualColumn(NON_STRING_DICTIONARY_COLUMN_NAME)).andReturn(null).anyTimes();
EasyMock.expect(index.getColumnCapabilities(STRING_DICTIONARY_COLUMN_NAME))
.andReturn(new ColumnCapabilitiesImpl().setType(ColumnType.STRING)
.setHasMultipleValues(false)
.setDictionaryEncoded(true))
.anyTimes();
EasyMock.expect(index.getColumnCapabilities(NON_STRING_DICTIONARY_COLUMN_NAME))
.andReturn(new ColumnCapabilitiesImpl().setType(ColumnType.STRING)
.setHasMultipleValues(false)
.setDictionaryEncoded(false))
.anyTimes();
EasyMock.expect(index.getColumnCapabilities(NONEXISTENT_COLUMN_NAME))
.andReturn(null)
.anyTimes();
ColumnHolder holder = EasyMock.createMock(ColumnHolder.class);
EasyMock.expect(index.getColumnHolder(STRING_DICTIONARY_COLUMN_NAME)).andReturn(holder).anyTimes();
@ -82,7 +98,9 @@ public class ColumnSelectorColumnIndexSelectorTest
EasyMock.expect(valueIndex.getBitmap(0)).andReturn(someBitmap).anyTimes();
EasyMock.expect(someIndex.forValue("foo")).andReturn(columnIndex).anyTimes();
EasyMock.expect(columnIndex.computeBitmapResult(EasyMock.anyObject(), EasyMock.eq(false))).andReturn(someBitmap).anyTimes();
EasyMock.expect(columnIndex.computeBitmapResult(EasyMock.anyObject(), EasyMock.eq(false)))
.andReturn(someBitmap)
.anyTimes();
ColumnHolder nonStringHolder = EasyMock.createMock(ColumnHolder.class);
EasyMock.expect(index.getColumnHolder(NON_STRING_DICTIONARY_COLUMN_NAME)).andReturn(nonStringHolder).anyTimes();
@ -96,7 +114,18 @@ public class ColumnSelectorColumnIndexSelectorTest
.setHasBitmapIndexes(true)
).anyTimes();
EasyMock.replay(bitmapFactory, virtualColumns, index, indexSupplier, holder, stringColumn, nonStringHolder, someIndex, columnIndex, valueIndex, someBitmap);
EasyMock.replay(
bitmapFactory,
index,
indexSupplier,
holder,
stringColumn,
nonStringHolder,
someIndex,
columnIndex,
valueIndex,
someBitmap
);
}
@Test
@ -116,7 +145,7 @@ public class ColumnSelectorColumnIndexSelectorTest
false
);
Assert.assertNotNull(valueBitmap);
EasyMock.verify(bitmapFactory, virtualColumns, index, indexSupplier);
EasyMock.verify(bitmapFactory, index, indexSupplier);
}
@Test
@ -130,6 +159,31 @@ public class ColumnSelectorColumnIndexSelectorTest
StringValueSetIndexes valueIndex = supplier.as(StringValueSetIndexes.class);
Assert.assertNull(valueIndex);
EasyMock.verify(bitmapFactory, virtualColumns, index, indexSupplier);
EasyMock.verify(bitmapFactory, index, indexSupplier);
}
@Test
public void testStringDictionaryGetColumnCapabilities()
{
final ColumnCapabilities capabilities = indexSelector.getColumnCapabilities(STRING_DICTIONARY_COLUMN_NAME);
Assert.assertEquals(ValueType.STRING, capabilities.getType());
Assert.assertEquals(ColumnCapabilities.Capable.FALSE, capabilities.hasMultipleValues());
Assert.assertEquals(ColumnCapabilities.Capable.TRUE, capabilities.isDictionaryEncoded());
}
@Test
public void testNonStringDictionaryGetColumnCapabilities()
{
final ColumnCapabilities capabilities = indexSelector.getColumnCapabilities(NON_STRING_DICTIONARY_COLUMN_NAME);
Assert.assertEquals(ValueType.STRING, capabilities.getType());
Assert.assertEquals(ColumnCapabilities.Capable.FALSE, capabilities.hasMultipleValues());
Assert.assertEquals(ColumnCapabilities.Capable.FALSE, capabilities.isDictionaryEncoded());
}
@Test
public void testNonexistentColumnGetColumnCapabilities()
{
final ColumnCapabilities capabilities = indexSelector.getColumnCapabilities(NONEXISTENT_COLUMN_NAME);
Assert.assertNull(capabilities);
}
}

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.apache.druid.segment.virtual;
package org.apache.druid.segment;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
@ -31,26 +31,14 @@ import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseFloatColumnValueSelector;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionDictionarySelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.DimensionSelectorUtils;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.TestLongColumnSelector;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ZeroIndexedInts;
import org.apache.druid.segment.virtual.ExpressionVirtualColumn;
import org.apache.druid.segment.virtual.NestedFieldVirtualColumn;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Rule;
@ -119,9 +107,9 @@ public class VirtualColumnsTest extends InitializedNullHandlingTest
{
final VirtualColumns virtualColumns = makeVirtualColumns();
final ColumnInspector baseInspector = column -> null;
Assert.assertEquals(ValueType.FLOAT, virtualColumns.getColumnCapabilities(baseInspector, "expr").getType());
Assert.assertEquals(ValueType.LONG, virtualColumns.getColumnCapabilities(baseInspector, "expr2").getType());
Assert.assertNull(virtualColumns.getColumnCapabilities(baseInspector, REAL_COLUMN_NAME));
Assert.assertEquals(ValueType.FLOAT, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "expr").getType());
Assert.assertEquals(ValueType.LONG, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "expr2").getType());
Assert.assertNull(virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, REAL_COLUMN_NAME));
}
@Test
@ -135,9 +123,9 @@ public class VirtualColumnsTest extends InitializedNullHandlingTest
return null;
}
};
Assert.assertEquals(ValueType.FLOAT, virtualColumns.getColumnCapabilities(baseInspector, "expr").getType());
Assert.assertEquals(ValueType.DOUBLE, virtualColumns.getColumnCapabilities(baseInspector, "expr2").getType());
Assert.assertNull(virtualColumns.getColumnCapabilities(baseInspector, REAL_COLUMN_NAME));
Assert.assertEquals(ValueType.FLOAT, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "expr").getType());
Assert.assertEquals(ValueType.DOUBLE, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "expr2").getType());
Assert.assertNull(virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, REAL_COLUMN_NAME));
}
@Test
@ -519,10 +507,10 @@ public class VirtualColumnsTest extends InitializedNullHandlingTest
final ExpressionVirtualColumn expr2 = new ExpressionVirtualColumn("v3", "v0 * x", null, TestExprMacroTable.INSTANCE);
final VirtualColumns virtualColumns = VirtualColumns.create(ImmutableList.of(v0, v1, expr1, expr2));
Assert.assertEquals(ColumnType.STRING, virtualColumns.getColumnCapabilities(baseInspector, "v0").toColumnType());
Assert.assertEquals(ColumnType.LONG, virtualColumns.getColumnCapabilities(baseInspector, "v1").toColumnType());
Assert.assertEquals(ColumnType.DOUBLE, virtualColumns.getColumnCapabilities(baseInspector, "v2").toColumnType());
Assert.assertEquals(ColumnType.DOUBLE, virtualColumns.getColumnCapabilities(baseInspector, "v3").toColumnType());
Assert.assertEquals(ColumnType.STRING, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "v0").toColumnType());
Assert.assertEquals(ColumnType.LONG, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "v1").toColumnType());
Assert.assertEquals(ColumnType.DOUBLE, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "v2").toColumnType());
Assert.assertEquals(ColumnType.DOUBLE, virtualColumns.getColumnCapabilitiesWithoutFallback(baseInspector, "v3").toColumnType());
Assert.assertTrue(virtualColumns.canVectorize(baseInspector));
}

View File

@ -238,7 +238,7 @@ public class ExpressionVectorSelectorsTest extends InitializedNullHandlingTest
null
);
ColumnCapabilities capabilities = virtualColumns.getColumnCapabilities(storageAdapter, "v");
ColumnCapabilities capabilities = virtualColumns.getColumnCapabilitiesWithFallback(storageAdapter, "v");
int rowCount = 0;
if (capabilities.isDictionaryEncoded().isTrue()) {