make COMPLEX column optionally filterable in Druid code (#6223)

* make COMPLEX column filterable in Druid code

* Revert "make COMPLEX column filterable in Druid code"

This reverts commit 9fc6ec768c.

* complex columns can be optionally made filterable

* some types are always filterable

* add ColumnCapabilitiesImpl serde tests

* add SuppressWarnings annotation
This commit is contained in:
Himanshu 2018-09-05 12:28:49 -07:00 committed by Gian Merlino
parent be6c901114
commit d61f708ef5
6 changed files with 114 additions and 17 deletions

View File

@ -29,10 +29,8 @@ import org.apache.druid.segment.column.BitmapIndex;
import org.apache.druid.segment.column.Column;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.GenericColumn;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.Indexed;
import org.apache.druid.segment.data.IndexedIterable;
import org.apache.druid.segment.filter.Filters;
import java.util.Iterator;
@ -142,7 +140,7 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
}
final Column column = index.getColumn(dimension);
if (column == null || !columnSupportsFiltering(column)) {
if (column == null || !column.getCapabilities().isFilterable()) {
// for missing columns and columns with types that do not support filtering,
// treat the column as if it were a String column full of nulls.
// Create a BitmapIndex so that filters applied to null columns can use
@ -212,7 +210,7 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
}
final Column column = index.getColumn(dimension);
if (column == null || !columnSupportsFiltering(column)) {
if (column == null || !column.getCapabilities().isFilterable()) {
if (NullHandling.isNullOrEquivalent(value)) {
return bitmapFactory.complement(bitmapFactory.makeEmptyImmutableBitmap(), getNumRows());
} else {
@ -247,10 +245,4 @@ public class ColumnSelectorBitmapIndexSelector implements BitmapIndexSelector
{
return virtualColumns.getVirtualColumn(columnName) != null;
}
/**
 * Tells whether the given column's value type is one of {@code Filters.FILTERABLE_TYPES}.
 *
 * @param column column whose capabilities are inspected
 * @return true if the column's type is contained in {@code Filters.FILTERABLE_TYPES}
 */
private static boolean columnSupportsFiltering(Column column)
{
  return Filters.FILTERABLE_TYPES.contains(column.getCapabilities().getType());
}
}

View File

@ -29,6 +29,7 @@ public class ColumnBuilder
{
private ValueType type = null;
private boolean hasMultipleValues = false;
private boolean filterable = false;
private Supplier<DictionaryEncodedColumn> dictionaryEncodedColumn = null;
private Supplier<GenericColumn> genericColumn = null;
@ -60,6 +61,13 @@ public class ColumnBuilder
return this;
}
/**
 * Sets the filterable flag held by this builder.
 *
 * NOTE(review): the "unused" suppression suggests nothing in this code base calls this
 * setter; presumably it exists for code outside this module - confirm.
 *
 * @param filterable the value to store in this builder's filterable flag
 * @return this builder, so calls can be chained
 */
@SuppressWarnings("unused")
public ColumnBuilder setFilterable(boolean filterable)
{
this.filterable = filterable;
return this;
}
public ColumnBuilder setDictionaryEncodedColumn(Supplier<DictionaryEncodedColumn> dictionaryEncodedColumn)
{
this.dictionaryEncodedColumn = dictionaryEncodedColumn;
@ -100,7 +108,8 @@ public class ColumnBuilder
.setDictionaryEncoded(dictionaryEncodedColumn != null)
.setHasBitmapIndexes(bitmapIndex != null)
.setHasSpatialIndexes(spatialIndex != null)
.setHasMultipleValues(hasMultipleValues),
.setHasMultipleValues(hasMultipleValues)
.setFilterable(filterable),
dictionaryEncodedColumn,
genericColumn,
complexColumn,

View File

@ -30,4 +30,5 @@ public interface ColumnCapabilities
boolean hasBitmapIndexes();
boolean hasSpatialIndexes();
boolean hasMultipleValues();
boolean isFilterable();
}

View File

@ -19,6 +19,7 @@
package org.apache.druid.segment.column;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.druid.java.util.common.ISE;
@ -33,6 +34,10 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
private boolean hasSpatialIndexes = false;
private boolean hasMultipleValues = false;
// This is a query time concept and not persisted in the segment files.
@JsonIgnore
private boolean filterable;
@Override
@JsonProperty
public ValueType getType()
@ -99,6 +104,22 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
return hasMultipleValues;
}
/**
 * Reports whether this column can be filtered on.
 *
 * STRING, LONG, FLOAT and DOUBLE columns are always filterable; a column of any other
 * type is filterable only when the filterable flag has been set.
 *
 * @return true if the column is filterable
 */
@Override
public boolean isFilterable()
{
  // An explicitly set flag makes the column filterable regardless of its type.
  if (filterable) {
    return true;
  }

  // Otherwise only these value types are filterable.
  return type == ValueType.STRING
         || type == ValueType.LONG
         || type == ValueType.FLOAT
         || type == ValueType.DOUBLE;
}
/**
 * Sets the filterable flag that isFilterable() falls back to when the column type is not
 * one of the always-filterable types.
 *
 * @param filterable the value to store in the filterable flag
 * @return this instance, so calls can be chained
 */
public ColumnCapabilitiesImpl setFilterable(boolean filterable)
{
this.filterable = filterable;
return this;
}
public ColumnCapabilitiesImpl setHasMultipleValues(boolean hasMultipleValues)
{
this.hasMultipleValues = hasMultipleValues;
@ -124,5 +145,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
this.hasInvertedIndexes |= other.hasBitmapIndexes();
this.hasSpatialIndexes |= other.hasSpatialIndexes();
this.hasMultipleValues |= other.hasMultipleValues();
this.filterable &= other.isFilterable();
}
}

View File

@ -64,12 +64,6 @@ import java.util.NoSuchElementException;
*/
public class Filters
{
public static final List<ValueType> FILTERABLE_TYPES = ImmutableList.of(
ValueType.STRING,
ValueType.LONG,
ValueType.FLOAT,
ValueType.DOUBLE
);
private static final String CTX_KEY_USE_FILTER_CNF = "useFilterCNF";
/**

View File

@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.column;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.segment.TestHelper;
import org.junit.Assert;
import org.junit.Test;
/**
 * JSON serialization tests for {@link ColumnCapabilitiesImpl}.
 *
 * Both tests expect the "filterable" flag to stay out of the JSON form: it must not be
 * written on serialization and must be ignored when present in the input.
 */
public class ColumnCapabilitiesImplTest
{
  private final ObjectMapper mapper = TestHelper.makeJsonMapper();

  /**
   * Round-trips a fully populated instance and checks that every property except
   * "filterable" survives.
   */
  @Test
  public void testSerde() throws Exception
  {
    final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl()
        .setDictionaryEncoded(true)
        .setHasBitmapIndexes(true)
        .setHasMultipleValues(true)
        .setHasSpatialIndexes(true)
        .setType(ValueType.COMPLEX)
        .setFilterable(true);

    final String json = mapper.writeValueAsString(capabilities);

    // The flag was set to true above, yet it must not show up in the serialized form.
    Assert.assertFalse(json.contains("filterable"));

    final ColumnCapabilities roundTripped = mapper.readValue(json, ColumnCapabilitiesImpl.class);

    Assert.assertEquals(ValueType.COMPLEX, roundTripped.getType());
    Assert.assertTrue(roundTripped.isDictionaryEncoded());
    Assert.assertFalse(roundTripped.isRunLengthEncoded());
    Assert.assertTrue(roundTripped.hasSpatialIndexes());
    Assert.assertTrue(roundTripped.hasMultipleValues());
    Assert.assertTrue(roundTripped.hasBitmapIndexes());
    // COMPLEX is not an always-filterable type and the flag did not travel through JSON.
    Assert.assertFalse(roundTripped.isFilterable());
  }

  /**
   * Reads hand-written JSON that carries a "filterable" entry and checks that the entry
   * has no effect on the deserialized object.
   */
  @Test
  public void testDeserialization() throws Exception
  {
    final String json = String.join(
        "\n",
        "{",
        " \"type\":\"COMPLEX\",",
        " \"dictionaryEncoded\":true,",
        " \"runLengthEncoded\":true,",
        " \"hasSpatialIndexes\":true,",
        " \"hasMultipleValues\":true,",
        " \"hasBitmapIndexes\":true,",
        " \"filterable\":true",
        "}"
    );

    final ColumnCapabilities parsed = mapper.readValue(json, ColumnCapabilitiesImpl.class);

    Assert.assertEquals(ValueType.COMPLEX, parsed.getType());
    Assert.assertTrue(parsed.isDictionaryEncoded());
    Assert.assertTrue(parsed.isRunLengthEncoded());
    Assert.assertTrue(parsed.hasSpatialIndexes());
    Assert.assertTrue(parsed.hasMultipleValues());
    Assert.assertTrue(parsed.hasBitmapIndexes());
    // "filterable":true in the input is expected to be ignored.
    Assert.assertFalse(parsed.isFilterable());
  }
}