fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248)

* fix bug with realtime expressions on sparse string columns

* fix test

* add comment back

* push capabilities for dimensions to dimension indexers since they know things

* style

* style

* fixes

* getting a bit carried away

* missed one

* fix it

* benchmark build fix

* review stuffs

* javadoc and comments

* add comment

* more strict check

* fix missed usages of impl instead of interface
Clint Wylie 2020-08-11 11:07:17 -07:00 committed by GitHub
parent 35284e5166
commit c72f96a4ba
31 changed files with 553 additions and 197 deletions


@ -59,7 +59,7 @@ public class StringDimensionIndexerBenchmark
@Setup
public void setup()
{
indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false);
for (int i = 0; i < cardinality; i++) {
indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true);


@ -47,7 +47,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.BaseProgressIndicator;
import org.apache.druid.segment.ProgressIndicator;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.indexing.TuningConfigs;
@ -289,7 +289,7 @@ public class IndexGeneratorJob implements Jobby
AggregatorFactory[] aggs,
HadoopDruidIndexerConfig config,
Iterable<String> oldDimOrder,
Map<String, ColumnCapabilitiesImpl> oldCapabilities
Map<String, ColumnCapabilities> oldCapabilities
)
{
final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();


@ -340,7 +340,7 @@ public class GroupByQueryEngineV2
// Now check column capabilities.
final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension());
return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) ||
return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) ||
(missingMeansNonExistent && columnCapabilities == null);
});
}


@ -227,7 +227,7 @@ public class SegmentAnalyzer
min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
}
} else if (capabilities.isDictionaryEncoded()) {
} else if (capabilities.isDictionaryEncoded().isTrue()) {
// fallback if no bitmap index
DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn();
cardinality = theColumn.getCardinality();


@ -194,7 +194,7 @@ public class TopNQueryEngine
}
if (capabilities != null && capabilities.getType() == ValueType.STRING) {
// string columns must use the on-heap algorithm unless they have the following capabilities
return capabilities.isDictionaryEncoded() && capabilities.areDictionaryValuesUnique().isTrue();
return capabilities.isDictionaryEncoded().isTrue() && capabilities.areDictionaryValuesUnique().isTrue();
} else {
// non-strings are not eligible to use the pooled algorithm, and should use a heap algorithm
return false;


@ -73,16 +73,16 @@ public final class DimensionHandlerUtils
)
{
if (capabilities == null) {
return new StringDimensionHandler(dimensionName, multiValueHandling, true);
return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
}
multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
if (capabilities.getType() == ValueType.STRING) {
if (!capabilities.isDictionaryEncoded()) {
if (!capabilities.isDictionaryEncoded().isTrue()) {
throw new IAE("String column must have dictionary encoding.");
}
return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes());
return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes());
}
if (capabilities.getType() == ValueType.LONG) {
@ -98,7 +98,7 @@ public final class DimensionHandlerUtils
}
// Return a StringDimensionHandler by default (null columns will be treated as String typed)
return new StringDimensionHandler(dimensionName, multiValueHandling, true);
return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
}
public static List<ValueType> getValueTypesFromDimensionSpecs(List<DimensionSpec> dimSpecs)
@ -226,11 +226,11 @@ public final class DimensionHandlerUtils
capabilities = ColumnCapabilitiesImpl.copyOf(capabilities)
.setType(ValueType.STRING)
.setDictionaryValuesUnique(
capabilities.isDictionaryEncoded() &&
capabilities.isDictionaryEncoded().isTrue() &&
fn.getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE
)
.setDictionaryValuesSorted(
capabilities.isDictionaryEncoded() && fn.preservesOrdering()
capabilities.isDictionaryEncoded().isTrue() && fn.preservesOrdering()
);
}
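
For reference, a hedged sketch of the default path above (the column name "dim" is hypothetical; the constructor arguments mirror the null-capabilities branch in this diff): with no known capabilities, the handler assumes a bitmap-indexed, non-spatial string dimension.

import org.apache.druid.segment.DimensionHandler;
import org.apache.druid.segment.DimensionHandlerUtils;

class DefaultHandlerSketch
{
  public static void main(String[] args)
  {
    // capabilities == null: nothing is known about the column yet, so this is equivalent to
    // new StringDimensionHandler("dim", DimensionSchema.MultiValueHandling.ofDefault(), true, false)
    DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities("dim", null, null);
    System.out.println(handler.getDimensionName()); // dim
  }
}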


@ -22,6 +22,7 @@ package org.apache.druid.segment;
import org.apache.druid.collections.bitmap.BitmapFactory;
import org.apache.druid.collections.bitmap.MutableBitmap;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@ -236,6 +237,7 @@ public interface DimensionIndexer
IncrementalIndex.DimensionDesc desc
);
ColumnCapabilities getColumnCapabilities();
/**
* Compares the row values for this DimensionIndexer's dimension from a Row key.
*


@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@ -38,6 +41,9 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
{
public static final Comparator<Double> DOUBLE_COMPARATOR = Comparators.naturalNullsFirst();
private final ColumnCapabilitiesImpl capabilities =
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE);
@Override
public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
@ -89,6 +95,12 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
}
@Override
public ColumnCapabilities getColumnCapabilities()
{
return capabilities;
}
@Override
public DimensionSelector makeDimensionSelector(
DimensionSpec spec,


@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@ -38,6 +41,9 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
{
public static final Comparator<Float> FLOAT_COMPARATOR = Comparators.naturalNullsFirst();
private final ColumnCapabilitiesImpl capabilities =
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT);
@Override
public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
@ -90,6 +96,12 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
}
@Override
public ColumnCapabilities getColumnCapabilities()
{
return capabilities;
}
@Override
public DimensionSelector makeDimensionSelector(
DimensionSpec spec,


@ -88,6 +88,63 @@ public class IndexMergerV9 implements IndexMerger
{
private static final Logger log = new Logger(IndexMergerV9.class);
// merge logic for the state of the capabilities after the incremental index is persisted
public static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC =
new ColumnCapabilities.CoercionLogic()
{
@Override
public boolean dictionaryEncoded()
{
return true;
}
@Override
public boolean dictionaryValuesSorted()
{
return true;
}
@Override
public boolean dictionaryValuesUnique()
{
return true;
}
@Override
public boolean multipleValues()
{
return false;
}
};
public static final ColumnCapabilities.CoercionLogic METRIC_CAPABILITY_MERGE_LOGIC =
new ColumnCapabilities.CoercionLogic()
{
@Override
public boolean dictionaryEncoded()
{
return false;
}
@Override
public boolean dictionaryValuesSorted()
{
return false;
}
@Override
public boolean dictionaryValuesUnique()
{
return false;
}
@Override
public boolean multipleValues()
{
return false;
}
};
private final ObjectMapper mapper;
private final IndexIO indexIO;
private final SegmentWriteOutMediumFactory defaultSegmentWriteOutMediumFactory;
@ -724,14 +781,14 @@ public class IndexMergerV9 implements IndexMerger
for (String dimension : adapter.getDimensionNames()) {
ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
capabilitiesMap.compute(dimension, (d, existingCapabilities) ->
ColumnCapabilitiesImpl.snapshot(capabilities)
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, DIMENSION_CAPABILITY_MERGE_LOGIC)
);
}
for (String metric : adapter.getMetricNames()) {
ColumnCapabilities capabilities = adapter.getCapabilities(metric);
capabilitiesMap.compute(metric, (m, existingCapabilities) ->
ColumnCapabilitiesImpl.snapshot(capabilities)
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, METRIC_CAPABILITY_MERGE_LOGIC)
);
metricsValueTypes.put(metric, capabilities.getType());
metricTypeNames.put(metric, adapter.getMetricType(metric));
}
@ -1011,7 +1068,10 @@ public class IndexMergerV9 implements IndexMerger
{
Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
for (int i = 0; i < mergedDimensions.size(); i++) {
ColumnCapabilities capabilities = dimCapabilities.get(i);
ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot(
dimCapabilities.get(i),
DIMENSION_CAPABILITY_MERGE_LOGIC
);
String dimName = mergedDimensions.get(i);
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null);
handlers.put(dimName, handler);
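
For orientation, a hedged sketch (hypothetical two-segment setup; only constants and signatures introduced in this diff are used) of what the merge above computes: each side is snapshotted with the coercion logic first, so an UNKNOWN hasMultipleValues on one segment coerces to false before being or-ed with the other segment's value.

import org.apache.druid.segment.IndexMergerV9;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;

class CapabilityMergeSketch
{
  public static void main(String[] args)
  {
    // segment A: string column, dictionary encoded, multi-valuedness never observed (UNKNOWN)
    ColumnCapabilitiesImpl a = new ColumnCapabilitiesImpl().setType(ValueType.STRING).setDictionaryEncoded(true);
    // segment B: same column, but a multi-valued row was observed
    ColumnCapabilitiesImpl b = new ColumnCapabilitiesImpl().setType(ValueType.STRING).setDictionaryEncoded(true);
    b.setHasMultipleValues(true);
    ColumnCapabilities merged =
        ColumnCapabilitiesImpl.merge(a, b, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC);
    // A's UNKNOWN multi-value flag coerces to FALSE under the dimension merge logic,
    // then FALSE.or(TRUE) yields TRUE for the merged column
    System.out.println(merged.hasMultipleValues().isTrue()); // prints true
  }
}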


@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.guava.Comparators;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
@ -38,6 +41,10 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
{
public static final Comparator LONG_COMPARATOR = Comparators.<Long>naturalNullsFirst();
private final ColumnCapabilitiesImpl capabilities =
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
@Override
public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
{
@ -90,6 +97,12 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
}
@Override
public ColumnCapabilities getColumnCapabilities()
{
return capabilities;
}
@Override
public DimensionSelector makeDimensionSelector(
DimensionSpec spec,


@ -98,12 +98,14 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
private final String dimensionName;
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
private final boolean hasSpatialIndexes;
public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
{
this.dimensionName = dimensionName;
this.multiValueHandling = multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
this.hasSpatialIndexes = hasSpatialIndexes;
}
@Override
@ -139,7 +141,7 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
@Override
public DimensionIndexer<Integer, int[], String> makeIndexer()
{
return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes);
return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes);
}
@Override
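
An illustrative usage sketch (hypothetical values; constructor and accessor signatures are the ones in this diff), showing the new spatial flag threading from handler to indexer to reported capabilities:

import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.segment.DimensionIndexer;
import org.apache.druid.segment.StringDimensionHandler;
import org.apache.druid.segment.column.ColumnCapabilities;

class SpatialFlagSketch
{
  public static void main(String[] args)
  {
    // hasBitmapIndexes = true, hasSpatialIndexes = false
    StringDimensionHandler handler =
        new StringDimensionHandler("dim", DimensionSchema.MultiValueHandling.ofDefault(), true, false);
    DimensionIndexer<Integer, int[], String> indexer = handler.makeIndexer();
    ColumnCapabilities caps = indexer.getColumnCapabilities();
    System.out.println(caps.hasBitmapIndexes());  // true
    System.out.println(caps.hasSpatialIndexes()); // false
  }
}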


@ -40,6 +40,9 @@ import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.ArrayBasedIndexedInts;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.IndexedInts;
@ -74,7 +77,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
private String minValue = null;
@Nullable
private String maxValue = null;
private int idForNull = ABSENT_VALUE_ID;
private volatile int idForNull = ABSENT_VALUE_ID;
private final Object2IntMap<String> valueToId = new Object2IntOpenHashMap<>();
@ -233,17 +236,19 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
private final DimensionDictionary dimLookup;
private final MultiValueHandling multiValueHandling;
private final boolean hasBitmapIndexes;
private final boolean hasSpatialIndexes;
private volatile boolean hasMultipleValues = false;
private volatile boolean isSparse = false;
@Nullable
private SortedDimensionDictionary sortedLookup;
public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
{
this.dimLookup = new DimensionDictionary();
this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
this.hasBitmapIndexes = hasBitmapIndexes;
this.hasSpatialIndexes = hasSpatialIndexes;
}
@Override
@ -400,6 +405,17 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
return dimLookup.size();
}
/**
* returns true if all values are encoded in {@link #dimLookup}
*/
private boolean dictionaryEncodesAllValues()
{
// name lookup is possible in advance if we explicitly process a value for every row, or if we've encountered an
// actual null value and it is present in our dictionary. otherwise the dictionary will be missing ids for implicit
// null values
return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
}
@Override
public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs)
{
@ -456,6 +472,37 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
return Arrays.hashCode(key);
}
@Override
public ColumnCapabilities getColumnCapabilities()
{
ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl().setType(ValueType.STRING)
.setHasBitmapIndexes(hasBitmapIndexes)
.setHasSpatialIndexes(hasSpatialIndexes)
.setDictionaryValuesUnique(true)
.setDictionaryValuesSorted(false);
// Strings are opportunistically multi-valued, but the capabilities are initialized as 'unknown', since a
// multi-valued row might be processed at any point during ingestion.
// We only explicitly set multiple values if we are certain that there are multiple values; otherwise a race
// condition could occur where this indexer processes a multi-valued row in the period between obtaining the
// capabilities and actually processing the rows with a selector. Leaving it as unknown allows the caller to decide
// how to handle this.
if (hasMultipleValues) {
capabilities.setHasMultipleValues(true);
}
// Likewise, only set dictionaryEncoded explicitly if true, for a similar reason as the multi-value handling. The
// dictionary is populated as rows are processed, but there might be implicit default values not accounted for in
// the dictionary yet. We can be certain that the dictionary has an entry for every value if either
// a) we have already processed an explicit default (null) valued row for this column, or
// b) the processing was not 'sparse', meaning that this indexer has processed an explicit value for every row.
final boolean allValuesEncoded = dictionaryEncodesAllValues();
if (allValuesEncoded) {
capabilities.setDictionaryEncoded(true);
}
return capabilities;
}
@Override
public DimensionSelector makeDimensionSelector(
final DimensionSpec spec,
@ -630,9 +677,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
@Override
public boolean nameLookupPossibleInAdvance()
{
// name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this
// column) or we've encountered an actual null value and it is present in our dictionary
return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
return dictionaryEncodesAllValues();
}
@Nullable
@ -696,6 +741,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
return makeDimensionSelector(DefaultDimensionSpec.of(desc.getName()), currEntry, desc);
}
@Nullable
@Override
public Object convertUnsortedEncodedKeyComponentToActualList(int[] key)
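
To make the sparse condition concrete, a hedged sketch (the values are hypothetical; setSparseIndexed is the method the removed comment above references, invoked by IncrementalIndex when a row omits this column):

import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.segment.StringDimensionIndexer;
import org.apache.druid.segment.column.ColumnCapabilities;

class SparseDictionarySketch
{
  public static void main(String[] args)
  {
    StringDimensionIndexer indexer =
        new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false);
    indexer.processRowValsToUnsortedEncodedKeyComponent("foo", true); // row 1 supplies a value
    indexer.setSparseIndexed(); // row 2 omitted the column; no null id was allocated
    ColumnCapabilities caps = indexer.getColumnCapabilities();
    // dictionaryEncodesAllValues() is false, so dictionaryEncoded stays UNKNOWN rather than TRUE,
    // matching nameLookupPossibleInAdvance() == false on selectors built over this indexer
    System.out.println(caps.isDictionaryEncoded().isTrue());    // false
    System.out.println(caps.isDictionaryEncoded().isUnknown()); // true
  }
}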


@ -26,17 +26,59 @@ import org.apache.druid.java.util.common.StringUtils;
import javax.annotation.Nullable;
/**
* This interface is used to expose information about columns that is interesting to know for all matters dealing with
* reading from columns, including query planning and optimization, creating readers to merge segments at ingestion
* time, and probably nearly anything else you can imagine.
*/
public interface ColumnCapabilities
{
/**
* Column type, good to know so callers know what to expect and which selector is optimal to use
*/
ValueType getType();
boolean isDictionaryEncoded();
/**
* Is the column dictionary encoded? If so, a DimensionDictionarySelector may be used instead of a value
* selector, allowing algorithms to operate on primitive integer dictionary ids rather than the looked-up
* dictionary values
*/
Capable isDictionaryEncoded();
/**
* If the column is dictionary encoded, are those values sorted? Useful to know for optimizations that can defer
* value lookup and sort directly on the dictionary ids
*/
Capable areDictionaryValuesSorted();
/**
* If the column is dictionary encoded, is there a 1:1 mapping of dictionary ids to values? If this is true, it
* unlocks optimizations such as grouping directly on dictionary ids with deferred value
* lookup
*/
Capable areDictionaryValuesUnique();
boolean isRunLengthEncoded();
boolean hasBitmapIndexes();
boolean hasSpatialIndexes();
/**
* String columns are sneaky and might have multiple values; this allows callers to know and prepare
* themselves appropriately
*/
Capable hasMultipleValues();
/**
* Does the column have an inverted index bitmap for each value? If so, these may be employed to 'pre-filter' the
* column by examining if the values match the filter and intersecting the bitmaps, to avoid having to scan and
* evaluate if every row matches the filter
*/
boolean hasBitmapIndexes();
/**
* Does the column have spatial indexes available to allow use with spatial filtering?
*/
boolean hasSpatialIndexes();
/**
* All Druid primitive columns support filtering, with or without indexes, but by default complex columns
* do not support direct filtering, unless provided through a custom implementation.
*/
boolean isFilterable();
enum Capable
@ -55,6 +97,11 @@ public interface ColumnCapabilities
return isTrue() || isUnknown();
}
public boolean isFalse()
{
return this == FALSE;
}
public boolean isUnknown()
{
return this == UNKNOWN;
@ -105,4 +152,36 @@ public interface ColumnCapabilities
return StringUtils.toLowerCase(super.toString());
}
}
/**
* This interface defines the shape of a mechanism to allow for bespoke coercion of {@link Capable#UNKNOWN} into
* {@link Capable#TRUE} or {@link Capable#FALSE} for each {@link Capable} of a {@link ColumnCapabilities}, as is
* appropriate for the situation of the caller.
*/
interface CoercionLogic
{
/**
* If {@link ColumnCapabilities#isDictionaryEncoded()} is {@link Capable#UNKNOWN}, define if it should be treated
* as true or false.
*/
boolean dictionaryEncoded();
/**
* If {@link ColumnCapabilities#areDictionaryValuesSorted()} is {@link Capable#UNKNOWN}, define if it should be treated
* as true or false.
*/
boolean dictionaryValuesSorted();
/**
* If {@link ColumnCapabilities#areDictionaryValuesUnique()} is {@link Capable#UNKNOWN}, define if it should be treated
* as true or false.
*/
boolean dictionaryValuesUnique();
/**
* If {@link ColumnCapabilities#hasMultipleValues()} is {@link Capable#UNKNOWN}, define if it should be treated
* as true or false.
*/
boolean multipleValues();
}
}
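
Several call sites in this commit replace boolean negations with explicit tri-state checks; a small hedged sketch of the Capable semantics (using only methods defined above):

import org.apache.druid.segment.column.ColumnCapabilities.Capable;

class CapableSketch
{
  public static void main(String[] args)
  {
    for (Capable c : Capable.values()) {
      // isMaybeTrue() is true for TRUE and UNKNOWN; isFalse() only for FALSE, so
      // capable.isFalse() is the strict, readable replacement for !capable.isMaybeTrue()
      System.out.printf(
          "%s: isTrue=%s isMaybeTrue=%s isFalse=%s isUnknown=%s%n",
          c, c.isTrue(), c.isMaybeTrue(), c.isFalse(), c.isUnknown()
      );
    }
  }
}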


@ -21,6 +21,7 @@ package org.apache.druid.segment.column;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.ISE;
@ -37,7 +38,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
if (other != null) {
capabilities.type = other.getType();
capabilities.dictionaryEncoded = other.isDictionaryEncoded();
capabilities.runLengthEncoded = other.isRunLengthEncoded();
capabilities.hasInvertedIndexes = other.hasBitmapIndexes();
capabilities.hasSpatialIndexes = other.hasSpatialIndexes();
capabilities.hasMultipleValues = other.hasMultipleValues();
@ -49,32 +49,61 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
}
/**
* Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
* {@link Capable#FALSE}, in order to present a snapshot of the state of this column
* Copy a {@link ColumnCapabilities} and coerce all {@link ColumnCapabilities.Capable#UNKNOWN} to
* {@link ColumnCapabilities.Capable#TRUE} or {@link ColumnCapabilities.Capable#FALSE} as specified by
* {@link ColumnCapabilities.CoercionLogic}
*/
@Nullable
public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities)
{
return snapshot(capabilities, false);
}
/**
* Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
* {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of this column
*/
@Nullable
public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue)
public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, CoercionLogic coerce)
{
if (capabilities == null) {
return null;
}
ColumnCapabilitiesImpl copy = copyOf(capabilities);
copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue);
copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue);
copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue);
copy.dictionaryEncoded = copy.dictionaryEncoded.coerceUnknownToBoolean(coerce.dictionaryEncoded());
copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(coerce.dictionaryValuesSorted());
copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(coerce.dictionaryValuesUnique());
copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(coerce.multipleValues());
return copy;
}
/**
* Snapshots a pair of capabilities and then merges them
*/
@Nullable
public static ColumnCapabilitiesImpl merge(
@Nullable final ColumnCapabilities capabilities,
@Nullable final ColumnCapabilities other,
CoercionLogic coercionLogic
)
{
ColumnCapabilitiesImpl merged = snapshot(capabilities, coercionLogic);
ColumnCapabilitiesImpl otherSnapshot = snapshot(other, coercionLogic);
if (merged == null) {
return otherSnapshot;
} else if (otherSnapshot == null) {
return merged;
}
if (merged.type == null) {
merged.type = other.getType();
}
if (!merged.type.equals(otherSnapshot.getType())) {
throw new ISE("Cannot merge columns of type[%s] and [%s]", merged.type, otherSnapshot.getType());
}
merged.dictionaryEncoded = merged.dictionaryEncoded.or(otherSnapshot.isDictionaryEncoded());
merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues());
merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted());
merged.dictionaryValuesUnique = merged.dictionaryValuesUnique.and(otherSnapshot.areDictionaryValuesUnique());
merged.hasInvertedIndexes |= otherSnapshot.hasBitmapIndexes();
merged.hasSpatialIndexes |= otherSnapshot.hasSpatialIndexes();
merged.filterable &= otherSnapshot.isFilterable();
return merged;
}
/**
* Create a no frills, simple column with {@link ValueType} set and everything else false
@ -93,10 +122,9 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
@Nullable
private ValueType type = null;
private boolean dictionaryEncoded = false;
private boolean runLengthEncoded = false;
private boolean hasInvertedIndexes = false;
private boolean hasSpatialIndexes = false;
private Capable dictionaryEncoded = Capable.UNKNOWN;
private Capable hasMultipleValues = Capable.UNKNOWN;
// These capabilities are computed at query time and not persisted in the segment files.
@ -121,15 +149,16 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
}
@Override
@JsonProperty
public boolean isDictionaryEncoded()
@JsonProperty("dictionaryEncoded")
public Capable isDictionaryEncoded()
{
return dictionaryEncoded;
}
@JsonSetter("dictionaryEncoded")
public ColumnCapabilitiesImpl setDictionaryEncoded(boolean dictionaryEncoded)
{
this.dictionaryEncoded = dictionaryEncoded;
this.dictionaryEncoded = Capable.of(dictionaryEncoded);
return this;
}
@ -157,13 +186,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
return this;
}
@Override
@JsonProperty
public boolean isRunLengthEncoded()
{
return runLengthEncoded;
}
@Override
@JsonProperty("hasBitmapIndexes")
public boolean hasBitmapIndexes()
@ -218,30 +240,4 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
this.filterable = filterable;
return this;
}
public ColumnCapabilities merge(@Nullable ColumnCapabilities other)
{
if (other == null) {
return this;
}
if (type == null) {
type = other.getType();
}
if (!type.equals(other.getType())) {
throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType());
}
this.dictionaryEncoded |= other.isDictionaryEncoded();
this.runLengthEncoded |= other.isRunLengthEncoded();
this.hasInvertedIndexes |= other.hasBitmapIndexes();
this.hasSpatialIndexes |= other.hasSpatialIndexes();
this.filterable &= other.isFilterable();
this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues());
this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted());
this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique());
return this;
}
}


@ -115,7 +115,7 @@ public class ExpressionFilter implements Filter
// multiple values. The lack of multiple values is important because expression filters treat multi-value
// arrays as nulls, which doesn't permit index based filtering.
final String column = Iterables.getOnlyElement(requiredBindings.get());
return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue();
return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse();
} else {
// Multi-column expression.
return false;


@ -414,7 +414,7 @@ public class Filters
if (filter.supportsBitmapIndex(indexSelector)) {
final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension);
if (columnHolder != null) {
return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue();
return columnHolder.getCapabilities().hasMultipleValues().isFalse();
}
}
return false;


@ -59,6 +59,7 @@ import org.apache.druid.segment.DimensionIndexer;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.DoubleColumnSelector;
import org.apache.druid.segment.FloatColumnSelector;
import org.apache.druid.segment.IndexMergerV9;
import org.apache.druid.segment.LongColumnSelector;
import org.apache.druid.segment.Metadata;
import org.apache.druid.segment.NilColumnValueSelector;
@ -249,7 +250,8 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
private final Map<String, DimensionDesc> dimensionDescs;
private final List<DimensionDesc> dimensionDescsList;
private final Map<String, ColumnCapabilitiesImpl> columnCapabilities;
// dimension capabilities are provided by the indexers
private final Map<String, ColumnCapabilities> timeAndMetricsColumnCapabilities;
private final AtomicInteger numEntries = new AtomicInteger();
private final AtomicLong bytesInMemory = new AtomicLong();
@ -287,7 +289,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
this.deserializeComplexMetrics = deserializeComplexMetrics;
this.reportParseExceptions = reportParseExceptions;
this.columnCapabilities = new HashMap<>();
this.timeAndMetricsColumnCapabilities = new HashMap<>();
this.metadata = new Metadata(
null,
getCombiningAggregators(metrics),
@ -302,7 +304,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
for (AggregatorFactory metric : metrics) {
MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric);
metricDescs.put(metricDesc.getName(), metricDesc);
columnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
timeAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
}
DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec();
@ -312,24 +314,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) {
ValueType type = TYPE_MAP.get(dimSchema.getValueType());
String dimName = dimSchema.getName();
ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type);
ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type);
capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex());
if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) {
capabilities.setHasSpatialIndexes(true);
} else {
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
dimName,
capabilities,
dimSchema.getMultiValueHandling()
);
addNewDimension(dimName, capabilities, handler);
}
columnCapabilities.put(dimName, capabilities);
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
dimName,
capabilities,
dimSchema.getMultiValueHandling()
);
addNewDimension(dimName, handler);
}
//__time capabilities
columnCapabilities.put(
timeAndMetricsColumnCapabilities.put(
ColumnHolder.TIME_COLUMN_NAME,
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)
);
@ -589,9 +589,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
return row;
}
public Map<String, ColumnCapabilitiesImpl> getColumnCapabilities()
public Map<String, ColumnCapabilities> getColumnCapabilities()
{
return columnCapabilities;
ImmutableMap.Builder<String, ColumnCapabilities> builder =
ImmutableMap.<String, ColumnCapabilities>builder().putAll(timeAndMetricsColumnCapabilities);
dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities()));
return builder.build();
}
/**
@ -658,23 +662,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
continue;
}
boolean wasNewDim = false;
ColumnCapabilitiesImpl capabilities;
DimensionDesc desc = dimensionDescs.get(dimension);
if (desc != null) {
capabilities = desc.getCapabilities();
absentDimensions.remove(dimension);
} else {
wasNewDim = true;
capabilities = columnCapabilities.get(dimension);
if (capabilities == null) {
// For schemaless type discovery, assume everything is a String for now, can change later.
capabilities = makeCapabilitiesFromValueType(ValueType.STRING);
columnCapabilities.put(dimension, capabilities);
}
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
desc = addNewDimension(dimension, capabilities, handler);
desc = addNewDimension(
dimension,
DimensionHandlerUtils.getHandlerFromCapabilities(
dimension,
// for schemaless type discovery, everything is a String. This should probably try to autodetect
// based on the value in order to use a better handler
makeDefaultCapabilitiesFromValueType(ValueType.STRING),
null
)
);
}
DimensionHandler handler = desc.getHandler();
DimensionIndexer indexer = desc.getIndexer();
Object dimsKey = null;
try {
@ -684,13 +687,6 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
parseExceptionMessages.add(pe.getMessage());
}
dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey);
// Set column capabilities as data is coming in
if (!capabilities.hasMultipleValues().isTrue() &&
dimsKey != null &&
handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
capabilities.setHasMultipleValues(true);
}
if (wasNewDim) {
// unless this is the first row we are processing, all newly discovered columns will be sparse
if (maxIngestedEventTime != null) {
@ -928,7 +924,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
}
}
private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type)
private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type)
{
if (type == ValueType.STRING) {
// we start out as not having multiple values, but this might change as we encounter them
@ -949,7 +945,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
*/
public void loadDimensionIterable(
Iterable<String> oldDimensionOrder,
Map<String, ColumnCapabilitiesImpl> oldColumnCapabilities
Map<String, ColumnCapabilities> oldColumnCapabilities
)
{
synchronized (dimensionDescs) {
@ -958,19 +954,21 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
}
for (String dim : oldDimensionOrder) {
if (dimensionDescs.get(dim) == null) {
ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim);
columnCapabilities.put(dim, capabilities);
ColumnCapabilitiesImpl capabilities = ColumnCapabilitiesImpl.snapshot(
oldColumnCapabilities.get(dim),
IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC
);
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null);
addNewDimension(dim, capabilities, handler);
addNewDimension(dim, handler);
}
}
}
}
@GuardedBy("dimensionDescs")
private DimensionDesc addNewDimension(String dim, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
private DimensionDesc addNewDimension(String dim, DimensionHandler handler)
{
DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, capabilities, handler);
DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, handler);
dimensionDescs.put(dim, desc);
dimensionDescsList.add(desc);
return desc;
@ -998,7 +996,10 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
@Nullable
public ColumnCapabilities getCapabilities(String column)
{
return columnCapabilities.get(column);
if (dimensionDescs.containsKey(column)) {
return dimensionDescs.get(column).getCapabilities();
}
return timeAndMetricsColumnCapabilities.get(column);
}
public Metadata getMetadata()
@ -1080,15 +1081,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
{
private final int index;
private final String name;
private final ColumnCapabilitiesImpl capabilities;
private final DimensionHandler handler;
private final DimensionIndexer indexer;
public DimensionDesc(int index, String name, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
public DimensionDesc(int index, String name, DimensionHandler handler)
{
this.index = index;
this.name = name;
this.capabilities = capabilities;
this.handler = handler;
this.indexer = handler.makeIndexer();
}
@ -1103,9 +1102,9 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
return name;
}
public ColumnCapabilitiesImpl getCapabilities()
public ColumnCapabilities getCapabilities()
{
return capabilities;
return indexer.getColumnCapabilities();
}
public DimensionHandler getHandler()
@ -1124,7 +1123,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
private final int index;
private final String name;
private final String type;
private final ColumnCapabilitiesImpl capabilities;
private final ColumnCapabilities capabilities;
public MetricDesc(int index, AggregatorFactory factory)
{
@ -1163,7 +1162,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
return type;
}
public ColumnCapabilitiesImpl getCapabilities()
public ColumnCapabilities getCapabilities()
{
return capabilities;
}


@ -52,6 +52,62 @@ import java.util.Iterator;
*/
public class IncrementalIndexStorageAdapter implements StorageAdapter
{
private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
new ColumnCapabilities.CoercionLogic()
{
@Override
public boolean dictionaryEncoded()
{
return false;
}
@Override
public boolean dictionaryValuesSorted()
{
return false;
}
@Override
public boolean dictionaryValuesUnique()
{
return true;
}
@Override
public boolean multipleValues()
{
return true;
}
};
private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
new ColumnCapabilities.CoercionLogic()
{
@Override
public boolean dictionaryEncoded()
{
return true;
}
@Override
public boolean dictionaryValuesSorted()
{
return true;
}
@Override
public boolean dictionaryValuesUnique()
{
return true;
}
@Override
public boolean multipleValues()
{
return false;
}
};
final IncrementalIndex<?> index;
public IncrementalIndexStorageAdapter(IncrementalIndex<?> index)
@ -154,7 +210,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
// to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of
// multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could
// be removed.
return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true);
return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC);
}
/**
@ -165,7 +221,10 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
*/
public ColumnCapabilities getSnapshotColumnCapabilities(String column)
{
return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column));
return ColumnCapabilitiesImpl.snapshot(
index.getCapabilities(column),
SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC
);
}
@Override
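
The two private constants above coerce UNKNOWN in opposite directions. A hedged, self-contained sketch of the query-time direction (the inline CoercionLogic is a stand-in for STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC, which is private to this class):

import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ValueType;

class CoercionDirectionSketch
{
  public static void main(String[] args)
  {
    // a fresh string column: every Capable field starts UNKNOWN
    ColumnCapabilitiesImpl unknowns = new ColumnCapabilitiesImpl().setType(ValueType.STRING);
    ColumnCapabilities.CoercionLogic queryTime = new ColumnCapabilities.CoercionLogic()
    {
      @Override public boolean dictionaryEncoded() { return false; }
      @Override public boolean dictionaryValuesSorted() { return false; }
      @Override public boolean dictionaryValuesUnique() { return true; }
      @Override public boolean multipleValues() { return true; }
    };
    // at query time an UNKNOWN hasMultipleValues coerces to TRUE, because a multi-valued row
    // may still be ingested while a cursor is open; the snapshot variant coerces it to FALSE
    ColumnCapabilitiesImpl coerced = ColumnCapabilitiesImpl.snapshot(unknowns, queryTime);
    System.out.println(coerced.hasMultipleValues().isTrue()); // true
  }
}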


@ -83,9 +83,9 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
spec -> {
final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
if (holder == null
|| !holder.getCapabilities().isDictionaryEncoded()
|| holder.getCapabilities().isDictionaryEncoded().isFalse()
|| holder.getCapabilities().getType() != ValueType.STRING
|| !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
|| holder.getCapabilities().hasMultipleValues().isFalse()) {
throw new ISE(
"Column[%s] is not a multi-value string column, do not ask for a multi-value selector",
spec.getDimension()
@ -119,7 +119,7 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
spec -> {
final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
if (holder == null
|| !holder.getCapabilities().isDictionaryEncoded()
|| !holder.getCapabilities().isDictionaryEncoded().isTrue()
|| holder.getCapabilities().getType() != ValueType.STRING) {
// Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls.
return NilVectorSelector.create(offset);


@ -153,8 +153,8 @@ public class ExpressionSelectors
);
} else if (capabilities != null
&& capabilities.getType() == ValueType.STRING
&& capabilities.isDictionaryEncoded()
&& !capabilities.hasMultipleValues().isMaybeTrue()
&& capabilities.isDictionaryEncoded().isTrue()
&& capabilities.hasMultipleValues().isFalse()
&& exprDetails.getArrayBindings().isEmpty()) {
// Optimization for expressions that hit one scalar string column and nothing else.
return new SingleStringInputCachingExpressionColumnValueSelector(
@ -225,7 +225,7 @@ public class ExpressionSelectors
// not treating it as an array and not wanting to output an array
if (capabilities != null
&& capabilities.getType() == ValueType.STRING
&& capabilities.isDictionaryEncoded()
&& capabilities.isDictionaryEncoded().isTrue()
&& !capabilities.hasMultipleValues().isUnknown()
&& !exprDetails.hasInputArrays()
&& !exprDetails.isOutputArray()


@ -138,7 +138,7 @@ public class LookupSegmentTest
// reporting complete single-valued capabilities. It would be good to change this in the future, so query engines
// running on top of lookups can take advantage of singly-valued optimizations.
Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
Assert.assertFalse(capabilities.isDictionaryEncoded());
Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
}
@Test
@ -151,7 +151,7 @@ public class LookupSegmentTest
// running on top of lookups can take advantage of singly-valued optimizations.
Assert.assertEquals(ValueType.STRING, capabilities.getType());
Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
Assert.assertFalse(capabilities.isDictionaryEncoded());
Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
}
@Test


@ -45,7 +45,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
@ -2012,32 +2011,6 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
Assert.assertEquals(-1, dictIdSeeker.seek(5));
}
@Test(expected = IllegalArgumentException.class)
public void testCloser() throws Exception
{
final long timestamp = System.currentTimeMillis();
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
IncrementalIndexTest.populateIndex(timestamp, toPersist);
ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1");
capabilities.setHasSpatialIndexes(true);
final File tempDir = temporaryFolder.newFolder();
final File v8TmpDir = new File(tempDir, "v8-tmp");
final File v9TmpDir = new File(tempDir, "v9-tmp");
try {
indexMerger.persist(toPersist, tempDir, indexSpec, null);
}
finally {
if (v8TmpDir.exists()) {
Assert.fail("v8-tmp dir not clean.");
}
if (v9TmpDir.exists()) {
Assert.fail("v9-tmp dir not clean.");
}
}
}
@Test
public void testMultiValueHandling() throws Exception
{


@ -150,20 +150,25 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
ColumnCapabilities caps = INC_INDEX.getCapabilities("d1");
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
Assert.assertTrue(caps.isDictionaryEncoded());
Assert.assertTrue(caps.isDictionaryEncoded().isMaybeTrue());
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
// multi-value is unknown unless explicitly set to 'true'
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
// at index merge or query time we 'complete' the capabilities to take a snapshot of the current state,
// coercing any 'UNKNOWN' values to false
Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue());
Assert.assertFalse(
ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC)
.hasMultipleValues()
.isMaybeTrue()
);
Assert.assertFalse(caps.hasSpatialIndexes());
caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities();
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
Assert.assertTrue(caps.isDictionaryEncoded());
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@ -176,7 +181,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
ColumnCapabilities caps = INC_INDEX.getCapabilities("d2");
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
Assert.assertTrue(caps.isDictionaryEncoded());
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isTrue());
@ -185,7 +190,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities();
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertTrue(caps.hasBitmapIndexes());
Assert.assertTrue(caps.isDictionaryEncoded());
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isTrue());
@ -199,12 +204,11 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX);
}
private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType)
{
Assert.assertEquals(valueType, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());


@ -51,7 +51,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, ColumnHolder.TIME_COLUMN_NAME);
Assert.assertEquals(ValueType.LONG, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@ -65,7 +65,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, STRING_COLUMN_NAME);
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
@ -79,7 +79,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, LONG_COLUMN_NAME);
Assert.assertEquals(ValueType.LONG, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@ -93,7 +93,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, FLOAT_COLUMN_NAME);
Assert.assertEquals(ValueType.FLOAT, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@ -107,7 +107,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, DOUBLE_COLUMN_NAME);
Assert.assertEquals(ValueType.DOUBLE, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
@ -121,7 +121,7 @@ public class RowBasedColumnSelectorFactoryTest
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, COMPLEX_COLUMN_NAME);
Assert.assertEquals(ValueType.COMPLEX, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());


@ -44,8 +44,7 @@ public class ColumnCapabilitiesImplTest
ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
Assert.assertEquals(ValueType.COMPLEX, cc.getType());
Assert.assertTrue(cc.isDictionaryEncoded());
Assert.assertFalse(cc.isRunLengthEncoded());
Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
Assert.assertTrue(cc.hasSpatialIndexes());
Assert.assertTrue(cc.hasMultipleValues().isTrue());
Assert.assertTrue(cc.hasBitmapIndexes());
@ -69,8 +68,7 @@ public class ColumnCapabilitiesImplTest
ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
Assert.assertEquals(ValueType.COMPLEX, cc.getType());
Assert.assertTrue(cc.isDictionaryEncoded());
Assert.assertTrue(cc.isRunLengthEncoded());
Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
Assert.assertTrue(cc.hasSpatialIndexes());
Assert.assertTrue(cc.hasMultipleValues().isTrue());
Assert.assertTrue(cc.hasBitmapIndexes());


@ -200,7 +200,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
Assert.assertEquals(ValueType.STRING, capabilities.getType());
Assert.assertTrue(capabilities.hasBitmapIndexes());
Assert.assertTrue(capabilities.isDictionaryEncoded());
Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
Assert.assertTrue(capabilities.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(capabilities.areDictionaryValuesUnique().isTrue());
}
@ -216,7 +216,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
Assert.assertFalse(capabilities.hasBitmapIndexes());
Assert.assertFalse(capabilities.areDictionaryValuesUnique().isTrue());
Assert.assertFalse(capabilities.areDictionaryValuesSorted().isTrue());
Assert.assertTrue(capabilities.isDictionaryEncoded());
Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
}
@Test


@ -137,7 +137,7 @@ public class IndexedTableJoinableTest
{
final ColumnCapabilities capabilities = target.getColumnCapabilities("str");
Assert.assertEquals(ValueType.STRING, capabilities.getType());
Assert.assertTrue(capabilities.isDictionaryEncoded());
Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
Assert.assertFalse(capabilities.hasBitmapIndexes());
Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
Assert.assertFalse(capabilities.hasSpatialIndexes());
@ -148,7 +148,7 @@ public class IndexedTableJoinableTest
{
final ColumnCapabilities capabilities = target.getColumnCapabilities("long");
Assert.assertEquals(ValueType.LONG, capabilities.getType());
Assert.assertFalse(capabilities.isDictionaryEncoded());
Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
Assert.assertFalse(capabilities.hasBitmapIndexes());
Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
Assert.assertFalse(capabilities.hasSpatialIndexes());


@ -21,20 +21,41 @@ package org.apache.druid.segment.virtual;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.common.guava.SettableSupplier;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.Parser;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseSingleValueDimensionSelector;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.TestObjectColumnSelector;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
import org.apache.druid.segment.incremental.IndexSizeExceededException;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class ExpressionColumnValueSelectorTest
public class ExpressionSelectorsTest extends InitializedNullHandlingTest
{
@Test
public void testSupplierFromDimensionSelector()
@ -231,6 +252,86 @@ public class ExpressionColumnValueSelectorTest
);
}
@Test
public void testIncrementIndexStringSelector() throws IndexSizeExceededException
{
// This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
// DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
// exception trying to make an optimized string expression selector that was not appropriate to use for the
// underlying dimension selector.
// This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so the
// conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details
IncrementalIndexSchema schema = new IncrementalIndexSchema(
0,
new TimestampSpec("time", "millis", DateTimes.nowUtc()),
Granularities.NONE,
VirtualColumns.EMPTY,
DimensionsSpec.EMPTY,
new AggregatorFactory[]{new CountAggregatorFactory("count")},
true
);
IncrementalIndex index = new IncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).buildOnheap();
index.add(
new MapBasedInputRow(
DateTimes.nowUtc().getMillis(),
ImmutableList.of("x"),
ImmutableMap.of("x", "foo")
)
);
index.add(
new MapBasedInputRow(
DateTimes.nowUtc().plusMillis(1000).getMillis(),
ImmutableList.of("y"),
ImmutableMap.of("y", "foo")
)
);
IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
Sequence<Cursor> cursors = adapter.makeCursors(
null,
Intervals.ETERNITY,
VirtualColumns.EMPTY,
Granularities.ALL,
false,
null
);
int rowsProcessed = cursors.map(cursor -> {
DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(
cursor.getColumnSelectorFactory(),
Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()),
null
);
DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(
cursor.getColumnSelectorFactory(),
Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()),
null
);
int rowCount = 0;
while (!cursor.isDone()) {
Object x = xExprSelector.getObject();
Object y = yExprSelector.getObject();
List<String> expectedFoo = Collections.singletonList("foofoo");
List<String> expectedNull = NullHandling.replaceWithDefault()
? Collections.singletonList("foo")
: Collections.singletonList(null);
if (rowCount == 0) {
Assert.assertEquals(expectedFoo, x);
Assert.assertEquals(expectedNull, y);
} else {
Assert.assertEquals(expectedNull, x);
Assert.assertEquals(expectedFoo, y);
}
rowCount++;
cursor.advance();
}
return rowCount;
}).accumulate(0, (in, acc) -> in + acc);
Assert.assertEquals(2, rowsProcessed);
}
private static DimensionSelector dimensionSelectorFromSupplier(
final Supplier<String> supplier
)


@ -810,7 +810,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
ColumnCapabilities caps = X_PLUS_Y.capabilities("expr");
Assert.assertEquals(ValueType.FLOAT, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
@ -820,7 +820,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
caps = Z_CONCAT_X.capabilities("expr");
Assert.assertEquals(ValueType.STRING, caps.getType());
Assert.assertFalse(caps.hasBitmapIndexes());
Assert.assertFalse(caps.isDictionaryEncoded());
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
Assert.assertTrue(caps.hasMultipleValues().isUnknown());


@ -29,7 +29,7 @@ import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.ReferenceCountingSegment;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexAddResult;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
@ -377,7 +377,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
newCount = lastHydrant.getCount() + 1;
if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
Map<String, ColumnCapabilitiesImpl> oldCapabilities;
Map<String, ColumnCapabilities> oldCapabilities;
if (lastHydrant.hasSwapped()) {
oldCapabilities = new HashMap<>();
ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment();
@ -385,7 +385,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
QueryableIndex oldIndex = segment.asQueryableIndex();
for (String dim : oldIndex.getAvailableDimensions()) {
dimOrder.add(dim);
oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities());
oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities());
}
}
finally {