mirror of https://github.com/apache/druid.git
fix bug with expressions on sparse string realtime columns without explicit null valued rows (#10248)
* fix bug with realtime expressions on sparse string columns * fix test * add comment back * push capabilities for dimensions to dimension indexers since they know things * style * style * fixes * getting a bit carried away * missed one * fix it * benchmark build fix * review stuffs * javadoc and comments * add comment * more strict check * fix missed usaged of impl instead of interface
This commit is contained in:
parent
35284e5166
commit
c72f96a4ba
|
@ -59,7 +59,7 @@ public class StringDimensionIndexerBenchmark
|
|||
@Setup
|
||||
public void setup()
|
||||
{
|
||||
indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
|
||||
indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true, false);
|
||||
|
||||
for (int i = 0; i < cardinality; i++) {
|
||||
indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true);
|
||||
|
|
|
@ -47,7 +47,7 @@ import org.apache.druid.query.aggregation.AggregatorFactory;
|
|||
import org.apache.druid.segment.BaseProgressIndicator;
|
||||
import org.apache.druid.segment.ProgressIndicator;
|
||||
import org.apache.druid.segment.QueryableIndex;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||
import org.apache.druid.segment.indexing.TuningConfigs;
|
||||
|
@ -289,7 +289,7 @@ public class IndexGeneratorJob implements Jobby
|
|||
AggregatorFactory[] aggs,
|
||||
HadoopDruidIndexerConfig config,
|
||||
Iterable<String> oldDimOrder,
|
||||
Map<String, ColumnCapabilitiesImpl> oldCapabilities
|
||||
Map<String, ColumnCapabilities> oldCapabilities
|
||||
)
|
||||
{
|
||||
final HadoopTuningConfig tuningConfig = config.getSchema().getTuningConfig();
|
||||
|
|
|
@ -340,7 +340,7 @@ public class GroupByQueryEngineV2
|
|||
|
||||
// Now check column capabilities.
|
||||
final ColumnCapabilities columnCapabilities = capabilitiesFunction.apply(dimension.getDimension());
|
||||
return (columnCapabilities != null && !columnCapabilities.hasMultipleValues().isMaybeTrue()) ||
|
||||
return (columnCapabilities != null && columnCapabilities.hasMultipleValues().isFalse()) ||
|
||||
(missingMeansNonExistent && columnCapabilities == null);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -227,7 +227,7 @@ public class SegmentAnalyzer
|
|||
min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
|
||||
max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
|
||||
}
|
||||
} else if (capabilities.isDictionaryEncoded()) {
|
||||
} else if (capabilities.isDictionaryEncoded().isTrue()) {
|
||||
// fallback if no bitmap index
|
||||
DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn();
|
||||
cardinality = theColumn.getCardinality();
|
||||
|
|
|
@ -194,7 +194,7 @@ public class TopNQueryEngine
|
|||
}
|
||||
if (capabilities != null && capabilities.getType() == ValueType.STRING) {
|
||||
// string columns must use the on heap algorithm unless they have the following capabilites
|
||||
return capabilities.isDictionaryEncoded() && capabilities.areDictionaryValuesUnique().isTrue();
|
||||
return capabilities.isDictionaryEncoded().isTrue() && capabilities.areDictionaryValuesUnique().isTrue();
|
||||
} else {
|
||||
// non-strings are not eligible to use the pooled algorithm, and should use a heap algorithm
|
||||
return false;
|
||||
|
|
|
@ -73,16 +73,16 @@ public final class DimensionHandlerUtils
|
|||
)
|
||||
{
|
||||
if (capabilities == null) {
|
||||
return new StringDimensionHandler(dimensionName, multiValueHandling, true);
|
||||
return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
|
||||
}
|
||||
|
||||
multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
|
||||
|
||||
if (capabilities.getType() == ValueType.STRING) {
|
||||
if (!capabilities.isDictionaryEncoded()) {
|
||||
if (!capabilities.isDictionaryEncoded().isTrue()) {
|
||||
throw new IAE("String column must have dictionary encoding.");
|
||||
}
|
||||
return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes());
|
||||
return new StringDimensionHandler(dimensionName, multiValueHandling, capabilities.hasBitmapIndexes(), capabilities.hasSpatialIndexes());
|
||||
}
|
||||
|
||||
if (capabilities.getType() == ValueType.LONG) {
|
||||
|
@ -98,7 +98,7 @@ public final class DimensionHandlerUtils
|
|||
}
|
||||
|
||||
// Return a StringDimensionHandler by default (null columns will be treated as String typed)
|
||||
return new StringDimensionHandler(dimensionName, multiValueHandling, true);
|
||||
return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
|
||||
}
|
||||
|
||||
public static List<ValueType> getValueTypesFromDimensionSpecs(List<DimensionSpec> dimSpecs)
|
||||
|
@ -226,11 +226,11 @@ public final class DimensionHandlerUtils
|
|||
capabilities = ColumnCapabilitiesImpl.copyOf(capabilities)
|
||||
.setType(ValueType.STRING)
|
||||
.setDictionaryValuesUnique(
|
||||
capabilities.isDictionaryEncoded() &&
|
||||
capabilities.isDictionaryEncoded().isTrue() &&
|
||||
fn.getExtractionType() == ExtractionFn.ExtractionType.ONE_TO_ONE
|
||||
)
|
||||
.setDictionaryValuesSorted(
|
||||
capabilities.isDictionaryEncoded() && fn.preservesOrdering()
|
||||
capabilities.isDictionaryEncoded().isTrue() && fn.preservesOrdering()
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ package org.apache.druid.segment;
|
|||
import org.apache.druid.collections.bitmap.BitmapFactory;
|
||||
import org.apache.druid.collections.bitmap.MutableBitmap;
|
||||
import org.apache.druid.query.dimension.DimensionSpec;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.data.CloseableIndexed;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
|
||||
|
@ -236,6 +237,7 @@ public interface DimensionIndexer
|
|||
IncrementalIndex.DimensionDesc desc
|
||||
);
|
||||
|
||||
ColumnCapabilities getColumnCapabilities();
|
||||
/**
|
||||
* Compares the row values for this DimensionIndexer's dimension from a Row key.
|
||||
*
|
||||
|
|
|
@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
|
|||
import org.apache.druid.java.util.common.guava.Comparators;
|
||||
import org.apache.druid.query.dimension.DimensionSpec;
|
||||
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.data.CloseableIndexed;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
|
||||
|
@ -38,6 +41,9 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
|
|||
{
|
||||
public static final Comparator<Double> DOUBLE_COMPARATOR = Comparators.naturalNullsFirst();
|
||||
|
||||
private final ColumnCapabilitiesImpl capabilities =
|
||||
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.DOUBLE);
|
||||
|
||||
@Override
|
||||
public Double processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
|
||||
{
|
||||
|
@ -89,6 +95,12 @@ public class DoubleDimensionIndexer implements DimensionIndexer<Double, Double,
|
|||
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnCapabilities getColumnCapabilities()
|
||||
{
|
||||
return capabilities;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DimensionSelector makeDimensionSelector(
|
||||
DimensionSpec spec,
|
||||
|
|
|
@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
|
|||
import org.apache.druid.java.util.common.guava.Comparators;
|
||||
import org.apache.druid.query.dimension.DimensionSpec;
|
||||
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.data.CloseableIndexed;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
|
||||
|
@ -38,6 +41,9 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
|
|||
{
|
||||
public static final Comparator<Float> FLOAT_COMPARATOR = Comparators.naturalNullsFirst();
|
||||
|
||||
private final ColumnCapabilitiesImpl capabilities =
|
||||
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.FLOAT);
|
||||
|
||||
@Override
|
||||
public Float processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
|
||||
{
|
||||
|
@ -90,6 +96,12 @@ public class FloatDimensionIndexer implements DimensionIndexer<Float, Float, Flo
|
|||
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnCapabilities getColumnCapabilities()
|
||||
{
|
||||
return capabilities;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DimensionSelector makeDimensionSelector(
|
||||
DimensionSpec spec,
|
||||
|
|
|
@ -88,6 +88,63 @@ public class IndexMergerV9 implements IndexMerger
|
|||
{
|
||||
private static final Logger log = new Logger(IndexMergerV9.class);
|
||||
|
||||
// merge logic for the state capabilities will be in after incremental index is persisted
|
||||
public static final ColumnCapabilities.CoercionLogic DIMENSION_CAPABILITY_MERGE_LOGIC =
|
||||
new ColumnCapabilities.CoercionLogic()
|
||||
{
|
||||
@Override
|
||||
public boolean dictionaryEncoded()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesSorted()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesUnique()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean multipleValues()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
public static final ColumnCapabilities.CoercionLogic METRIC_CAPABILITY_MERGE_LOGIC =
|
||||
new ColumnCapabilities.CoercionLogic()
|
||||
{
|
||||
@Override
|
||||
public boolean dictionaryEncoded()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesSorted()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesUnique()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean multipleValues()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
private final ObjectMapper mapper;
|
||||
private final IndexIO indexIO;
|
||||
private final SegmentWriteOutMediumFactory defaultSegmentWriteOutMediumFactory;
|
||||
|
@ -724,14 +781,14 @@ public class IndexMergerV9 implements IndexMerger
|
|||
for (String dimension : adapter.getDimensionNames()) {
|
||||
ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
|
||||
capabilitiesMap.compute(dimension, (d, existingCapabilities) ->
|
||||
ColumnCapabilitiesImpl.snapshot(capabilities)
|
||||
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
|
||||
ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, DIMENSION_CAPABILITY_MERGE_LOGIC)
|
||||
);
|
||||
}
|
||||
for (String metric : adapter.getMetricNames()) {
|
||||
ColumnCapabilities capabilities = adapter.getCapabilities(metric);
|
||||
capabilitiesMap.compute(metric, (m, existingCapabilities) ->
|
||||
ColumnCapabilitiesImpl.snapshot(capabilities)
|
||||
.merge(ColumnCapabilitiesImpl.snapshot(existingCapabilities)));
|
||||
ColumnCapabilitiesImpl.merge(capabilities, existingCapabilities, METRIC_CAPABILITY_MERGE_LOGIC)
|
||||
);
|
||||
metricsValueTypes.put(metric, capabilities.getType());
|
||||
metricTypeNames.put(metric, adapter.getMetricType(metric));
|
||||
}
|
||||
|
@ -1011,7 +1068,10 @@ public class IndexMergerV9 implements IndexMerger
|
|||
{
|
||||
Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
|
||||
for (int i = 0; i < mergedDimensions.size(); i++) {
|
||||
ColumnCapabilities capabilities = dimCapabilities.get(i);
|
||||
ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot(
|
||||
dimCapabilities.get(i),
|
||||
DIMENSION_CAPABILITY_MERGE_LOGIC
|
||||
);
|
||||
String dimName = mergedDimensions.get(i);
|
||||
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null);
|
||||
handlers.put(dimName, handler);
|
||||
|
|
|
@ -25,6 +25,9 @@ import org.apache.druid.common.config.NullHandling;
|
|||
import org.apache.druid.java.util.common.guava.Comparators;
|
||||
import org.apache.druid.query.dimension.DimensionSpec;
|
||||
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.data.CloseableIndexed;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexRowHolder;
|
||||
|
@ -38,6 +41,10 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
|
|||
{
|
||||
public static final Comparator LONG_COMPARATOR = Comparators.<Long>naturalNullsFirst();
|
||||
|
||||
private final ColumnCapabilitiesImpl capabilities =
|
||||
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG);
|
||||
|
||||
|
||||
@Override
|
||||
public Long processRowValsToUnsortedEncodedKeyComponent(@Nullable Object dimValues, boolean reportParseExceptions)
|
||||
{
|
||||
|
@ -90,6 +97,12 @@ public class LongDimensionIndexer implements DimensionIndexer<Long, Long, Long>
|
|||
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnCapabilities getColumnCapabilities()
|
||||
{
|
||||
return capabilities;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DimensionSelector makeDimensionSelector(
|
||||
DimensionSpec spec,
|
||||
|
|
|
@ -98,12 +98,14 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
|
|||
private final String dimensionName;
|
||||
private final MultiValueHandling multiValueHandling;
|
||||
private final boolean hasBitmapIndexes;
|
||||
private final boolean hasSpatialIndexes;
|
||||
|
||||
public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
|
||||
public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
|
||||
{
|
||||
this.dimensionName = dimensionName;
|
||||
this.multiValueHandling = multiValueHandling;
|
||||
this.hasBitmapIndexes = hasBitmapIndexes;
|
||||
this.hasSpatialIndexes = hasSpatialIndexes;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -139,7 +141,7 @@ public class StringDimensionHandler implements DimensionHandler<Integer, int[],
|
|||
@Override
|
||||
public DimensionIndexer<Integer, int[], String> makeIndexer()
|
||||
{
|
||||
return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes);
|
||||
return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -40,6 +40,9 @@ import org.apache.druid.query.dimension.DimensionSpec;
|
|||
import org.apache.druid.query.extraction.ExtractionFn;
|
||||
import org.apache.druid.query.filter.ValueMatcher;
|
||||
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.data.ArrayBasedIndexedInts;
|
||||
import org.apache.druid.segment.data.CloseableIndexed;
|
||||
import org.apache.druid.segment.data.IndexedInts;
|
||||
|
@ -74,7 +77,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
private String minValue = null;
|
||||
@Nullable
|
||||
private String maxValue = null;
|
||||
private int idForNull = ABSENT_VALUE_ID;
|
||||
private volatile int idForNull = ABSENT_VALUE_ID;
|
||||
|
||||
private final Object2IntMap<String> valueToId = new Object2IntOpenHashMap<>();
|
||||
|
||||
|
@ -233,17 +236,19 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
private final DimensionDictionary dimLookup;
|
||||
private final MultiValueHandling multiValueHandling;
|
||||
private final boolean hasBitmapIndexes;
|
||||
private final boolean hasSpatialIndexes;
|
||||
private volatile boolean hasMultipleValues = false;
|
||||
private volatile boolean isSparse = false;
|
||||
|
||||
@Nullable
|
||||
private SortedDimensionDictionary sortedLookup;
|
||||
|
||||
public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes)
|
||||
public StringDimensionIndexer(MultiValueHandling multiValueHandling, boolean hasBitmapIndexes, boolean hasSpatialIndexes)
|
||||
{
|
||||
this.dimLookup = new DimensionDictionary();
|
||||
this.multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
|
||||
this.hasBitmapIndexes = hasBitmapIndexes;
|
||||
this.hasSpatialIndexes = hasSpatialIndexes;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -400,6 +405,17 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
return dimLookup.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if all values are encoded in {@link #dimLookup}
|
||||
*/
|
||||
private boolean dictionaryEncodesAllValues()
|
||||
{
|
||||
// name lookup is possible in advance if we explicitly process a value for every row, or if we've encountered an
|
||||
// actual null value and it is present in our dictionary. otherwise the dictionary will be missing ids for implicit
|
||||
// null values
|
||||
return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareUnsortedEncodedKeyComponents(int[] lhs, int[] rhs)
|
||||
{
|
||||
|
@ -456,6 +472,37 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
return Arrays.hashCode(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnCapabilities getColumnCapabilities()
|
||||
{
|
||||
ColumnCapabilitiesImpl capabilites = new ColumnCapabilitiesImpl().setType(ValueType.STRING)
|
||||
.setHasBitmapIndexes(hasBitmapIndexes)
|
||||
.setHasSpatialIndexes(hasSpatialIndexes)
|
||||
.setDictionaryValuesUnique(true)
|
||||
.setDictionaryValuesSorted(false);
|
||||
|
||||
// Strings are opportunistically multi-valued, but the capabilities are initialized as 'unknown', since a
|
||||
// multi-valued row might be processed at any point during ingestion.
|
||||
// We only explicitly set multiple values if we are certain that there are multiple values, otherwise, a race
|
||||
// condition might occur where this indexer might process a multi-valued row in the period between obtaining the
|
||||
// capabilities, and actually processing the rows with a selector. Leaving as unknown allows the caller to decide
|
||||
// how to handle this.
|
||||
if (hasMultipleValues) {
|
||||
capabilites.setHasMultipleValues(true);
|
||||
}
|
||||
// Likewise, only set dictionaryEncoded if explicitly if true for a similar reason as multi-valued handling. The
|
||||
// dictionary is populated as rows are processed, but there might be implicit default values not accounted for in
|
||||
// the dictionary yet. We can be certain that the dictionary has an entry for every value if either of
|
||||
// a) we have already processed an explitic default (null) valued row for this column
|
||||
// b) the processing was not 'sparse', meaning that this indexer has processed an explict value for every row
|
||||
// is true.
|
||||
final boolean allValuesEncoded = dictionaryEncodesAllValues();
|
||||
if (allValuesEncoded) {
|
||||
capabilites.setDictionaryEncoded(true);
|
||||
}
|
||||
return capabilites;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DimensionSelector makeDimensionSelector(
|
||||
final DimensionSpec spec,
|
||||
|
@ -630,9 +677,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
@Override
|
||||
public boolean nameLookupPossibleInAdvance()
|
||||
{
|
||||
// name lookup is possible in advance if we got a value for every row (setSparseIndexed was not called on this
|
||||
// column) or we've encountered an actual null value and it is present in our dictionary
|
||||
return !isSparse || dimLookup.idForNull != ABSENT_VALUE_ID;
|
||||
return dictionaryEncodesAllValues();
|
||||
}
|
||||
|
||||
@Nullable
|
||||
|
@ -696,6 +741,7 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
|
|||
return makeDimensionSelector(DefaultDimensionSpec.of(desc.getName()), currEntry, desc);
|
||||
}
|
||||
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public Object convertUnsortedEncodedKeyComponentToActualList(int[] key)
|
||||
|
|
|
@ -26,17 +26,59 @@ import org.apache.druid.java.util.common.StringUtils;
|
|||
import javax.annotation.Nullable;
|
||||
|
||||
/**
|
||||
* This interface is used to expose information about columns that is interesting to know for all matters dealing with
|
||||
* reading from columns, including query planning and optimization, creating readers to merge segments at ingestion
|
||||
* time, and probably nearly anything else you can imagine.
|
||||
*/
|
||||
public interface ColumnCapabilities
|
||||
{
|
||||
/**
|
||||
* Column type, good to know so caller can know what to expect and which optimal selector to use
|
||||
*/
|
||||
ValueType getType();
|
||||
boolean isDictionaryEncoded();
|
||||
|
||||
/**
|
||||
* Is the column dictionary encoded? If so, a DimensionDictionarySelector may be used instead of using a value
|
||||
* selector, allowing algorithms to operate on primitive integer dictionary ids rather than the looked up dictionary
|
||||
* values
|
||||
*/
|
||||
Capable isDictionaryEncoded();
|
||||
|
||||
/**
|
||||
* If the column is dictionary encoded, are those values sorted? Useful to know for optimizations that can defer
|
||||
* looking up values and allowing sorting with the dictionary ids directly
|
||||
*/
|
||||
Capable areDictionaryValuesSorted();
|
||||
|
||||
/**
|
||||
* If the column is dictionary encoded, is there a 1:1 mapping of dictionary ids to values? If this is true, it
|
||||
* unlocks optimizations such as allowing for things like grouping directly on dictionary ids and deferred value
|
||||
* lookup
|
||||
*/
|
||||
Capable areDictionaryValuesUnique();
|
||||
boolean isRunLengthEncoded();
|
||||
boolean hasBitmapIndexes();
|
||||
boolean hasSpatialIndexes();
|
||||
|
||||
/**
|
||||
* String columns are sneaky, and might have multiple values, this is to allow callers to know and appropriately
|
||||
* prepare themselves
|
||||
*/
|
||||
Capable hasMultipleValues();
|
||||
|
||||
/**
|
||||
* Does the column have an inverted index bitmap for each value? If so, these may be employed to 'pre-filter' the
|
||||
* column by examining if the values match the filter and intersecting the bitmaps, to avoid having to scan and
|
||||
* evaluate if every row matches the filter
|
||||
*/
|
||||
boolean hasBitmapIndexes();
|
||||
|
||||
/**
|
||||
* Does the column have spatial indexes available to allow use with spatial filtering?
|
||||
*/
|
||||
boolean hasSpatialIndexes();
|
||||
|
||||
/**
|
||||
* All Druid primitive columns support filtering, maybe with or without indexes, but by default complex columns
|
||||
* do not support direct filtering, unless provided by through a custom implementation.
|
||||
*/
|
||||
boolean isFilterable();
|
||||
|
||||
enum Capable
|
||||
|
@ -55,6 +97,11 @@ public interface ColumnCapabilities
|
|||
return isTrue() || isUnknown();
|
||||
}
|
||||
|
||||
public boolean isFalse()
|
||||
{
|
||||
return this == FALSE;
|
||||
}
|
||||
|
||||
public boolean isUnknown()
|
||||
{
|
||||
return this == UNKNOWN;
|
||||
|
@ -105,4 +152,36 @@ public interface ColumnCapabilities
|
|||
return StringUtils.toLowerCase(super.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This interface define the shape of a mechnism to allow for bespoke coercion of {@link Capable#UNKNOWN} into
|
||||
* {@link Capable#TRUE} or {@link Capable#FALSE} for each {@link Capable} of a {@link ColumnCapabilities}, as is
|
||||
* appropriate for the situation of the caller.
|
||||
*/
|
||||
interface CoercionLogic
|
||||
{
|
||||
/**
|
||||
* If {@link ColumnCapabilities#isDictionaryEncoded()} is {@link Capable#UNKNOWN}, define if it should be treated
|
||||
* as true or false.
|
||||
*/
|
||||
boolean dictionaryEncoded();
|
||||
|
||||
/**
|
||||
* If {@link ColumnCapabilities#areDictionaryValuesSorted()} ()} is {@link Capable#UNKNOWN}, define if it should be treated
|
||||
* as true or false.
|
||||
*/
|
||||
boolean dictionaryValuesSorted();
|
||||
|
||||
/**
|
||||
* If {@link ColumnCapabilities#areDictionaryValuesUnique()} ()} is {@link Capable#UNKNOWN}, define if it should be treated
|
||||
* as true or false.
|
||||
*/
|
||||
boolean dictionaryValuesUnique();
|
||||
|
||||
/**
|
||||
* If {@link ColumnCapabilities#hasMultipleValues()} is {@link Capable#UNKNOWN}, define if it should be treated
|
||||
* as true or false.
|
||||
*/
|
||||
boolean multipleValues();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.druid.segment.column;
|
|||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonSetter;
|
||||
import com.google.common.base.Preconditions;
|
||||
import org.apache.druid.java.util.common.ISE;
|
||||
|
||||
|
@ -37,7 +38,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
|
|||
if (other != null) {
|
||||
capabilities.type = other.getType();
|
||||
capabilities.dictionaryEncoded = other.isDictionaryEncoded();
|
||||
capabilities.runLengthEncoded = other.isRunLengthEncoded();
|
||||
capabilities.hasInvertedIndexes = other.hasBitmapIndexes();
|
||||
capabilities.hasSpatialIndexes = other.hasSpatialIndexes();
|
||||
capabilities.hasMultipleValues = other.hasMultipleValues();
|
||||
|
@ -49,32 +49,61 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
|
|||
}
|
||||
|
||||
/**
|
||||
* Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
|
||||
* {@link Capable#FALSE}, in order to present a snapshot of the state of the this column
|
||||
* Copy a {@link ColumnCapabilities} and coerce all {@link ColumnCapabilities.Capable#UNKNOWN} to
|
||||
* {@link ColumnCapabilities.Capable#TRUE} or {@link ColumnCapabilities.Capable#FALSE} as specified by
|
||||
* {@link ColumnCapabilities.CoercionLogic}
|
||||
*/
|
||||
@Nullable
|
||||
public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities)
|
||||
{
|
||||
return snapshot(capabilities, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Used at indexing time to finalize all {@link Capable#UNKNOWN} values to
|
||||
* {@link Capable#FALSE} or {@link Capable#TRUE}, in order to present a snapshot of the state of the this column
|
||||
*/
|
||||
@Nullable
|
||||
public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, boolean unknownIsTrue)
|
||||
public static ColumnCapabilitiesImpl snapshot(@Nullable final ColumnCapabilities capabilities, CoercionLogic coerce)
|
||||
{
|
||||
if (capabilities == null) {
|
||||
return null;
|
||||
}
|
||||
ColumnCapabilitiesImpl copy = copyOf(capabilities);
|
||||
copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(unknownIsTrue);
|
||||
copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(unknownIsTrue);
|
||||
copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(unknownIsTrue);
|
||||
copy.dictionaryEncoded = copy.dictionaryEncoded.coerceUnknownToBoolean(coerce.dictionaryEncoded());
|
||||
copy.dictionaryValuesSorted = copy.dictionaryValuesSorted.coerceUnknownToBoolean(coerce.dictionaryValuesSorted());
|
||||
copy.dictionaryValuesUnique = copy.dictionaryValuesUnique.coerceUnknownToBoolean(coerce.dictionaryValuesUnique());
|
||||
copy.hasMultipleValues = copy.hasMultipleValues.coerceUnknownToBoolean(coerce.multipleValues());
|
||||
return copy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Snapshots a pair of capabilities and then merges them
|
||||
*/
|
||||
@Nullable
|
||||
public static ColumnCapabilitiesImpl merge(
|
||||
@Nullable final ColumnCapabilities capabilities,
|
||||
@Nullable final ColumnCapabilities other,
|
||||
CoercionLogic coercionLogic
|
||||
)
|
||||
{
|
||||
ColumnCapabilitiesImpl merged = snapshot(capabilities, coercionLogic);
|
||||
ColumnCapabilitiesImpl otherSnapshot = snapshot(other, coercionLogic);
|
||||
if (merged == null) {
|
||||
return otherSnapshot;
|
||||
} else if (otherSnapshot == null) {
|
||||
return merged;
|
||||
}
|
||||
|
||||
if (merged.type == null) {
|
||||
merged.type = other.getType();
|
||||
}
|
||||
|
||||
if (!merged.type.equals(otherSnapshot.getType())) {
|
||||
throw new ISE("Cannot merge columns of type[%s] and [%s]", merged.type, otherSnapshot.getType());
|
||||
}
|
||||
|
||||
merged.dictionaryEncoded = merged.dictionaryEncoded.or(otherSnapshot.isDictionaryEncoded());
|
||||
merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues());
|
||||
merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted());
|
||||
merged.dictionaryValuesUnique = merged.dictionaryValuesUnique.and(otherSnapshot.areDictionaryValuesUnique());
|
||||
merged.hasInvertedIndexes |= otherSnapshot.hasBitmapIndexes();
|
||||
merged.hasSpatialIndexes |= otherSnapshot.hasSpatialIndexes();
|
||||
merged.filterable &= otherSnapshot.isFilterable();
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a no frills, simple column with {@link ValueType} set and everything else false
|
||||
|
@ -93,10 +122,9 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
|
|||
@Nullable
|
||||
private ValueType type = null;
|
||||
|
||||
private boolean dictionaryEncoded = false;
|
||||
private boolean runLengthEncoded = false;
|
||||
private boolean hasInvertedIndexes = false;
|
||||
private boolean hasSpatialIndexes = false;
|
||||
private Capable dictionaryEncoded = Capable.UNKNOWN;
|
||||
private Capable hasMultipleValues = Capable.UNKNOWN;
|
||||
|
||||
// These capabilities are computed at query time and not persisted in the segment files.
|
||||
|
@ -121,15 +149,16 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
|
|||
}
|
||||
|
||||
@Override
|
||||
@JsonProperty
|
||||
public boolean isDictionaryEncoded()
|
||||
@JsonProperty("dictionaryEncoded")
|
||||
public Capable isDictionaryEncoded()
|
||||
{
|
||||
return dictionaryEncoded;
|
||||
}
|
||||
|
||||
@JsonSetter("dictionaryEncoded")
|
||||
public ColumnCapabilitiesImpl setDictionaryEncoded(boolean dictionaryEncoded)
|
||||
{
|
||||
this.dictionaryEncoded = dictionaryEncoded;
|
||||
this.dictionaryEncoded = Capable.of(dictionaryEncoded);
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -157,13 +186,6 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
|
|||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
@JsonProperty
|
||||
public boolean isRunLengthEncoded()
|
||||
{
|
||||
return runLengthEncoded;
|
||||
}
|
||||
|
||||
@Override
|
||||
@JsonProperty("hasBitmapIndexes")
|
||||
public boolean hasBitmapIndexes()
|
||||
|
@ -218,30 +240,4 @@ public class ColumnCapabilitiesImpl implements ColumnCapabilities
|
|||
this.filterable = filterable;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ColumnCapabilities merge(@Nullable ColumnCapabilities other)
|
||||
{
|
||||
if (other == null) {
|
||||
return this;
|
||||
}
|
||||
|
||||
if (type == null) {
|
||||
type = other.getType();
|
||||
}
|
||||
|
||||
if (!type.equals(other.getType())) {
|
||||
throw new ISE("Cannot merge columns of type[%s] and [%s]", type, other.getType());
|
||||
}
|
||||
|
||||
this.dictionaryEncoded |= other.isDictionaryEncoded();
|
||||
this.runLengthEncoded |= other.isRunLengthEncoded();
|
||||
this.hasInvertedIndexes |= other.hasBitmapIndexes();
|
||||
this.hasSpatialIndexes |= other.hasSpatialIndexes();
|
||||
this.filterable &= other.isFilterable();
|
||||
this.hasMultipleValues = this.hasMultipleValues.or(other.hasMultipleValues());
|
||||
this.dictionaryValuesSorted = this.dictionaryValuesSorted.and(other.areDictionaryValuesSorted());
|
||||
this.dictionaryValuesUnique = this.dictionaryValuesUnique.and(other.areDictionaryValuesUnique());
|
||||
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -115,7 +115,7 @@ public class ExpressionFilter implements Filter
|
|||
// multiple values. The lack of multiple values is important because expression filters treat multi-value
|
||||
// arrays as nulls, which doesn't permit index based filtering.
|
||||
final String column = Iterables.getOnlyElement(requiredBindings.get());
|
||||
return selector.getBitmapIndex(column) != null && !selector.hasMultipleValues(column).isMaybeTrue();
|
||||
return selector.getBitmapIndex(column) != null && selector.hasMultipleValues(column).isFalse();
|
||||
} else {
|
||||
// Multi-column expression.
|
||||
return false;
|
||||
|
|
|
@ -414,7 +414,7 @@ public class Filters
|
|||
if (filter.supportsBitmapIndex(indexSelector)) {
|
||||
final ColumnHolder columnHolder = columnSelector.getColumnHolder(dimension);
|
||||
if (columnHolder != null) {
|
||||
return !columnHolder.getCapabilities().hasMultipleValues().isMaybeTrue();
|
||||
return columnHolder.getCapabilities().hasMultipleValues().isFalse();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -59,6 +59,7 @@ import org.apache.druid.segment.DimensionIndexer;
|
|||
import org.apache.druid.segment.DimensionSelector;
|
||||
import org.apache.druid.segment.DoubleColumnSelector;
|
||||
import org.apache.druid.segment.FloatColumnSelector;
|
||||
import org.apache.druid.segment.IndexMergerV9;
|
||||
import org.apache.druid.segment.LongColumnSelector;
|
||||
import org.apache.druid.segment.Metadata;
|
||||
import org.apache.druid.segment.NilColumnValueSelector;
|
||||
|
@ -249,7 +250,8 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
|
||||
private final Map<String, DimensionDesc> dimensionDescs;
|
||||
private final List<DimensionDesc> dimensionDescsList;
|
||||
private final Map<String, ColumnCapabilitiesImpl> columnCapabilities;
|
||||
// dimension capabilities are provided by the indexers
|
||||
private final Map<String, ColumnCapabilities> timeAndMetricsColumnCapabilities;
|
||||
private final AtomicInteger numEntries = new AtomicInteger();
|
||||
private final AtomicLong bytesInMemory = new AtomicLong();
|
||||
|
||||
|
@ -287,7 +289,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
this.deserializeComplexMetrics = deserializeComplexMetrics;
|
||||
this.reportParseExceptions = reportParseExceptions;
|
||||
|
||||
this.columnCapabilities = new HashMap<>();
|
||||
this.timeAndMetricsColumnCapabilities = new HashMap<>();
|
||||
this.metadata = new Metadata(
|
||||
null,
|
||||
getCombiningAggregators(metrics),
|
||||
|
@ -302,7 +304,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
for (AggregatorFactory metric : metrics) {
|
||||
MetricDesc metricDesc = new MetricDesc(metricDescs.size(), metric);
|
||||
metricDescs.put(metricDesc.getName(), metricDesc);
|
||||
columnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
|
||||
timeAndMetricsColumnCapabilities.put(metricDesc.getName(), metricDesc.getCapabilities());
|
||||
}
|
||||
|
||||
DimensionsSpec dimensionsSpec = incrementalIndexSchema.getDimensionsSpec();
|
||||
|
@ -312,24 +314,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) {
|
||||
ValueType type = TYPE_MAP.get(dimSchema.getValueType());
|
||||
String dimName = dimSchema.getName();
|
||||
ColumnCapabilitiesImpl capabilities = makeCapabilitiesFromValueType(type);
|
||||
ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type);
|
||||
capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex());
|
||||
|
||||
if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) {
|
||||
capabilities.setHasSpatialIndexes(true);
|
||||
} else {
|
||||
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
|
||||
dimName,
|
||||
capabilities,
|
||||
dimSchema.getMultiValueHandling()
|
||||
);
|
||||
addNewDimension(dimName, capabilities, handler);
|
||||
}
|
||||
columnCapabilities.put(dimName, capabilities);
|
||||
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(
|
||||
dimName,
|
||||
capabilities,
|
||||
dimSchema.getMultiValueHandling()
|
||||
);
|
||||
addNewDimension(dimName, handler);
|
||||
}
|
||||
|
||||
//__time capabilities
|
||||
columnCapabilities.put(
|
||||
timeAndMetricsColumnCapabilities.put(
|
||||
ColumnHolder.TIME_COLUMN_NAME,
|
||||
ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(ValueType.LONG)
|
||||
);
|
||||
|
@ -589,9 +589,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
return row;
|
||||
}
|
||||
|
||||
public Map<String, ColumnCapabilitiesImpl> getColumnCapabilities()
|
||||
public Map<String, ColumnCapabilities> getColumnCapabilities()
|
||||
{
|
||||
return columnCapabilities;
|
||||
ImmutableMap.Builder<String, ColumnCapabilities> builder =
|
||||
ImmutableMap.<String, ColumnCapabilities>builder().putAll(timeAndMetricsColumnCapabilities);
|
||||
|
||||
dimensionDescs.forEach((dimension, desc) -> builder.put(dimension, desc.getCapabilities()));
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -658,23 +662,22 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
continue;
|
||||
}
|
||||
boolean wasNewDim = false;
|
||||
ColumnCapabilitiesImpl capabilities;
|
||||
DimensionDesc desc = dimensionDescs.get(dimension);
|
||||
if (desc != null) {
|
||||
capabilities = desc.getCapabilities();
|
||||
absentDimensions.remove(dimension);
|
||||
} else {
|
||||
wasNewDim = true;
|
||||
capabilities = columnCapabilities.get(dimension);
|
||||
if (capabilities == null) {
|
||||
// For schemaless type discovery, assume everything is a String for now, can change later.
|
||||
capabilities = makeCapabilitiesFromValueType(ValueType.STRING);
|
||||
columnCapabilities.put(dimension, capabilities);
|
||||
}
|
||||
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimension, capabilities, null);
|
||||
desc = addNewDimension(dimension, capabilities, handler);
|
||||
desc = addNewDimension(
|
||||
dimension,
|
||||
DimensionHandlerUtils.getHandlerFromCapabilities(
|
||||
dimension,
|
||||
// for schemaless type discovery, everything is a String. this should probably try to autodetect
|
||||
// based on the value to use a better handler
|
||||
makeDefaultCapabilitiesFromValueType(ValueType.STRING),
|
||||
null
|
||||
)
|
||||
);
|
||||
}
|
||||
DimensionHandler handler = desc.getHandler();
|
||||
DimensionIndexer indexer = desc.getIndexer();
|
||||
Object dimsKey = null;
|
||||
try {
|
||||
|
@ -684,13 +687,6 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
parseExceptionMessages.add(pe.getMessage());
|
||||
}
|
||||
dimsKeySize += indexer.estimateEncodedKeyComponentSize(dimsKey);
|
||||
// Set column capabilities as data is coming in
|
||||
if (!capabilities.hasMultipleValues().isTrue() &&
|
||||
dimsKey != null &&
|
||||
handler.getLengthOfEncodedKeyComponent(dimsKey) > 1) {
|
||||
capabilities.setHasMultipleValues(true);
|
||||
}
|
||||
|
||||
if (wasNewDim) {
|
||||
// unless this is the first row we are processing, all newly discovered columns will be sparse
|
||||
if (maxIngestedEventTime != null) {
|
||||
|
@ -928,7 +924,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
}
|
||||
}
|
||||
|
||||
private ColumnCapabilitiesImpl makeCapabilitiesFromValueType(ValueType type)
|
||||
private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type)
|
||||
{
|
||||
if (type == ValueType.STRING) {
|
||||
// we start out as not having multiple values, but this might change as we encounter them
|
||||
|
@ -949,7 +945,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
*/
|
||||
public void loadDimensionIterable(
|
||||
Iterable<String> oldDimensionOrder,
|
||||
Map<String, ColumnCapabilitiesImpl> oldColumnCapabilities
|
||||
Map<String, ColumnCapabilities> oldColumnCapabilities
|
||||
)
|
||||
{
|
||||
synchronized (dimensionDescs) {
|
||||
|
@ -958,19 +954,21 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
}
|
||||
for (String dim : oldDimensionOrder) {
|
||||
if (dimensionDescs.get(dim) == null) {
|
||||
ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim);
|
||||
columnCapabilities.put(dim, capabilities);
|
||||
ColumnCapabilitiesImpl capabilities = ColumnCapabilitiesImpl.snapshot(
|
||||
oldColumnCapabilities.get(dim),
|
||||
IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC
|
||||
);
|
||||
DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dim, capabilities, null);
|
||||
addNewDimension(dim, capabilities, handler);
|
||||
addNewDimension(dim, handler);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@GuardedBy("dimensionDescs")
|
||||
private DimensionDesc addNewDimension(String dim, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
|
||||
private DimensionDesc addNewDimension(String dim, DimensionHandler handler)
|
||||
{
|
||||
DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, capabilities, handler);
|
||||
DimensionDesc desc = new DimensionDesc(dimensionDescs.size(), dim, handler);
|
||||
dimensionDescs.put(dim, desc);
|
||||
dimensionDescsList.add(desc);
|
||||
return desc;
|
||||
|
@ -998,7 +996,10 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
@Nullable
|
||||
public ColumnCapabilities getCapabilities(String column)
|
||||
{
|
||||
return columnCapabilities.get(column);
|
||||
if (dimensionDescs.containsKey(column)) {
|
||||
return dimensionDescs.get(column).getCapabilities();
|
||||
}
|
||||
return timeAndMetricsColumnCapabilities.get(column);
|
||||
}
|
||||
|
||||
public Metadata getMetadata()
|
||||
|
@ -1080,15 +1081,13 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
{
|
||||
private final int index;
|
||||
private final String name;
|
||||
private final ColumnCapabilitiesImpl capabilities;
|
||||
private final DimensionHandler handler;
|
||||
private final DimensionIndexer indexer;
|
||||
|
||||
public DimensionDesc(int index, String name, ColumnCapabilitiesImpl capabilities, DimensionHandler handler)
|
||||
public DimensionDesc(int index, String name, DimensionHandler handler)
|
||||
{
|
||||
this.index = index;
|
||||
this.name = name;
|
||||
this.capabilities = capabilities;
|
||||
this.handler = handler;
|
||||
this.indexer = handler.makeIndexer();
|
||||
}
|
||||
|
@ -1103,9 +1102,9 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
return name;
|
||||
}
|
||||
|
||||
public ColumnCapabilitiesImpl getCapabilities()
|
||||
public ColumnCapabilities getCapabilities()
|
||||
{
|
||||
return capabilities;
|
||||
return indexer.getColumnCapabilities();
|
||||
}
|
||||
|
||||
public DimensionHandler getHandler()
|
||||
|
@ -1124,7 +1123,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
private final int index;
|
||||
private final String name;
|
||||
private final String type;
|
||||
private final ColumnCapabilitiesImpl capabilities;
|
||||
private final ColumnCapabilities capabilities;
|
||||
|
||||
public MetricDesc(int index, AggregatorFactory factory)
|
||||
{
|
||||
|
@ -1163,7 +1162,7 @@ public abstract class IncrementalIndex<AggregatorType> extends AbstractIndex imp
|
|||
return type;
|
||||
}
|
||||
|
||||
public ColumnCapabilitiesImpl getCapabilities()
|
||||
public ColumnCapabilities getCapabilities()
|
||||
{
|
||||
return capabilities;
|
||||
}
|
||||
|
|
|
@ -52,6 +52,62 @@ import java.util.Iterator;
|
|||
*/
|
||||
public class IncrementalIndexStorageAdapter implements StorageAdapter
|
||||
{
|
||||
private static final ColumnCapabilities.CoercionLogic STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
|
||||
new ColumnCapabilities.CoercionLogic()
|
||||
{
|
||||
@Override
|
||||
public boolean dictionaryEncoded()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesSorted()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesUnique()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean multipleValues()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
private static final ColumnCapabilities.CoercionLogic SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC =
|
||||
new ColumnCapabilities.CoercionLogic()
|
||||
{
|
||||
@Override
|
||||
public boolean dictionaryEncoded()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesSorted()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean dictionaryValuesUnique()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean multipleValues()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
final IncrementalIndex<?> index;
|
||||
|
||||
public IncrementalIndexStorageAdapter(IncrementalIndex<?> index)
|
||||
|
@ -154,7 +210,7 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
|
|||
// to the StringDimensionIndexer so the selector built on top of it can produce values from the snapshot state of
|
||||
// multi-valuedness at cursor creation time, instead of the latest state, and getSnapshotColumnCapabilities could
|
||||
// be removed.
|
||||
return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), true);
|
||||
return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column), STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -165,7 +221,10 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter
|
|||
*/
|
||||
public ColumnCapabilities getSnapshotColumnCapabilities(String column)
|
||||
{
|
||||
return ColumnCapabilitiesImpl.snapshot(index.getCapabilities(column));
|
||||
return ColumnCapabilitiesImpl.snapshot(
|
||||
index.getCapabilities(column),
|
||||
SNAPSHOT_STORAGE_ADAPTER_CAPABILITIES_COERCE_LOGIC
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -83,9 +83,9 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
|
|||
spec -> {
|
||||
final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
|
||||
if (holder == null
|
||||
|| !holder.getCapabilities().isDictionaryEncoded()
|
||||
|| holder.getCapabilities().isDictionaryEncoded().isFalse()
|
||||
|| holder.getCapabilities().getType() != ValueType.STRING
|
||||
|| !holder.getCapabilities().hasMultipleValues().isMaybeTrue()) {
|
||||
|| holder.getCapabilities().hasMultipleValues().isFalse()) {
|
||||
throw new ISE(
|
||||
"Column[%s] is not a multi-value string column, do not ask for a multi-value selector",
|
||||
spec.getDimension()
|
||||
|
@ -119,7 +119,7 @@ public class QueryableIndexVectorColumnSelectorFactory implements VectorColumnSe
|
|||
spec -> {
|
||||
final ColumnHolder holder = index.getColumnHolder(spec.getDimension());
|
||||
if (holder == null
|
||||
|| !holder.getCapabilities().isDictionaryEncoded()
|
||||
|| !holder.getCapabilities().isDictionaryEncoded().isTrue()
|
||||
|| holder.getCapabilities().getType() != ValueType.STRING) {
|
||||
// Asking for a single-value dimension selector on a non-string column gets you a bunch of nulls.
|
||||
return NilVectorSelector.create(offset);
|
||||
|
|
|
@ -153,8 +153,8 @@ public class ExpressionSelectors
|
|||
);
|
||||
} else if (capabilities != null
|
||||
&& capabilities.getType() == ValueType.STRING
|
||||
&& capabilities.isDictionaryEncoded()
|
||||
&& !capabilities.hasMultipleValues().isMaybeTrue()
|
||||
&& capabilities.isDictionaryEncoded().isTrue()
|
||||
&& capabilities.hasMultipleValues().isFalse()
|
||||
&& exprDetails.getArrayBindings().isEmpty()) {
|
||||
// Optimization for expressions that hit one scalar string column and nothing else.
|
||||
return new SingleStringInputCachingExpressionColumnValueSelector(
|
||||
|
@ -225,7 +225,7 @@ public class ExpressionSelectors
|
|||
// not treating it as an array and not wanting to output an array
|
||||
if (capabilities != null
|
||||
&& capabilities.getType() == ValueType.STRING
|
||||
&& capabilities.isDictionaryEncoded()
|
||||
&& capabilities.isDictionaryEncoded().isTrue()
|
||||
&& !capabilities.hasMultipleValues().isUnknown()
|
||||
&& !exprDetails.hasInputArrays()
|
||||
&& !exprDetails.isOutputArray()
|
||||
|
|
|
@ -138,7 +138,7 @@ public class LookupSegmentTest
|
|||
// reporting complete single-valued capabilities. It would be good to change this in the future, so query engines
|
||||
// running on top of lookups can take advantage of singly-valued optimizations.
|
||||
Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
|
||||
Assert.assertFalse(capabilities.isDictionaryEncoded());
|
||||
Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -151,7 +151,7 @@ public class LookupSegmentTest
|
|||
// running on top of lookups can take advantage of singly-valued optimizations.
|
||||
Assert.assertEquals(ValueType.STRING, capabilities.getType());
|
||||
Assert.assertTrue(capabilities.hasMultipleValues().isUnknown());
|
||||
Assert.assertFalse(capabilities.isDictionaryEncoded());
|
||||
Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -45,7 +45,6 @@ import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
|
|||
import org.apache.druid.query.aggregation.AggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.CountAggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ColumnHolder;
|
||||
import org.apache.druid.segment.column.DictionaryEncodedColumn;
|
||||
import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
|
||||
|
@ -2012,32 +2011,6 @@ public class IndexMergerTestBase extends InitializedNullHandlingTest
|
|||
Assert.assertEquals(-1, dictIdSeeker.seek(5));
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testCloser() throws Exception
|
||||
{
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
|
||||
IncrementalIndexTest.populateIndex(timestamp, toPersist);
|
||||
ColumnCapabilitiesImpl capabilities = (ColumnCapabilitiesImpl) toPersist.getCapabilities("dim1");
|
||||
capabilities.setHasSpatialIndexes(true);
|
||||
|
||||
final File tempDir = temporaryFolder.newFolder();
|
||||
final File v8TmpDir = new File(tempDir, "v8-tmp");
|
||||
final File v9TmpDir = new File(tempDir, "v9-tmp");
|
||||
|
||||
try {
|
||||
indexMerger.persist(toPersist, tempDir, indexSpec, null);
|
||||
}
|
||||
finally {
|
||||
if (v8TmpDir.exists()) {
|
||||
Assert.fail("v8-tmp dir not clean.");
|
||||
}
|
||||
if (v9TmpDir.exists()) {
|
||||
Assert.fail("v9-tmp dir not clean.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiValueHandling() throws Exception
|
||||
{
|
||||
|
|
|
@ -150,20 +150,25 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
|
|||
ColumnCapabilities caps = INC_INDEX.getCapabilities("d1");
|
||||
Assert.assertEquals(ValueType.STRING, caps.getType());
|
||||
Assert.assertTrue(caps.hasBitmapIndexes());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded().isMaybeTrue());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
|
||||
// multi-value is unknown unless explicitly set to 'true'
|
||||
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
|
||||
// at index merge or query time we 'complete' the capabilities to take a snapshot of the current state,
|
||||
// coercing any 'UNKNOWN' values to false
|
||||
Assert.assertFalse(ColumnCapabilitiesImpl.snapshot(caps).hasMultipleValues().isMaybeTrue());
|
||||
Assert.assertFalse(
|
||||
ColumnCapabilitiesImpl.snapshot(caps, IndexMergerV9.DIMENSION_CAPABILITY_MERGE_LOGIC)
|
||||
.hasMultipleValues()
|
||||
.isMaybeTrue()
|
||||
);
|
||||
Assert.assertFalse(caps.hasSpatialIndexes());
|
||||
|
||||
caps = MMAP_INDEX.getColumnHolder("d1").getCapabilities();
|
||||
Assert.assertEquals(ValueType.STRING, caps.getType());
|
||||
Assert.assertTrue(caps.hasBitmapIndexes());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
|
||||
|
@ -176,7 +181,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
|
|||
ColumnCapabilities caps = INC_INDEX.getCapabilities("d2");
|
||||
Assert.assertEquals(ValueType.STRING, caps.getType());
|
||||
Assert.assertTrue(caps.hasBitmapIndexes());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertTrue(caps.hasMultipleValues().isTrue());
|
||||
|
@ -185,7 +190,7 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
|
|||
caps = MMAP_INDEX.getColumnHolder("d2").getCapabilities();
|
||||
Assert.assertEquals(ValueType.STRING, caps.getType());
|
||||
Assert.assertTrue(caps.hasBitmapIndexes());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded());
|
||||
Assert.assertTrue(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertTrue(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertTrue(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertTrue(caps.hasMultipleValues().isTrue());
|
||||
|
@ -199,12 +204,11 @@ public class QueryableIndexColumnCapabilitiesTest extends InitializedNullHandlin
|
|||
assertNonStringColumnCapabilities(MMAP_INDEX.getColumnHolder("m4").getCapabilities(), ValueType.COMPLEX);
|
||||
}
|
||||
|
||||
|
||||
private void assertNonStringColumnCapabilities(ColumnCapabilities caps, ValueType valueType)
|
||||
{
|
||||
Assert.assertEquals(valueType, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
|
||||
|
|
|
@ -51,7 +51,7 @@ public class RowBasedColumnSelectorFactoryTest
|
|||
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, ColumnHolder.TIME_COLUMN_NAME);
|
||||
Assert.assertEquals(ValueType.LONG, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
|
||||
|
@ -65,7 +65,7 @@ public class RowBasedColumnSelectorFactoryTest
|
|||
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, STRING_COLUMN_NAME);
|
||||
Assert.assertEquals(ValueType.STRING, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
|
||||
|
@ -79,7 +79,7 @@ public class RowBasedColumnSelectorFactoryTest
|
|||
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, LONG_COLUMN_NAME);
|
||||
Assert.assertEquals(ValueType.LONG, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
|
||||
|
@ -93,7 +93,7 @@ public class RowBasedColumnSelectorFactoryTest
|
|||
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, FLOAT_COLUMN_NAME);
|
||||
Assert.assertEquals(ValueType.FLOAT, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
|
||||
|
@ -107,7 +107,7 @@ public class RowBasedColumnSelectorFactoryTest
|
|||
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, DOUBLE_COLUMN_NAME);
|
||||
Assert.assertEquals(ValueType.DOUBLE, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertFalse(caps.hasMultipleValues().isMaybeTrue());
|
||||
|
@ -121,7 +121,7 @@ public class RowBasedColumnSelectorFactoryTest
|
|||
RowBasedColumnSelectorFactory.getColumnCapabilities(ROW_SIGNATURE, COMPLEX_COLUMN_NAME);
|
||||
Assert.assertEquals(ValueType.COMPLEX, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
|
||||
|
|
|
@ -44,8 +44,7 @@ public class ColumnCapabilitiesImplTest
|
|||
ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
|
||||
|
||||
Assert.assertEquals(ValueType.COMPLEX, cc.getType());
|
||||
Assert.assertTrue(cc.isDictionaryEncoded());
|
||||
Assert.assertFalse(cc.isRunLengthEncoded());
|
||||
Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
|
||||
Assert.assertTrue(cc.hasSpatialIndexes());
|
||||
Assert.assertTrue(cc.hasMultipleValues().isTrue());
|
||||
Assert.assertTrue(cc.hasBitmapIndexes());
|
||||
|
@ -69,8 +68,7 @@ public class ColumnCapabilitiesImplTest
|
|||
ColumnCapabilities cc = mapper.readValue(json, ColumnCapabilitiesImpl.class);
|
||||
|
||||
Assert.assertEquals(ValueType.COMPLEX, cc.getType());
|
||||
Assert.assertTrue(cc.isDictionaryEncoded());
|
||||
Assert.assertTrue(cc.isRunLengthEncoded());
|
||||
Assert.assertTrue(cc.isDictionaryEncoded().isTrue());
|
||||
Assert.assertTrue(cc.hasSpatialIndexes());
|
||||
Assert.assertTrue(cc.hasMultipleValues().isTrue());
|
||||
Assert.assertTrue(cc.hasBitmapIndexes());
|
||||
|
|
|
@ -200,7 +200,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
|
|||
|
||||
Assert.assertEquals(ValueType.STRING, capabilities.getType());
|
||||
Assert.assertTrue(capabilities.hasBitmapIndexes());
|
||||
Assert.assertTrue(capabilities.isDictionaryEncoded());
|
||||
Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
|
||||
Assert.assertTrue(capabilities.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertTrue(capabilities.areDictionaryValuesUnique().isTrue());
|
||||
}
|
||||
|
@ -216,7 +216,7 @@ public class HashJoinSegmentStorageAdapterTest extends BaseHashJoinSegmentStorag
|
|||
Assert.assertFalse(capabilities.hasBitmapIndexes());
|
||||
Assert.assertFalse(capabilities.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertFalse(capabilities.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertTrue(capabilities.isDictionaryEncoded());
|
||||
Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -137,7 +137,7 @@ public class IndexedTableJoinableTest
|
|||
{
|
||||
final ColumnCapabilities capabilities = target.getColumnCapabilities("str");
|
||||
Assert.assertEquals(ValueType.STRING, capabilities.getType());
|
||||
Assert.assertTrue(capabilities.isDictionaryEncoded());
|
||||
Assert.assertTrue(capabilities.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(capabilities.hasBitmapIndexes());
|
||||
Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
|
||||
Assert.assertFalse(capabilities.hasSpatialIndexes());
|
||||
|
@ -148,7 +148,7 @@ public class IndexedTableJoinableTest
|
|||
{
|
||||
final ColumnCapabilities capabilities = target.getColumnCapabilities("long");
|
||||
Assert.assertEquals(ValueType.LONG, capabilities.getType());
|
||||
Assert.assertFalse(capabilities.isDictionaryEncoded());
|
||||
Assert.assertFalse(capabilities.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(capabilities.hasBitmapIndexes());
|
||||
Assert.assertFalse(capabilities.hasMultipleValues().isMaybeTrue());
|
||||
Assert.assertFalse(capabilities.hasSpatialIndexes());
|
||||
|
|
|
@ -21,20 +21,41 @@ package org.apache.druid.segment.virtual;
|
|||
|
||||
import com.google.common.base.Supplier;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
import org.apache.druid.common.guava.SettableSupplier;
|
||||
import org.apache.druid.data.input.MapBasedInputRow;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.TimestampSpec;
|
||||
import org.apache.druid.java.util.common.DateTimes;
|
||||
import org.apache.druid.java.util.common.Intervals;
|
||||
import org.apache.druid.java.util.common.granularity.Granularities;
|
||||
import org.apache.druid.java.util.common.guava.Sequence;
|
||||
import org.apache.druid.math.expr.ExprEval;
|
||||
import org.apache.druid.math.expr.ExprMacroTable;
|
||||
import org.apache.druid.math.expr.Parser;
|
||||
import org.apache.druid.query.aggregation.AggregatorFactory;
|
||||
import org.apache.druid.query.aggregation.CountAggregatorFactory;
|
||||
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
|
||||
import org.apache.druid.segment.BaseSingleValueDimensionSelector;
|
||||
import org.apache.druid.segment.ColumnValueSelector;
|
||||
import org.apache.druid.segment.Cursor;
|
||||
import org.apache.druid.segment.DimensionSelector;
|
||||
import org.apache.druid.segment.TestObjectColumnSelector;
|
||||
import org.apache.druid.segment.VirtualColumns;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
|
||||
import org.apache.druid.segment.incremental.IndexSizeExceededException;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class ExpressionColumnValueSelectorTest
|
||||
public class ExpressionSelectorsTest extends InitializedNullHandlingTest
|
||||
{
|
||||
@Test
|
||||
public void testSupplierFromDimensionSelector()
|
||||
|
@ -231,6 +252,86 @@ public class ExpressionColumnValueSelectorTest
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncrementIndexStringSelector() throws IndexSizeExceededException
|
||||
{
|
||||
// This test covers a regression caused by ColumnCapabilites.isDictionaryEncoded not matching the value of
|
||||
// DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
|
||||
// exception trying to make an optimized string expression selector that was not appropriate to use for the
|
||||
// underlying dimension selector.
|
||||
// This occurred during schemaless ingestion with spare dimension values and no explicit null rows, so the
|
||||
// conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details
|
||||
IncrementalIndexSchema schema = new IncrementalIndexSchema(
|
||||
0,
|
||||
new TimestampSpec("time", "millis", DateTimes.nowUtc()),
|
||||
Granularities.NONE,
|
||||
VirtualColumns.EMPTY,
|
||||
DimensionsSpec.EMPTY,
|
||||
new AggregatorFactory[]{new CountAggregatorFactory("count")},
|
||||
true
|
||||
);
|
||||
|
||||
IncrementalIndex index = new IncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(schema).buildOnheap();
|
||||
index.add(
|
||||
new MapBasedInputRow(
|
||||
DateTimes.nowUtc().getMillis(),
|
||||
ImmutableList.of("x"),
|
||||
ImmutableMap.of("x", "foo")
|
||||
)
|
||||
);
|
||||
index.add(
|
||||
new MapBasedInputRow(
|
||||
DateTimes.nowUtc().plusMillis(1000).getMillis(),
|
||||
ImmutableList.of("y"),
|
||||
ImmutableMap.of("y", "foo")
|
||||
)
|
||||
);
|
||||
|
||||
IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
|
||||
|
||||
Sequence<Cursor> cursors = adapter.makeCursors(
|
||||
null,
|
||||
Intervals.ETERNITY,
|
||||
VirtualColumns.EMPTY,
|
||||
Granularities.ALL,
|
||||
false,
|
||||
null
|
||||
);
|
||||
int rowsProcessed = cursors.map(cursor -> {
|
||||
DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(
|
||||
cursor.getColumnSelectorFactory(),
|
||||
Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()),
|
||||
null
|
||||
);
|
||||
DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(
|
||||
cursor.getColumnSelectorFactory(),
|
||||
Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()),
|
||||
null
|
||||
);
|
||||
int rowCount = 0;
|
||||
while (!cursor.isDone()) {
|
||||
Object x = xExprSelector.getObject();
|
||||
Object y = yExprSelector.getObject();
|
||||
List<String> expectedFoo = Collections.singletonList("foofoo");
|
||||
List<String> expectedNull = NullHandling.replaceWithDefault()
|
||||
? Collections.singletonList("foo")
|
||||
: Collections.singletonList(null);
|
||||
if (rowCount == 0) {
|
||||
Assert.assertEquals(expectedFoo, x);
|
||||
Assert.assertEquals(expectedNull, y);
|
||||
} else {
|
||||
Assert.assertEquals(expectedNull, x);
|
||||
Assert.assertEquals(expectedFoo, y);
|
||||
}
|
||||
rowCount++;
|
||||
cursor.advance();
|
||||
}
|
||||
return rowCount;
|
||||
}).accumulate(0, (in, acc) -> in + acc);
|
||||
|
||||
Assert.assertEquals(2, rowsProcessed);
|
||||
}
|
||||
|
||||
private static DimensionSelector dimensionSelectorFromSupplier(
|
||||
final Supplier<String> supplier
|
||||
)
|
|
@ -810,7 +810,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
|
|||
ColumnCapabilities caps = X_PLUS_Y.capabilities("expr");
|
||||
Assert.assertEquals(ValueType.FLOAT, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
|
||||
|
@ -820,7 +820,7 @@ public class ExpressionVirtualColumnTest extends InitializedNullHandlingTest
|
|||
caps = Z_CONCAT_X.capabilities("expr");
|
||||
Assert.assertEquals(ValueType.STRING, caps.getType());
|
||||
Assert.assertFalse(caps.hasBitmapIndexes());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded());
|
||||
Assert.assertFalse(caps.isDictionaryEncoded().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesSorted().isTrue());
|
||||
Assert.assertFalse(caps.areDictionaryValuesUnique().isTrue());
|
||||
Assert.assertTrue(caps.hasMultipleValues().isUnknown());
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.druid.java.util.common.ISE;
|
|||
import org.apache.druid.query.aggregation.AggregatorFactory;
|
||||
import org.apache.druid.segment.QueryableIndex;
|
||||
import org.apache.druid.segment.ReferenceCountingSegment;
|
||||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ColumnCapabilities;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexAddResult;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||
|
@ -377,7 +377,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
|
|||
FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
|
||||
newCount = lastHydrant.getCount() + 1;
|
||||
if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
|
||||
Map<String, ColumnCapabilitiesImpl> oldCapabilities;
|
||||
Map<String, ColumnCapabilities> oldCapabilities;
|
||||
if (lastHydrant.hasSwapped()) {
|
||||
oldCapabilities = new HashMap<>();
|
||||
ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment();
|
||||
|
@ -385,7 +385,7 @@ public class Sink implements Iterable<FireHydrant>, Overshadowable<Sink>
|
|||
QueryableIndex oldIndex = segment.asQueryableIndex();
|
||||
for (String dim : oldIndex.getAvailableDimensions()) {
|
||||
dimOrder.add(dim);
|
||||
oldCapabilities.put(dim, (ColumnCapabilitiesImpl) oldIndex.getColumnHolder(dim).getCapabilities());
|
||||
oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities());
|
||||
}
|
||||
}
|
||||
finally {
|
||||
|
|
Loading…
Reference in New Issue