diff --git a/api/src/main/java/io/druid/data/input/impl/DimensionSchema.java b/api/src/main/java/io/druid/data/input/impl/DimensionSchema.java index a40786a30da..25384342af9 100644 --- a/api/src/main/java/io/druid/data/input/impl/DimensionSchema.java +++ b/api/src/main/java/io/druid/data/input/impl/DimensionSchema.java @@ -67,18 +67,60 @@ public abstract class DimensionSchema } } - private final String name; + public static enum MultiValueHandling + { + SORTED_ARRAY, + SORTED_SET, + ARRAY { + @Override + public boolean needSorting() { return false;} + }; - protected DimensionSchema(String name) + public boolean needSorting() + { + return true; + } + + @Override + @JsonValue + public String toString() + { + return name().toUpperCase(); + } + + @JsonCreator + public static MultiValueHandling fromString(String name) + { + return name == null ? ofDefault() : valueOf(name.toUpperCase()); + } + + // this can be system configuration + public static MultiValueHandling ofDefault() + { + return SORTED_ARRAY; + } + } + + private final String name; + private final MultiValueHandling multiValueHandling; + + protected DimensionSchema(String name, MultiValueHandling multiValueHandling) { this.name = Preconditions.checkNotNull(name, "Dimension name cannot be null."); + this.multiValueHandling = multiValueHandling; } @JsonProperty public String getName() { return name; - }; + } + + @JsonProperty + public MultiValueHandling getMultiValueHandling() + { + return multiValueHandling; + } @JsonIgnore public abstract String getTypeName(); diff --git a/api/src/main/java/io/druid/data/input/impl/DimensionsSpec.java b/api/src/main/java/io/druid/data/input/impl/DimensionsSpec.java index f7d30dae6b1..7cf15542831 100644 --- a/api/src/main/java/io/druid/data/input/impl/DimensionsSpec.java +++ b/api/src/main/java/io/druid/data/input/impl/DimensionsSpec.java @@ -42,7 +42,20 @@ public class DimensionsSpec private final Set dimensionExclusions; private final Map dimensionSchemaMap; + public static DimensionsSpec ofEmpty() + { + return new DimensionsSpec(null, null, null); + } + public static List getDefaultSchemas(List dimNames) + { + return getDefaultSchemas(dimNames, DimensionSchema.MultiValueHandling.ofDefault()); + } + + public static List getDefaultSchemas( + final List dimNames, + final DimensionSchema.MultiValueHandling multiValueHandling + ) { return Lists.transform( dimNames, @@ -51,7 +64,7 @@ public class DimensionsSpec @Override public DimensionSchema apply(String input) { - return new StringDimensionSchema(input); + return new StringDimensionSchema(input, multiValueHandling); } } ); diff --git a/api/src/main/java/io/druid/data/input/impl/FloatDimensionSchema.java b/api/src/main/java/io/druid/data/input/impl/FloatDimensionSchema.java index db3b04a631b..866fc282f40 100644 --- a/api/src/main/java/io/druid/data/input/impl/FloatDimensionSchema.java +++ b/api/src/main/java/io/druid/data/input/impl/FloatDimensionSchema.java @@ -30,7 +30,7 @@ public class FloatDimensionSchema extends DimensionSchema @JsonProperty("name") String name ) { - super(name); + super(name, null); } @Override diff --git a/api/src/main/java/io/druid/data/input/impl/LongDimensionSchema.java b/api/src/main/java/io/druid/data/input/impl/LongDimensionSchema.java index 4fd77d46924..ed5659739ed 100644 --- a/api/src/main/java/io/druid/data/input/impl/LongDimensionSchema.java +++ b/api/src/main/java/io/druid/data/input/impl/LongDimensionSchema.java @@ -30,7 +30,7 @@ public class LongDimensionSchema extends DimensionSchema @JsonProperty("name") String name ) { - super(name); + super(name, null); } @Override diff --git a/api/src/main/java/io/druid/data/input/impl/NewSpatialDimensionSchema.java b/api/src/main/java/io/druid/data/input/impl/NewSpatialDimensionSchema.java index ae834262bb9..e8f1c8ee313 100644 --- a/api/src/main/java/io/druid/data/input/impl/NewSpatialDimensionSchema.java +++ b/api/src/main/java/io/druid/data/input/impl/NewSpatialDimensionSchema.java @@ -41,7 +41,7 @@ public class NewSpatialDimensionSchema extends DimensionSchema @JsonProperty("dims") List dims ) { - super(name); + super(name, null); this.dims = dims; } diff --git a/api/src/main/java/io/druid/data/input/impl/StringDimensionSchema.java b/api/src/main/java/io/druid/data/input/impl/StringDimensionSchema.java index 02fef40dda4..054dfb3ae69 100644 --- a/api/src/main/java/io/druid/data/input/impl/StringDimensionSchema.java +++ b/api/src/main/java/io/druid/data/input/impl/StringDimensionSchema.java @@ -26,16 +26,23 @@ import com.fasterxml.jackson.annotation.JsonProperty; public class StringDimensionSchema extends DimensionSchema { @JsonCreator - public static StringDimensionSchema create(String name) { + public static StringDimensionSchema create(String name) + { return new StringDimensionSchema(name); } @JsonCreator public StringDimensionSchema( - @JsonProperty("name") String name + @JsonProperty("name") String name, + @JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling ) { - super(name); + super(name, multiValueHandling); + } + + public StringDimensionSchema(String name) + { + this(name, null); } @Override diff --git a/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java b/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java index 41d4c6595b5..87376e149ff 100644 --- a/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java +++ b/processing/src/main/java/io/druid/segment/DimensionHandlerUtil.java @@ -20,6 +20,7 @@ package io.druid.segment; import io.druid.java.util.common.IAE; +import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ValueType; @@ -27,14 +28,20 @@ public final class DimensionHandlerUtil { private DimensionHandlerUtil() {} - public static DimensionHandler getHandlerFromCapabilities(String dimensionName, ColumnCapabilities capabilities) + public static DimensionHandler getHandlerFromCapabilities( + String dimensionName, + ColumnCapabilities capabilities, + MultiValueHandling multiValueHandling + ) { DimensionHandler handler = null; if (capabilities.getType() == ValueType.STRING) { if (!capabilities.isDictionaryEncoded() || !capabilities.hasBitmapIndexes()) { throw new IAE("String column must have dictionary encoding and bitmap index."); } - handler = new StringDimensionHandler(dimensionName); + // use default behavior + multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling; + handler = new StringDimensionHandler(dimensionName, multiValueHandling); } if (handler == null) { throw new IAE("Could not create handler from invalid column type: " + capabilities.getType()); diff --git a/processing/src/main/java/io/druid/segment/DimensionIndexer.java b/processing/src/main/java/io/druid/segment/DimensionIndexer.java index 38e6235989e..4ac8672632f 100644 --- a/processing/src/main/java/io/druid/segment/DimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/DimensionIndexer.java @@ -166,7 +166,7 @@ public interface DimensionIndexer, E * Get the minimum dimension value seen by this indexer. * * NOTE: - * On an in-memory segment (IncrementaIndex), we can determine min/max values by looking at the stream of + * On an in-memory segment (IncrementalIndex), we can determine min/max values by looking at the stream of * row values seen in calls to processSingleRowValToIndexKey(). * * However, on a disk-backed segment (QueryableIndex), the numeric dimensions do not currently have any @@ -259,9 +259,11 @@ public interface DimensionIndexer, E */ public int getUnsortedEncodedArrayHashCode(EncodedTypeArray key); + public static final boolean LIST = true; + public static final boolean ARRAY = false; /** - * Given a row value array from a TimeAndDims key, as described in the documentatiion for compareUnsortedEncodedArrays(), + * Given a row value array from a TimeAndDims key, as described in the documentation for compareUnsortedEncodedArrays(), * convert the unsorted encoded values to a list or array of actual values. * * If the key has one element, this method should return a single Object instead of an array or list, ignoring @@ -275,7 +277,7 @@ public interface DimensionIndexer, E /** - * Given a row value array from a TimeAndDims key, as described in the documentatiion for compareUnsortedEncodedArrays(), + * Given a row value array from a TimeAndDims key, as described in the documentation for compareUnsortedEncodedArrays(), * convert the unsorted encoded values to an array of sorted encoded values (i.e., sorted by their corresponding actual values) * * @param key dimension value array from a TimeAndDims key diff --git a/processing/src/main/java/io/druid/segment/IndexMerger.java b/processing/src/main/java/io/druid/segment/IndexMerger.java index 298c64bd4e8..b42c190c562 100644 --- a/processing/src/main/java/io/druid/segment/IndexMerger.java +++ b/processing/src/main/java/io/druid/segment/IndexMerger.java @@ -912,7 +912,7 @@ public class IndexMerger for (int i = 0; i < mergedDimensions.size(); i++) { ColumnCapabilities capabilities = dimCapabilities.get(i); String dimName = mergedDimensions.get(i); - handlers[i] = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities); + handlers[i] = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities, null); } return handlers; } diff --git a/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java b/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java index af4fe719b3b..76261309299 100644 --- a/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java +++ b/processing/src/main/java/io/druid/segment/SimpleQueryableIndex.java @@ -124,7 +124,7 @@ public class SimpleQueryableIndex implements QueryableIndex { for (String dim : availableDimensions) { ColumnCapabilities capabilities = getColumn(dim).getCapabilities(); - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dim, capabilities); + DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dim, capabilities, null); dimensionHandlers.put(dim, handler); } } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java index 4bf635d034e..44253f68042 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionHandler.java @@ -19,9 +19,8 @@ package io.druid.segment; -import com.google.common.base.Function; import com.google.common.primitives.Ints; -import io.druid.java.util.common.logger.Logger; +import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.DictionaryEncodedColumn; @@ -37,13 +36,13 @@ import java.util.Comparator; public class StringDimensionHandler implements DimensionHandler { - private static final Logger log = new Logger(StringDimensionHandler.class); - private final String dimensionName; + private final MultiValueHandling multiValueHandling; - public StringDimensionHandler(String dimensionName) + public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling) { this.dimensionName = dimensionName; + this.multiValueHandling = multiValueHandling; } @Override @@ -193,7 +192,7 @@ public class StringDimensionHandler implements DimensionHandler makeIndexer() { - return new StringDimensionIndexer(); + return new StringDimensionIndexer(multiValueHandling); } @Override @@ -220,21 +219,6 @@ public class StringDimensionHandler implements DimensionHandler STRING_TRANSFORMER = new Function() - { - @Override - public String apply(final Object o) - { - if (o == null) { - return null; - } - if (o instanceof String) { - return (String) o; - } - return o.toString(); - } - }; - public static final Comparator ENCODED_COMPARATOR = new Comparator() { @Override @@ -250,18 +234,4 @@ public class StringDimensionHandler implements DimensionHandler UNENCODED_COMPARATOR = new Comparator() - { - @Override - public int compare(String o1, String o2) - { - if (o1 == null) { - return o2 == null ? 0 : -1; - } - if (o2 == null) { - return 1; - } - return o1.compareTo(o2); - } - }; } diff --git a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java index 76383c078ca..31866ad449e 100644 --- a/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/io/druid/segment/StringDimensionIndexer.java @@ -19,6 +19,7 @@ package io.druid.segment; +import com.google.common.base.Function; import com.google.common.base.Predicate; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -26,7 +27,7 @@ import com.google.common.collect.Maps; import com.google.common.primitives.Ints; import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.MutableBitmap; -import io.druid.java.util.common.logger.Logger; +import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; import io.druid.query.dimension.DimensionSpec; import io.druid.query.extraction.ExtractionFn; import io.druid.query.filter.DruidPredicateFactory; @@ -45,13 +46,42 @@ import it.unimi.dsi.fastutil.ints.IntLists; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; public class StringDimensionIndexer implements DimensionIndexer { - private static final Logger log = new Logger(StringDimensionIndexer.class); + public static final Function STRING_TRANSFORMER = new Function() + { + @Override + public String apply(final Object o) + { + if (o == null) { + return null; + } + if (o instanceof String) { + return (String) o; + } + return o.toString(); + } + }; + + public static final Comparator UNENCODED_COMPARATOR = new Comparator() + { + @Override + public int compare(String o1, String o2) + { + if (o1 == null) { + return o2 == null ? 0 : -1; + } + if (o2 == null) { + return 1; + } + return o1.compareTo(o2); + } + }; private static class DimensionDictionary { @@ -176,12 +206,14 @@ public class StringDimensionIndexer implements DimensionIndexer dimValuesList = (List) dimValues; + if (dimValuesList.size() == 1) { + encodedDimensionValues = new int[]{dimLookup.add(STRING_TRANSFORMER.apply(dimValuesList.get(0)))}; + } else { + final String[] dimensionValues = new String[dimValuesList.size()]; + for (int i = 0; i < dimValuesList.size(); i++) { + dimensionValues[i] = STRING_TRANSFORMER.apply(dimValuesList.get(i)); + } + if (multiValueHandling.needSorting()) { + // Sort multival row by their unencoded values first. + Arrays.sort(dimensionValues, UNENCODED_COMPARATOR); + } - // Sort multival row by their unencoded values first. - final String[] dimensionValues = new String[dimValuesList.size()]; - for (int i = 0; i < dimValuesList.size(); i++) { - dimensionValues[i] = StringDimensionHandler.STRING_TRANSFORMER.apply(dimValuesList.get(i)); - } - Arrays.sort(dimensionValues, StringDimensionHandler.UNENCODED_COMPARATOR); + final int[] retVal = new int[dimensionValues.length]; - encodedDimensionValues = new int[dimensionValues.length]; - for (int i = 0; i < dimensionValues.length; i++) { - encodedDimensionValues[i] = dimLookup.add(dimensionValues[i]); + int prevId = -1; + int pos = 0; + for (int i = 0; i < dimensionValues.length; i++) { + if (multiValueHandling != MultiValueHandling.SORTED_SET) { + retVal[pos++] = dimLookup.add(dimensionValues[i]); + continue; + } + int index = dimLookup.add(dimensionValues[i]); + if (index != prevId) { + prevId = retVal[pos++] = index; + } + } + + encodedDimensionValues = pos == retVal.length ? retVal : Arrays.copyOf(retVal, pos); } } else { - String transformedVal = StringDimensionHandler.STRING_TRANSFORMER.apply(dimValues); - encodedDimensionValues = new int[]{dimLookup.add(transformedVal)}; + encodedDimensionValues = new int[]{dimLookup.add(STRING_TRANSFORMER.apply(dimValues))}; } // If dictionary size has changed, the sorted lookup is no longer valid. @@ -223,21 +271,18 @@ public class StringDimensionIndexer implements DimensionIndexer getSortedIndexedValues() { - updateSortedLookup(); return new Indexed() { @Override @@ -491,7 +536,7 @@ public class StringDimensionIndexer implements DimensionIndexer implements Iterable, if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) { capabilities.setHasSpatialIndexes(true); } else { - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities); + DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities( + dimName, + capabilities, + dimSchema.getMultiValueHandling() + ); addNewDimension(dimName, capabilities, handler); } columnCapabilities.put(dimName, capabilities); @@ -556,7 +560,6 @@ public abstract class IncrementalIndex implements Iterable, for (String dimension : rowDimensions) { boolean wasNewDim = false; ColumnCapabilitiesImpl capabilities; - ValueType valType = null; DimensionDesc desc = dimensionDescs.get(dimension); if (desc != null) { capabilities = desc.getCapabilities(); @@ -571,7 +574,7 @@ public abstract class IncrementalIndex implements Iterable, capabilities.setHasBitmapIndexes(true); columnCapabilities.put(dimension, capabilities); } - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dimension, capabilities); + DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dimension, capabilities, null); desc = addNewDimension(dimension, capabilities, handler); } DimensionHandler handler = desc.getHandler(); @@ -751,7 +754,7 @@ public abstract class IncrementalIndex implements Iterable, if (dimensionDescs.get(dim) == null) { ColumnCapabilitiesImpl capabilities = oldColumnCapabilities.get(dim); columnCapabilities.put(dim, capabilities); - DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dim, capabilities); + DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dim, capabilities, null); addNewDimension(dim, capabilities, handler); } } @@ -852,7 +855,7 @@ public abstract class IncrementalIndex implements Iterable, continue; } final DimensionIndexer indexer = dimensionDesc.getIndexer(); - Object rowVals = indexer.convertUnsortedEncodedArrayToActualArrayOrList(dim, true); + Object rowVals = indexer.convertUnsortedEncodedArrayToActualArrayOrList(dim, DimensionIndexer.LIST); theVals.put(dimensionName, rowVals); } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexSchema.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexSchema.java index 282249a0d7b..b3bbfb18789 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexSchema.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexSchema.java @@ -135,7 +135,7 @@ public class IncrementalIndexSchema public Builder withDimensionsSpec(DimensionsSpec dimensionsSpec) { - this.dimensionsSpec = dimensionsSpec; + this.dimensionsSpec = dimensionsSpec == null ? DimensionsSpec.ofEmpty() : dimensionsSpec; return this; } diff --git a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java index 0c3d892ec25..df93cf1eaf2 100644 --- a/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java +++ b/processing/src/main/java/io/druid/segment/incremental/IncrementalIndexStorageAdapter.java @@ -530,8 +530,9 @@ public class IncrementalIndexStorageAdapter implements StorageAdapter return null; } - Object dimVals = indexer.convertUnsortedEncodedArrayToActualArrayOrList(dims[dimensionIndex], false); - return dimVals; + return indexer.convertUnsortedEncodedArrayToActualArrayOrList( + dims[dimensionIndex], DimensionIndexer.ARRAY + ); } }; } diff --git a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java index 0b898878d90..4604b9ce8e0 100644 --- a/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java +++ b/processing/src/main/java/io/druid/segment/incremental/OnheapIncrementalIndex.java @@ -22,6 +22,7 @@ package io.druid.segment.incremental; import com.google.common.base.Supplier; import com.google.common.collect.Maps; import io.druid.data.input.InputRow; +import io.druid.data.input.impl.DimensionsSpec; import io.druid.granularity.QueryGranularity; import io.druid.java.util.common.logger.Logger; import io.druid.java.util.common.parsers.ParseException; @@ -97,13 +98,15 @@ public class OnheapIncrementalIndex extends IncrementalIndex long minTimestamp, QueryGranularity gran, boolean rollup, - final AggregatorFactory[] metrics, + DimensionsSpec dimensionsSpec, + AggregatorFactory[] metrics, int maxRowCount ) { this( new IncrementalIndexSchema.Builder().withMinTimestamp(minTimestamp) .withQueryGranularity(gran) + .withDimensionsSpec(dimensionsSpec) .withMetrics(metrics) .withRollup(rollup) .build(), @@ -125,6 +128,7 @@ public class OnheapIncrementalIndex extends IncrementalIndex minTimestamp, gran, IncrementalIndexSchema.DEFAULT_ROLLUP, + null, metrics, maxRowCount ); diff --git a/processing/src/test/java/io/druid/segment/IndexMergerTest.java b/processing/src/test/java/io/druid/segment/IndexMergerTest.java index 1fd0a312b2c..a6651c09d8f 100644 --- a/processing/src/test/java/io/druid/segment/IndexMergerTest.java +++ b/processing/src/test/java/io/druid/segment/IndexMergerTest.java @@ -28,7 +28,10 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.primitives.Ints; import com.metamx.collections.bitmap.RoaringBitmapFactory; +import io.druid.data.input.InputRow; import io.druid.data.input.MapBasedInputRow; +import io.druid.data.input.impl.DimensionSchema; +import io.druid.data.input.impl.DimensionSchema.MultiValueHandling; import io.druid.data.input.impl.DimensionsSpec; import io.druid.granularity.QueryGranularities; import io.druid.java.util.common.IAE; @@ -39,6 +42,7 @@ import io.druid.query.aggregation.CountAggregatorFactory; import io.druid.query.aggregation.LongSumAggregatorFactory; import io.druid.segment.column.Column; import io.druid.segment.column.ColumnCapabilitiesImpl; +import io.druid.segment.column.DictionaryEncodedColumn; import io.druid.segment.column.SimpleDictionaryEncodedColumn; import io.druid.segment.data.BitmapSerdeFactory; import io.druid.segment.data.CompressedObjectStrategy; @@ -795,11 +799,21 @@ public class IndexMergerTest return; } - Object encodedColumn = index.getColumn("dim2").getDictionaryEncoding(); - Field field = SimpleDictionaryEncodedColumn.class.getDeclaredField("column"); - field.setAccessible(true); + DictionaryEncodedColumn encodedColumn = index.getColumn("dim2").getDictionaryEncoding(); + Object obj; + if (encodedColumn.hasMultipleValues()) { + Field field = SimpleDictionaryEncodedColumn.class.getDeclaredField("multiValueColumn"); + field.setAccessible(true); - Object obj = field.get(encodedColumn); + obj = field.get(encodedColumn); + } else { + Field field = SimpleDictionaryEncodedColumn.class.getDeclaredField("column"); + field.setAccessible(true); + + obj = field.get(encodedColumn); + } + // CompressedVSizeIntsIndexedSupplier$CompressedByteSizeIndexedInts + // CompressedVSizeIndexedSupplier$CompressedVSizeIndexed Field compressedSupplierField = obj.getClass().getDeclaredField("this$0"); compressedSupplierField.setAccessible(true); @@ -1716,11 +1730,13 @@ public class IndexMergerTest IncrementalIndex index1 = IncrementalIndexTest.createIndex(new AggregatorFactory[]{ new LongSumAggregatorFactory("A", "A") }); - index1.add(new MapBasedInputRow( - 1L, - Lists.newArrayList("d1", "d2"), - ImmutableMap.of("d1", "a", "d2", "z", "A", 1) - )); + index1.add( + new MapBasedInputRow( + 1L, + Lists.newArrayList("d1", "d2"), + ImmutableMap.of("d1", "a", "d2", "z", "A", 1) + ) + ); closer.closeLater(index1); IncrementalIndex index2 = IncrementalIndexTest.createIndex(new AggregatorFactory[]{ @@ -2170,4 +2186,113 @@ public class IndexMergerTest } } } + + @Test + public void testMultiValueHandling() throws Exception + { + InputRow[] rows = new InputRow[]{ + new MapBasedInputRow( + 1, + Arrays.asList("dim1", "dim2"), + ImmutableMap.of( + "dim1", Arrays.asList("x", "a", "a", "b"), + "dim2", Arrays.asList("a", "x", "b", "x") + ) + ), + new MapBasedInputRow( + 1, + Arrays.asList("dim1", "dim2"), + ImmutableMap.of( + "dim1", Arrays.asList("a", "b", "x"), + "dim2", Arrays.asList("x", "a", "b") + ) + ) + }; + + List schema; + QueryableIndex index; + QueryableIndexIndexableAdapter adapter; + List boatList; + + // xaab-axbx + abx-xab --> aabx-abxx + abx-abx --> abx-abx + aabx-abxx + schema = DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.SORTED_ARRAY); + index = persistAndLoad(schema, rows); + adapter = new QueryableIndexIndexableAdapter(index); + boatList = ImmutableList.copyOf(adapter.getRows()); + + Assert.assertEquals(2, index.getColumn(Column.TIME_COLUMN_NAME).getLength()); + Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index.getAvailableDimensions())); + Assert.assertEquals(3, index.getColumnNames().size()); + + Assert.assertEquals(2, boatList.size()); + Assert.assertArrayEquals(new int[][]{{0, 1, 2}, {0, 1, 2}}, boatList.get(0).getDims()); + Assert.assertArrayEquals(new int[][]{{0, 0, 1, 2}, {0, 1, 2, 2}}, boatList.get(1).getDims()); + + checkBitmapIndex(new ArrayList(), adapter.getBitmapIndex("dim1", "")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "a")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "b")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "x")); + + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "a")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "b")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "x")); + + // xaab-axbx + abx-xab --> abx-abx + abx-abx --> abx-abx + schema = DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.SORTED_SET); + index = persistAndLoad(schema, rows); + + Assert.assertEquals(1, index.getColumn(Column.TIME_COLUMN_NAME).getLength()); + Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index.getAvailableDimensions())); + Assert.assertEquals(3, index.getColumnNames().size()); + + adapter = new QueryableIndexIndexableAdapter(index); + boatList = ImmutableList.copyOf(adapter.getRows()); + + Assert.assertEquals(1, boatList.size()); + Assert.assertArrayEquals(new int[][]{{0, 1, 2}, {0, 1, 2}}, boatList.get(0).getDims()); + + checkBitmapIndex(new ArrayList(), adapter.getBitmapIndex("dim1", "")); + checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "a")); + checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "b")); + checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim1", "x")); + + checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "a")); + checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "b")); + checkBitmapIndex(Lists.newArrayList(0), adapter.getBitmapIndex("dim2", "x")); + + // xaab-axbx + abx-xab --> abx-xab + xaab-axbx + schema = DimensionsSpec.getDefaultSchemas(Arrays.asList("dim1", "dim2"), MultiValueHandling.ARRAY); + index = persistAndLoad(schema, rows); + + Assert.assertEquals(2, index.getColumn(Column.TIME_COLUMN_NAME).getLength()); + Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index.getAvailableDimensions())); + Assert.assertEquals(3, index.getColumnNames().size()); + + adapter = new QueryableIndexIndexableAdapter(index); + boatList = ImmutableList.copyOf(adapter.getRows()); + + Assert.assertEquals(2, boatList.size()); + Assert.assertArrayEquals(new int[][]{{0, 1, 2}, {2, 0, 1}}, boatList.get(0).getDims()); + Assert.assertArrayEquals(new int[][]{{2, 0, 0, 1}, {0, 2, 1, 2}}, boatList.get(1).getDims()); + + checkBitmapIndex(new ArrayList(), adapter.getBitmapIndex("dim1", "")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "a")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "b")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim1", "x")); + + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "a")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "b")); + checkBitmapIndex(Lists.newArrayList(0, 1), adapter.getBitmapIndex("dim2", "x")); + } + + private QueryableIndex persistAndLoad(List schema, InputRow... rows) throws IOException + { + IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null, new DimensionsSpec(schema, null, null)); + for (InputRow row : rows) { + toPersist.add(row); + } + + final File tempDir = temporaryFolder.newFolder(); + return closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(toPersist, tempDir, indexSpec))); + } } diff --git a/processing/src/test/java/io/druid/segment/data/IncrementalIndexTest.java b/processing/src/test/java/io/druid/segment/data/IncrementalIndexTest.java index 182554f7e78..59b48b9fa67 100644 --- a/processing/src/test/java/io/druid/segment/data/IncrementalIndexTest.java +++ b/processing/src/test/java/io/druid/segment/data/IncrementalIndexTest.java @@ -196,6 +196,19 @@ public class IncrementalIndexTest return defaultCombiningAggregatorFactories; } + public static IncrementalIndex createIndex( + AggregatorFactory[] aggregatorFactories, + DimensionsSpec dimensionsSpec) + { + if (null == aggregatorFactories) { + aggregatorFactories = defaultAggregatorFactories; + } + + return new OnheapIncrementalIndex( + 0L, QueryGranularities.NONE, true, dimensionsSpec, aggregatorFactories, 1000000 + ); + } + public static IncrementalIndex createIndex(AggregatorFactory[] aggregatorFactories) { if (null == aggregatorFactories) { @@ -203,7 +216,7 @@ public class IncrementalIndexTest } return new OnheapIncrementalIndex( - 0L, QueryGranularities.NONE, aggregatorFactories, 1000000 + 0L, QueryGranularities.NONE, true, null, aggregatorFactories, 1000000 ); } @@ -214,7 +227,7 @@ public class IncrementalIndexTest } return new OnheapIncrementalIndex( - 0L, QueryGranularities.NONE, false, aggregatorFactories, 1000000 + 0L, QueryGranularities.NONE, false, null, aggregatorFactories, 1000000 ); } diff --git a/processing/src/test/java/io/druid/segment/filter/RowboatTest.java b/processing/src/test/java/io/druid/segment/filter/RowboatTest.java index c5283ac4d03..bc2bc94d273 100644 --- a/processing/src/test/java/io/druid/segment/filter/RowboatTest.java +++ b/processing/src/test/java/io/druid/segment/filter/RowboatTest.java @@ -33,7 +33,7 @@ public class RowboatTest private static DimensionHandler[] getDefaultHandlers(int size) { DimensionHandler[] handlers = new DimensionHandler[size]; for (int i = 0; i < size; i++) { - handlers[i] = new StringDimensionHandler(String.valueOf(i)); + handlers[i] = new StringDimensionHandler(String.valueOf(i), null); } return handlers; } diff --git a/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexMultiValueSpecTest.java b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexMultiValueSpecTest.java new file mode 100644 index 00000000000..48c98f2ce89 --- /dev/null +++ b/processing/src/test/java/io/druid/segment/incremental/IncrementalIndexMultiValueSpecTest.java @@ -0,0 +1,92 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.segment.incremental; + +import com.google.common.collect.Lists; +import io.druid.data.input.MapBasedInputRow; +import io.druid.data.input.Row; +import io.druid.data.input.impl.DimensionSchema; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.StringDimensionSchema; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.granularity.QueryGranularities; +import io.druid.query.aggregation.AggregatorFactory; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + */ +public class IncrementalIndexMultiValueSpecTest +{ + @Test + public void test() throws IndexSizeExceededException + { + DimensionsSpec dimensionsSpec = new DimensionsSpec( + Arrays.asList( + new StringDimensionSchema("string1", DimensionSchema.MultiValueHandling.ARRAY), + new StringDimensionSchema("string2", DimensionSchema.MultiValueHandling.SORTED_ARRAY), + new StringDimensionSchema("string3", DimensionSchema.MultiValueHandling.SORTED_SET) + ), + null, null + ); + IncrementalIndexSchema schema = new IncrementalIndexSchema( + 0, + new TimestampSpec("ds", "auto", null), + QueryGranularities.ALL, + dimensionsSpec, + new AggregatorFactory[0], + false + ); + Map map = new HashMap() + { + @Override + public Object get(Object key) + { + if (((String) key).startsWith("string")) { + return Arrays.asList("xsd", "aba", "fds", "aba"); + } + if (((String) key).startsWith("float")) { + return Arrays.asList(3.92f, -2.76f, 42.153f, Float.NaN, -2.76f, -2.76f); + } + if (((String) key).startsWith("long")) { + return Arrays.asList(-231238789L, 328L, 923L, 328L, -2L, 0L); + } + return null; + } + }; + IncrementalIndex index = new OnheapIncrementalIndex(schema, true, 10000); + index.add( + new MapBasedInputRow( + 0, Arrays.asList( + "string1", "string2", "string3", "float1", "float2", "float3", "long1", "long2", "long3" + ), map + ) + ); + + Row row = index.iterator().next(); + Assert.assertEquals(Lists.newArrayList("xsd", "aba", "fds", "aba"), row.getRaw("string1")); + Assert.assertEquals(Lists.newArrayList("aba", "aba", "fds", "xsd"), row.getRaw("string2")); + Assert.assertEquals(Lists.newArrayList("aba", "fds", "xsd"), row.getRaw("string3")); + } +}