mirror of https://github.com/apache/druid.git
Merge pull request #2111 from binlijin/optimize-create-inverted-indexes
optimize create inverted indexes
This commit is contained in:
commit
1d1f4d996d
|
@ -56,7 +56,6 @@ import io.druid.common.guava.GuavaUtils;
|
|||
import io.druid.common.utils.JodaUtils;
|
||||
import io.druid.common.utils.SerializerUtils;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.column.ColumnCapabilities;
|
||||
import io.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import io.druid.segment.column.ValueType;
|
||||
|
@ -970,19 +969,28 @@ public class IndexMerger
|
|||
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
|
||||
}
|
||||
|
||||
BitmapIndexSeeker[] bitmapIndexSeeker = new BitmapIndexSeeker[indexes.size()];
|
||||
DictIdSeeker[] dictIdSeeker = new DictIdSeeker[indexes.size()];
|
||||
for (int j = 0; j < indexes.size(); j++) {
|
||||
bitmapIndexSeeker[j] = indexes.get(j).getBitmapIndexSeeker(dimension);
|
||||
IntBuffer dimConversion = dimConversions.get(j).get(dimension);
|
||||
if (dimConversion != null) {
|
||||
dictIdSeeker[j] = new DictIdSeeker((IntBuffer) dimConversion.asReadOnlyBuffer().rewind());
|
||||
} else {
|
||||
dictIdSeeker[j] = new DictIdSeeker(null);
|
||||
}
|
||||
}
|
||||
for (String dimVal : IndexedIterable.create(dimVals)) {
|
||||
// Iterate over all dim values' dictionary ids in ascending order, which is in line with the dim values' compare result.
|
||||
for (int dictId = 0; dictId < dimVals.size(); dictId++) {
|
||||
progress.progress();
|
||||
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size());
|
||||
for (int j = 0; j < indexes.size(); ++j) {
|
||||
convertedInverteds.add(
|
||||
new ConvertingIndexedInts(
|
||||
bitmapIndexSeeker[j].seek(dimVal), rowNumConversions.get(j)
|
||||
)
|
||||
);
|
||||
int seekedDictId = dictIdSeeker[j].seek(dictId);
|
||||
if (seekedDictId != DictIdSeeker.NOT_EXIST) {
|
||||
convertedInverteds.add(
|
||||
new ConvertingIndexedInts(
|
||||
indexes.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
|
||||
|
@ -999,13 +1007,16 @@ public class IndexMerger
|
|||
bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset)
|
||||
);
|
||||
|
||||
if (isSpatialDim && dimVal != null) {
|
||||
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
||||
float[] coords = new float[stringCoords.size()];
|
||||
for (int j = 0; j < coords.length; j++) {
|
||||
coords[j] = Float.valueOf(stringCoords.get(j));
|
||||
if (isSpatialDim) {
|
||||
String dimVal = dimVals.get(dictId);
|
||||
if (dimVal != null) {
|
||||
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
||||
float[] coords = new float[stringCoords.size()];
|
||||
for (int j = 0; j < coords.length; j++) {
|
||||
coords[j] = Float.valueOf(stringCoords.get(j));
|
||||
}
|
||||
tree.insert(coords, bitset);
|
||||
}
|
||||
tree.insert(coords, bitset);
|
||||
}
|
||||
}
|
||||
writer.close();
|
||||
|
@ -1193,6 +1204,63 @@ public class IndexMerger
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get old dictId from new dictId, and only support access in order
|
||||
*/
|
||||
public static class DictIdSeeker
|
||||
{
|
||||
static final int NOT_EXIST = -1;
|
||||
static final int NOT_INIT = -1;
|
||||
private final IntBuffer dimConversions;
|
||||
private int currIndex;
|
||||
private int currVal;
|
||||
private int lastVal;
|
||||
|
||||
DictIdSeeker(
|
||||
IntBuffer dimConversions
|
||||
)
|
||||
{
|
||||
this.dimConversions = dimConversions;
|
||||
this.currIndex = 0;
|
||||
this.currVal = NOT_INIT;
|
||||
this.lastVal = NOT_INIT;
|
||||
}
|
||||
|
||||
public int seek(int dictId)
|
||||
{
|
||||
if (dimConversions == null) {
|
||||
return NOT_EXIST;
|
||||
}
|
||||
if (lastVal != NOT_INIT) {
|
||||
if (dictId <= lastVal) {
|
||||
throw new ISE("Value dictId[%d] is less than the last value dictId[%d] I have, cannot be.",
|
||||
dictId, lastVal
|
||||
);
|
||||
}
|
||||
return NOT_EXIST;
|
||||
}
|
||||
if (currVal == NOT_INIT) {
|
||||
currVal = dimConversions.get();
|
||||
}
|
||||
if (currVal == dictId) {
|
||||
int ret = currIndex;
|
||||
++currIndex;
|
||||
if (dimConversions.hasRemaining()) {
|
||||
currVal = dimConversions.get();
|
||||
} else {
|
||||
lastVal = dictId;
|
||||
}
|
||||
return ret;
|
||||
} else if (currVal < dictId) {
|
||||
throw new ISE("Skipped currValue dictId[%d], currIndex[%d]; incoming value dictId[%d]",
|
||||
currVal, currIndex, dictId
|
||||
);
|
||||
} else {
|
||||
return NOT_EXIST;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class ConvertingIndexedInts implements Iterable<Integer>
|
||||
{
|
||||
private final IndexedInts baseIndex;
|
||||
|
|
|
@ -43,7 +43,6 @@ import com.metamx.common.logger.Logger;
|
|||
import io.druid.collections.CombiningIterable;
|
||||
import io.druid.common.utils.JodaUtils;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.column.Column;
|
||||
import io.druid.segment.column.ColumnCapabilities;
|
||||
import io.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
|
@ -59,7 +58,6 @@ import io.druid.segment.data.GenericIndexedWriter;
|
|||
import io.druid.segment.data.IOPeon;
|
||||
import io.druid.segment.data.Indexed;
|
||||
import io.druid.segment.data.IndexedIntsWriter;
|
||||
import io.druid.segment.data.IndexedIterable;
|
||||
import io.druid.segment.data.IndexedRTree;
|
||||
import io.druid.segment.data.TmpFileIOPeon;
|
||||
import io.druid.segment.data.VSizeIndexedIntsWriter;
|
||||
|
@ -220,7 +218,7 @@ public class IndexMergerV9 extends IndexMerger
|
|||
);
|
||||
makeInvertedIndexes(
|
||||
adapters, progress, mergedDimensions, indexSpec, v9TmpDir, rowNumConversions,
|
||||
nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters
|
||||
nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters, dimConversions
|
||||
);
|
||||
|
||||
/************ Finalize Build Columns *************/
|
||||
|
@ -499,7 +497,8 @@ public class IndexMergerV9 extends IndexMerger
|
|||
final ArrayList<MutableBitmap> nullRowsList,
|
||||
final ArrayList<GenericIndexedWriter<String>> dimValueWriters,
|
||||
final ArrayList<GenericIndexedWriter<ImmutableBitmap>> bitmapIndexWriters,
|
||||
final ArrayList<ByteBufferWriter<ImmutableRTree>> spatialIndexWriters
|
||||
final ArrayList<ByteBufferWriter<ImmutableRTree>> spatialIndexWriters,
|
||||
final ArrayList<Map<String, IntBuffer>> dimConversions
|
||||
) throws IOException
|
||||
{
|
||||
final String section = "build inverted index";
|
||||
|
@ -527,24 +526,33 @@ public class IndexMergerV9 extends IndexMerger
|
|||
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
|
||||
}
|
||||
|
||||
BitmapIndexSeeker[] bitmapIndexSeeker = new BitmapIndexSeeker[adapters.size()];
|
||||
DictIdSeeker[] dictIdSeeker = new DictIdSeeker[adapters.size()];
|
||||
for (int j = 0; j < adapters.size(); j++) {
|
||||
bitmapIndexSeeker[j] = adapters.get(j).getBitmapIndexSeeker(dimension);
|
||||
IntBuffer dimConversion = dimConversions.get(j).get(dimension);
|
||||
if (dimConversion != null) {
|
||||
dictIdSeeker[j] = new DictIdSeeker((IntBuffer)dimConversion.asReadOnlyBuffer().rewind());
|
||||
} else {
|
||||
dictIdSeeker[j] = new DictIdSeeker(null);
|
||||
}
|
||||
}
|
||||
|
||||
ImmutableBitmap nullRowBitmap = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(
|
||||
nullRowsList.get(dimIndex)
|
||||
);
|
||||
|
||||
for (String dimVal : IndexedIterable.create(dimVals)) {
|
||||
// Iterate over all dim values' dictionary ids in ascending order, which is in line with the dim values' compare result.
|
||||
for (int dictId = 0; dictId < dimVals.size(); dictId++) {
|
||||
progress.progress();
|
||||
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size());
|
||||
for (int j = 0; j < adapters.size(); ++j) {
|
||||
convertedInverteds.add(
|
||||
new ConvertingIndexedInts(
|
||||
bitmapIndexSeeker[j].seek(dimVal), rowNumConversions.get(j)
|
||||
)
|
||||
);
|
||||
int seekedDictId = dictIdSeeker[j].seek(dictId);
|
||||
if (seekedDictId != DictIdSeeker.NOT_EXIST) {
|
||||
convertedInverteds.add(
|
||||
new ConvertingIndexedInts(
|
||||
adapters.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j)
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
|
||||
|
@ -558,19 +566,22 @@ public class IndexMergerV9 extends IndexMerger
|
|||
}
|
||||
|
||||
ImmutableBitmap bitmapToWrite = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset);
|
||||
if (dimVal == null) {
|
||||
if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
|
||||
bitmapIndexWriters.get(dimIndex).write(nullRowBitmap.union(bitmapToWrite));
|
||||
} else {
|
||||
bitmapIndexWriters.get(dimIndex).write(bitmapToWrite);
|
||||
}
|
||||
|
||||
if (spatialIndexWriter != null && dimVal != null) {
|
||||
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
||||
float[] coords = new float[stringCoords.size()];
|
||||
for (int j = 0; j < coords.length; j++) {
|
||||
coords[j] = Float.valueOf(stringCoords.get(j));
|
||||
if (spatialIndexWriter != null) {
|
||||
String dimVal = dimVals.get(dictId);
|
||||
if (dimVal != null) {
|
||||
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
|
||||
float[] coords = new float[stringCoords.size()];
|
||||
for (int j = 0; j < coords.length; j++) {
|
||||
coords[j] = Float.valueOf(stringCoords.get(j));
|
||||
}
|
||||
tree.insert(coords, bitset);
|
||||
}
|
||||
tree.insert(coords, bitset);
|
||||
}
|
||||
}
|
||||
if (spatialIndexWriter != null) {
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
package io.druid.segment;
|
||||
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.column.ColumnCapabilities;
|
||||
import io.druid.segment.data.Indexed;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
|
@ -44,7 +43,7 @@ public interface IndexableAdapter
|
|||
|
||||
IndexedInts getBitmapIndex(String dimension, String value);
|
||||
|
||||
BitmapIndexSeeker getBitmapIndexSeeker(String dimension);
|
||||
IndexedInts getBitmapIndex(String dimension, int dictId);
|
||||
|
||||
String getMetricType(String metric);
|
||||
|
||||
|
|
|
@ -27,12 +27,10 @@ import com.metamx.common.ISE;
|
|||
import com.metamx.common.guava.CloseQuietly;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import io.druid.segment.column.BitmapIndex;
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.column.Column;
|
||||
import io.druid.segment.column.ColumnCapabilities;
|
||||
import io.druid.segment.column.ComplexColumn;
|
||||
import io.druid.segment.column.DictionaryEncodedColumn;
|
||||
import io.druid.segment.column.EmptyBitmapIndexSeeker;
|
||||
import io.druid.segment.column.GenericColumn;
|
||||
import io.druid.segment.column.IndexedFloatsGenericColumn;
|
||||
import io.druid.segment.column.IndexedLongsGenericColumn;
|
||||
|
@ -40,7 +38,6 @@ import io.druid.segment.column.ValueType;
|
|||
import io.druid.segment.data.ArrayBasedIndexedInts;
|
||||
import io.druid.segment.data.BitmapCompressedIndexedInts;
|
||||
import io.druid.segment.data.EmptyIndexedInts;
|
||||
import io.druid.segment.data.GenericIndexed;
|
||||
import io.druid.segment.data.Indexed;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
import io.druid.segment.data.IndexedIterable;
|
||||
|
@ -303,12 +300,12 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
|
|||
final Column column = input.getColumn(dimension);
|
||||
|
||||
if (column == null) {
|
||||
return new EmptyIndexedInts();
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
|
||||
final BitmapIndex bitmaps = column.getBitmapIndex();
|
||||
if (bitmaps == null) {
|
||||
return new EmptyIndexedInts();
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
|
||||
return new BitmapCompressedIndexedInts(bitmaps.getBitmap(value));
|
||||
|
@ -339,79 +336,23 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
|
|||
}
|
||||
|
||||
@Override
|
||||
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension)
|
||||
public IndexedInts getBitmapIndex(String dimension, int dictId)
|
||||
{
|
||||
final Column column = input.getColumn(dimension);
|
||||
|
||||
if (column == null) {
|
||||
return new EmptyBitmapIndexSeeker();
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
|
||||
final BitmapIndex bitmaps = column.getBitmapIndex();
|
||||
if (bitmaps == null) {
|
||||
return new EmptyBitmapIndexSeeker();
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
|
||||
final Indexed<String> dimSet = getDimValueLookup(dimension);
|
||||
|
||||
// BitmapIndexSeeker is where the main performance boost comes from.
|
||||
// In the previous version of index merge, during the creation of invert index, we do something like
|
||||
// merge sort of multiple bitmap indexes. It simply iterates over all the previously sorted values,
|
||||
// and "binary find" the id in each bitmap indexes, which involves disk IO and is really slow.
|
||||
// Suppose we have N (which is 100 in our test) small segments, each have M (which is 50000 in our case) rows.
|
||||
// In high cardinality scenario, we will almost have N * M uniq values. So the complexity will be O(N * M * M * LOG(M)).
|
||||
|
||||
// There are 2 properties we did not use during the merging:
|
||||
// 1. We always travel the dimension values sequentially
|
||||
// 2. One single dimension value is valid only in one index when cardinality is high enough
|
||||
// So we introduced the BitmapIndexSeeker, which can only seek value sequentially and can never seek back.
|
||||
// By using this and the help of "getDimValueLookup", we only need to translate all dimension value to its ID once,
|
||||
// and the translation is done by self-increase of the integer. We only need to change the CACHED value once after
|
||||
// previous value is hit, renew the value and increase the ID. The complexity now is O(N * M * LOG(M)).
|
||||
return new BitmapIndexSeeker()
|
||||
{
|
||||
private int currIndex = 0;
|
||||
private String currVal = null;
|
||||
private String lastVal = null;
|
||||
|
||||
@Override
|
||||
public IndexedInts seek(String value)
|
||||
{
|
||||
if (dimSet == null || dimSet.size() == 0) {
|
||||
return new EmptyIndexedInts();
|
||||
}
|
||||
if (lastVal != null) {
|
||||
if (GenericIndexed.STRING_STRATEGY.compare(value, lastVal) <= 0) {
|
||||
throw new ISE(
|
||||
"Value[%s] is less than the last value[%s] I have, cannot be.",
|
||||
value, lastVal
|
||||
);
|
||||
}
|
||||
return new EmptyIndexedInts();
|
||||
}
|
||||
if (currVal == null) {
|
||||
currVal = dimSet.get(currIndex);
|
||||
}
|
||||
int compareResult = GenericIndexed.STRING_STRATEGY.compare(currVal, value);
|
||||
if (compareResult == 0) {
|
||||
IndexedInts ret = new BitmapCompressedIndexedInts(bitmaps.getBitmap(currIndex));
|
||||
++currIndex;
|
||||
if (currIndex == dimSet.size()) {
|
||||
lastVal = value;
|
||||
} else {
|
||||
currVal = dimSet.get(currIndex);
|
||||
}
|
||||
return ret;
|
||||
} else if (compareResult < 0) {
|
||||
throw new ISE(
|
||||
"Skipped currValue[%s], currIndex[%,d]; incoming value[%s]",
|
||||
currVal, currIndex, value
|
||||
);
|
||||
} else {
|
||||
return new EmptyIndexedInts();
|
||||
}
|
||||
}
|
||||
};
|
||||
if (dictId >= 0) {
|
||||
return new BitmapCompressedIndexedInts(bitmaps.getBitmap(dictId));
|
||||
} else {
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,7 +21,6 @@ package io.druid.segment;
|
|||
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.Iterables;
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.column.ColumnCapabilities;
|
||||
import io.druid.segment.data.Indexed;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
|
@ -95,9 +94,9 @@ public class RowboatFilteringIndexAdapter implements IndexableAdapter
|
|||
}
|
||||
|
||||
@Override
|
||||
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension)
|
||||
public IndexedInts getBitmapIndex(String dimension, int dictId)
|
||||
{
|
||||
return baseAdapter.getBitmapIndexSeeker(dimension);
|
||||
return baseAdapter.getBitmapIndex(dimension, dictId);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.segment.column;
|
||||
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
|
||||
/**
|
||||
* Only support access in order
|
||||
*/
|
||||
public interface BitmapIndexSeeker
|
||||
{
|
||||
public IndexedInts seek(String value);
|
||||
}
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.segment.column;
|
||||
|
||||
import io.druid.segment.data.EmptyIndexedInts;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
|
||||
public class EmptyBitmapIndexSeeker implements BitmapIndexSeeker
|
||||
{
|
||||
|
||||
@Override
|
||||
public IndexedInts seek(String value)
|
||||
{
|
||||
return new EmptyIndexedInts();
|
||||
}
|
||||
|
||||
}
|
|
@ -28,7 +28,11 @@ import java.util.Iterator;
|
|||
*/
|
||||
public class EmptyIndexedInts implements IndexedInts
|
||||
{
|
||||
public static EmptyIndexedInts instance = new EmptyIndexedInts();
|
||||
public static final EmptyIndexedInts EMPTY_INDEXED_INTS = new EmptyIndexedInts();
|
||||
|
||||
private EmptyIndexedInts()
|
||||
{
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size()
|
||||
|
|
|
@ -26,16 +26,12 @@ import com.google.common.collect.Maps;
|
|||
import com.google.common.collect.Sets;
|
||||
import com.metamx.collections.bitmap.BitmapFactory;
|
||||
import com.metamx.collections.bitmap.MutableBitmap;
|
||||
import com.metamx.common.ISE;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import io.druid.segment.IndexableAdapter;
|
||||
import io.druid.segment.Metadata;
|
||||
import io.druid.segment.Rowboat;
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.column.ColumnCapabilities;
|
||||
import io.druid.segment.column.EmptyBitmapIndexSeeker;
|
||||
import io.druid.segment.data.EmptyIndexedInts;
|
||||
import io.druid.segment.data.GenericIndexed;
|
||||
import io.druid.segment.data.Indexed;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
import io.druid.segment.data.IndexedIterable;
|
||||
|
@ -276,13 +272,13 @@ public class IncrementalIndexAdapter implements IndexableAdapter
|
|||
Map<String, MutableBitmap> dimInverted = invertedIndexes.get(dimension);
|
||||
|
||||
if (dimInverted == null) {
|
||||
return new EmptyIndexedInts();
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
|
||||
final MutableBitmap bitmapIndex = dimInverted.get(value);
|
||||
|
||||
if (bitmapIndex == null) {
|
||||
return new EmptyIndexedInts();
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
|
||||
return new BitmapIndexedInts(bitmapIndex);
|
||||
|
@ -301,35 +297,17 @@ public class IncrementalIndexAdapter implements IndexableAdapter
|
|||
}
|
||||
|
||||
@Override
|
||||
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension)
|
||||
public IndexedInts getBitmapIndex(String dimension, int dictId)
|
||||
{
|
||||
final Map<String, MutableBitmap> dimInverted = invertedIndexes.get(dimension);
|
||||
if (dimInverted == null) {
|
||||
return new EmptyBitmapIndexSeeker();
|
||||
if (dictId >= 0) {
|
||||
final Indexed<String> dimValues = getDimValueLookup(dimension);
|
||||
//NullValueConverterDimDim will convert empty to null, we need convert it back to the actual values,
|
||||
//because getBitmapIndex relies on the actual values stored in DimDim.
|
||||
String value = Strings.nullToEmpty(dimValues.get(dictId));
|
||||
return getBitmapIndex(dimension, value);
|
||||
} else {
|
||||
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
|
||||
}
|
||||
|
||||
return new BitmapIndexSeeker()
|
||||
{
|
||||
private String lastVal = null;
|
||||
|
||||
@Override
|
||||
public IndexedInts seek(String value)
|
||||
{
|
||||
if (value != null && GenericIndexed.STRING_STRATEGY.compare(value, lastVal) <= 0) {
|
||||
throw new ISE(
|
||||
"Value[%s] is less than the last value[%s] I have, cannot be.",
|
||||
value, lastVal
|
||||
);
|
||||
}
|
||||
value = Strings.nullToEmpty(value);
|
||||
lastVal = value;
|
||||
final MutableBitmap bitmapIndex = dimInverted.get(value);
|
||||
if (bitmapIndex == null) {
|
||||
return new EmptyIndexedInts();
|
||||
}
|
||||
return new BitmapIndexedInts(bitmapIndex);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private boolean hasNullValue(String[] dimValues)
|
||||
|
|
|
@ -26,8 +26,10 @@ import com.google.common.collect.ImmutableMap;
|
|||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.metamx.collections.bitmap.RoaringBitmapFactory;
|
||||
import com.metamx.common.IAE;
|
||||
import com.metamx.common.ISE;
|
||||
import io.druid.data.input.MapBasedInputRow;
|
||||
import io.druid.data.input.impl.DimensionsSpec;
|
||||
import io.druid.granularity.QueryGranularity;
|
||||
|
@ -60,6 +62,8 @@ import javax.annotation.Nullable;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.IntBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
@ -1656,4 +1660,27 @@ public class IndexMergerTest
|
|||
}
|
||||
return combiningAggregators;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDictIdSeeker() throws Exception
|
||||
{
|
||||
IntBuffer dimConversions = ByteBuffer.allocateDirect(3 * Ints.BYTES).asIntBuffer();
|
||||
dimConversions.put(0);
|
||||
dimConversions.put(2);
|
||||
dimConversions.put(4);
|
||||
IndexMerger.DictIdSeeker dictIdSeeker = new IndexMerger.DictIdSeeker((IntBuffer) dimConversions.asReadOnlyBuffer().rewind());
|
||||
Assert.assertEquals(0, dictIdSeeker.seek(0));
|
||||
Assert.assertEquals(-1, dictIdSeeker.seek(1));
|
||||
Assert.assertEquals(1, dictIdSeeker.seek(2));
|
||||
try {
|
||||
dictIdSeeker.seek(5);
|
||||
Assert.fail("Only support access in order");
|
||||
}
|
||||
catch (ISE ise) {
|
||||
Assert.assertTrue("Only support access in order", true);
|
||||
}
|
||||
Assert.assertEquals(-1, dictIdSeeker.seek(3));
|
||||
Assert.assertEquals(2, dictIdSeeker.seek(4));
|
||||
Assert.assertEquals(-1, dictIdSeeker.seek(5));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,14 +19,11 @@
|
|||
|
||||
package io.druid.segment;
|
||||
|
||||
import com.metamx.common.ISE;
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.data.CompressedObjectStrategy;
|
||||
import io.druid.segment.data.ConciseBitmapSerdeFactory;
|
||||
import io.druid.segment.data.IncrementalIndexTest;
|
||||
import io.druid.segment.data.IndexedInts;
|
||||
import io.druid.segment.incremental.IncrementalIndex;
|
||||
import io.druid.segment.incremental.IncrementalIndexAdapter;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
|
@ -34,7 +31,8 @@ import org.junit.rules.TemporaryFolder;
|
|||
|
||||
import java.io.File;
|
||||
|
||||
public class QueryableIndexIndexableAdapterTest {
|
||||
public class QueryableIndexIndexableAdapterTest
|
||||
{
|
||||
private final static IndexMerger INDEX_MERGER = TestHelper.getTestIndexMerger();
|
||||
private final static IndexIO INDEX_IO = TestHelper.getTestIndexIO();
|
||||
private static final IndexSpec INDEX_SPEC = IndexMergerTest.makeIndexSpec(
|
||||
|
@ -47,21 +45,15 @@ public class QueryableIndexIndexableAdapterTest {
|
|||
public final TemporaryFolder temporaryFolder = new TemporaryFolder();
|
||||
@Rule
|
||||
public final CloserRule closer = new CloserRule(false);
|
||||
|
||||
|
||||
@Test
|
||||
public void testGetBitmapIndexSeeker() throws Exception
|
||||
public void testGetBitmapIndex() throws Exception
|
||||
{
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
|
||||
IncrementalIndexTest.populateIndex(timestamp, toPersist);
|
||||
|
||||
final File tempDir = temporaryFolder.newFolder();
|
||||
final IndexableAdapter incrementalAdapter = new IncrementalIndexAdapter(
|
||||
toPersist.getInterval(),
|
||||
toPersist,
|
||||
INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory()
|
||||
);
|
||||
|
||||
QueryableIndex index = closer.closeLater(
|
||||
INDEX_IO.loadIndex(
|
||||
INDEX_MERGER.persist(
|
||||
|
@ -73,24 +65,12 @@ public class QueryableIndexIndexableAdapterTest {
|
|||
);
|
||||
|
||||
IndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
|
||||
BitmapIndexSeeker bitmapIndexSeeker = adapter.getBitmapIndexSeeker("dim1");
|
||||
IndexedInts indexedIntsNull = bitmapIndexSeeker.seek(null);
|
||||
Assert.assertEquals(0, indexedIntsNull.size());
|
||||
IndexedInts indexedInts0 = bitmapIndexSeeker.seek("0");
|
||||
Assert.assertEquals(0, indexedInts0.size());
|
||||
IndexedInts indexedInts1 = bitmapIndexSeeker.seek("1");
|
||||
Assert.assertEquals(1, indexedInts1.size());
|
||||
try {
|
||||
bitmapIndexSeeker.seek("4");
|
||||
Assert.assertFalse("Only support access in order", true);
|
||||
} catch(ISE ise) {
|
||||
Assert.assertTrue("Only support access in order", true);
|
||||
String dimension = "dim1";
|
||||
//null is added to all dimensions with value
|
||||
IndexedInts indexedInts = adapter.getBitmapIndex(dimension, 0);
|
||||
for (int i = 0; i < adapter.getDimValueLookup(dimension).size(); i++) {
|
||||
indexedInts = adapter.getBitmapIndex(dimension, i);
|
||||
Assert.assertEquals(1, indexedInts.size());
|
||||
}
|
||||
IndexedInts indexedInts2 = bitmapIndexSeeker.seek("2");
|
||||
Assert.assertEquals(0, indexedInts2.size());
|
||||
IndexedInts indexedInts3 = bitmapIndexSeeker.seek("3");
|
||||
Assert.assertEquals(1, indexedInts3.size());
|
||||
IndexedInts indexedInts4 = bitmapIndexSeeker.seek("4");
|
||||
Assert.assertEquals(0, indexedInts4.size());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package io.druid.segment.incremental;
|
|||
import io.druid.segment.IndexSpec;
|
||||
import io.druid.segment.IndexableAdapter;
|
||||
import io.druid.segment.Rowboat;
|
||||
import io.druid.segment.column.BitmapIndexSeeker;
|
||||
import io.druid.segment.data.CompressedObjectStrategy;
|
||||
import io.druid.segment.data.ConciseBitmapSerdeFactory;
|
||||
import io.druid.segment.data.IncrementalIndexTest;
|
||||
|
@ -31,8 +30,6 @@ import io.druid.segment.data.IndexedInts;
|
|||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.metamx.common.ISE;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -45,7 +42,7 @@ public class IncrementalIndexAdapterTest
|
|||
);
|
||||
|
||||
@Test
|
||||
public void testGetBitmapIndexSeeker() throws Exception
|
||||
public void testGetBitmapIndex() throws Exception
|
||||
{
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
IncrementalIndex incrementalIndex = IncrementalIndexTest.createIndex(null);
|
||||
|
@ -55,24 +52,11 @@ public class IncrementalIndexAdapterTest
|
|||
incrementalIndex,
|
||||
INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory()
|
||||
);
|
||||
BitmapIndexSeeker bitmapIndexSeeker = adapter.getBitmapIndexSeeker("dim1");
|
||||
IndexedInts indexedInts0 = bitmapIndexSeeker.seek("0");
|
||||
Assert.assertEquals(0, indexedInts0.size());
|
||||
IndexedInts indexedInts1 = bitmapIndexSeeker.seek("1");
|
||||
Assert.assertEquals(1, indexedInts1.size());
|
||||
try {
|
||||
bitmapIndexSeeker.seek("01");
|
||||
Assert.assertFalse("Only support access in order", true);
|
||||
String dimension = "dim1";
|
||||
for (int i = 0; i < adapter.getDimValueLookup(dimension).size(); i++) {
|
||||
IndexedInts indexedInts = adapter.getBitmapIndex(dimension, i);
|
||||
Assert.assertEquals(1, indexedInts.size());
|
||||
}
|
||||
catch (ISE ise) {
|
||||
Assert.assertTrue("Only support access in order", true);
|
||||
}
|
||||
IndexedInts indexedInts2 = bitmapIndexSeeker.seek("2");
|
||||
Assert.assertEquals(0, indexedInts2.size());
|
||||
IndexedInts indexedInts3 = bitmapIndexSeeker.seek("3");
|
||||
Assert.assertEquals(1, indexedInts3.size());
|
||||
IndexedInts indexedInts4 = bitmapIndexSeeker.seek("4");
|
||||
Assert.assertEquals(0, indexedInts4.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue