Merge pull request #2111 from binlijin/optimize-create-inverted-indexes

optimize create inverted indexes
This commit is contained in:
binlijin 2016-01-22 11:36:27 +08:00
commit 1d1f4d996d
12 changed files with 184 additions and 257 deletions

View File

@ -56,7 +56,6 @@ import io.druid.common.guava.GuavaUtils;
import io.druid.common.utils.JodaUtils; import io.druid.common.utils.JodaUtils;
import io.druid.common.utils.SerializerUtils; import io.druid.common.utils.SerializerUtils;
import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ColumnCapabilitiesImpl; import io.druid.segment.column.ColumnCapabilitiesImpl;
import io.druid.segment.column.ValueType; import io.druid.segment.column.ValueType;
@ -970,19 +969,28 @@ public class IndexMerger
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory); tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
} }
BitmapIndexSeeker[] bitmapIndexSeeker = new BitmapIndexSeeker[indexes.size()]; DictIdSeeker[] dictIdSeeker = new DictIdSeeker[indexes.size()];
for (int j = 0; j < indexes.size(); j++) { for (int j = 0; j < indexes.size(); j++) {
bitmapIndexSeeker[j] = indexes.get(j).getBitmapIndexSeeker(dimension); IntBuffer dimConversion = dimConversions.get(j).get(dimension);
if (dimConversion != null) {
dictIdSeeker[j] = new DictIdSeeker((IntBuffer) dimConversion.asReadOnlyBuffer().rewind());
} else {
dictIdSeeker[j] = new DictIdSeeker(null);
}
} }
for (String dimVal : IndexedIterable.create(dimVals)) { //Iterate all dim values's dictionary id in ascending order which in line with dim values's compare result.
for (int dictId = 0; dictId < dimVals.size(); dictId++) {
progress.progress(); progress.progress();
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size()); List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size());
for (int j = 0; j < indexes.size(); ++j) { for (int j = 0; j < indexes.size(); ++j) {
convertedInverteds.add( int seekedDictId = dictIdSeeker[j].seek(dictId);
new ConvertingIndexedInts( if (seekedDictId != DictIdSeeker.NOT_EXIST) {
bitmapIndexSeeker[j].seek(dimVal), rowNumConversions.get(j) convertedInverteds.add(
) new ConvertingIndexedInts(
); indexes.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j)
)
);
}
} }
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap(); MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
@ -999,13 +1007,16 @@ public class IndexMerger
bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset) bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset)
); );
if (isSpatialDim && dimVal != null) { if (isSpatialDim) {
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal)); String dimVal = dimVals.get(dictId);
float[] coords = new float[stringCoords.size()]; if (dimVal != null) {
for (int j = 0; j < coords.length; j++) { List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
coords[j] = Float.valueOf(stringCoords.get(j)); float[] coords = new float[stringCoords.size()];
for (int j = 0; j < coords.length; j++) {
coords[j] = Float.valueOf(stringCoords.get(j));
}
tree.insert(coords, bitset);
} }
tree.insert(coords, bitset);
} }
} }
writer.close(); writer.close();
@ -1193,6 +1204,63 @@ public class IndexMerger
} }
} }
/**
 * Get old dictId from new dictId, and only support access in order.
 *
 * <p>The seeker consumes {@code dimConversions} front to back using relative reads, so the
 * buffer's position is advanced as a side effect; hand it a private view (for example a
 * rewound {@code asReadOnlyBuffer()}). The n-th entry read is treated as the new dictId that
 * old dictId {@code n} maps to.
 *
 * <p>NOTE(review): the logic presumes the buffer's entries are non-negative and strictly
 * ascending, and that {@link #seek(int)} is called with ascending dictIds - confirm against
 * the code that builds the conversion buffers.
 */
public static class DictIdSeeker
{
  /** Returned by {@link #seek(int)} when the requested dictId has no entry in the buffer. */
  static final int NOT_EXIST = -1;
  /** Marker stored in {@code currVal} / {@code lastVal} while nothing has been recorded yet. */
  static final int NOT_INIT = -1;

  /** Conversion entries still to be consumed; {@code null} means "no conversions at all". */
  private final IntBuffer dimConversions;
  /** Number of entries already matched, i.e. the old dictId of the entry held in {@code currVal}. */
  private int currIndex;
  /** Entry read from the buffer but not matched yet, or {@link #NOT_INIT} before the first read. */
  private int currVal;
  /** The dictId that matched the final entry, or {@link #NOT_INIT} while entries remain. */
  private int lastVal;

  /**
   * @param dimConversions conversion entries to walk, read from the buffer's current position;
   *                       may be {@code null}, in which case every seek reports {@link #NOT_EXIST}
   */
  DictIdSeeker(
      IntBuffer dimConversions
  )
  {
    this.dimConversions = dimConversions;
    this.currIndex = 0;
    this.currVal = NOT_INIT;
    this.lastVal = NOT_INIT;
  }

  /**
   * Looks up the old dictId whose conversion entry equals {@code dictId}.
   *
   * @param dictId new dictId to look for; must be larger than any dictId that already matched
   *               the final entry, and must not step over a pending entry
   * @return the old dictId (zero-based count of entries consumed before the match), or
   *         {@link #NOT_EXIST} when the buffer is {@code null}, empty, exhausted, or its
   *         pending entry is larger than {@code dictId}
   * @throws ISE if {@code dictId} is not larger than the dictId that matched the final entry,
   *             or if {@code dictId} is larger than the pending entry (an entry was skipped)
   */
  public int seek(int dictId)
  {
    if (dimConversions == null) {
      return NOT_EXIST;
    }
    if (lastVal != NOT_INIT) {
      // Every entry has been handed out already; only the ordering contract is still checked.
      if (dictId <= lastVal) {
        throw new ISE("Value dictId[%d] is less than the last value dictId[%d] I have, cannot be.",
                      dictId, lastVal
        );
      }
      return NOT_EXIST;
    }
    if (currVal == NOT_INIT) {
      // First read. An empty buffer holds no conversions, so behave like the null case instead
      // of letting the relative get() fail with BufferUnderflowException.
      if (!dimConversions.hasRemaining()) {
        return NOT_EXIST;
      }
      currVal = dimConversions.get();
    }
    if (currVal == dictId) {
      int ret = currIndex;
      ++currIndex;
      if (dimConversions.hasRemaining()) {
        currVal = dimConversions.get();
      } else {
        // Remember the dictId that drained the buffer so later calls can be order-checked.
        lastVal = dictId;
      }
      return ret;
    } else if (currVal < dictId) {
      throw new ISE("Skipped currValue dictId[%d], currIndex[%d]; incoming value dictId[%d]",
                    currVal, currIndex, dictId
      );
    } else {
      // The pending entry belongs to a larger dictId; keep it for a later call.
      return NOT_EXIST;
    }
  }
}
public static class ConvertingIndexedInts implements Iterable<Integer> public static class ConvertingIndexedInts implements Iterable<Integer>
{ {
private final IndexedInts baseIndex; private final IndexedInts baseIndex;

View File

@ -43,7 +43,6 @@ import com.metamx.common.logger.Logger;
import io.druid.collections.CombiningIterable; import io.druid.collections.CombiningIterable;
import io.druid.common.utils.JodaUtils; import io.druid.common.utils.JodaUtils;
import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.Column; import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ColumnCapabilitiesImpl; import io.druid.segment.column.ColumnCapabilitiesImpl;
@ -59,7 +58,6 @@ import io.druid.segment.data.GenericIndexedWriter;
import io.druid.segment.data.IOPeon; import io.druid.segment.data.IOPeon;
import io.druid.segment.data.Indexed; import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedIntsWriter; import io.druid.segment.data.IndexedIntsWriter;
import io.druid.segment.data.IndexedIterable;
import io.druid.segment.data.IndexedRTree; import io.druid.segment.data.IndexedRTree;
import io.druid.segment.data.TmpFileIOPeon; import io.druid.segment.data.TmpFileIOPeon;
import io.druid.segment.data.VSizeIndexedIntsWriter; import io.druid.segment.data.VSizeIndexedIntsWriter;
@ -220,7 +218,7 @@ public class IndexMergerV9 extends IndexMerger
); );
makeInvertedIndexes( makeInvertedIndexes(
adapters, progress, mergedDimensions, indexSpec, v9TmpDir, rowNumConversions, adapters, progress, mergedDimensions, indexSpec, v9TmpDir, rowNumConversions,
nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters nullRowsList, dimValueWriters, bitmapIndexWriters, spatialIndexWriters, dimConversions
); );
/************ Finalize Build Columns *************/ /************ Finalize Build Columns *************/
@ -499,7 +497,8 @@ public class IndexMergerV9 extends IndexMerger
final ArrayList<MutableBitmap> nullRowsList, final ArrayList<MutableBitmap> nullRowsList,
final ArrayList<GenericIndexedWriter<String>> dimValueWriters, final ArrayList<GenericIndexedWriter<String>> dimValueWriters,
final ArrayList<GenericIndexedWriter<ImmutableBitmap>> bitmapIndexWriters, final ArrayList<GenericIndexedWriter<ImmutableBitmap>> bitmapIndexWriters,
final ArrayList<ByteBufferWriter<ImmutableRTree>> spatialIndexWriters final ArrayList<ByteBufferWriter<ImmutableRTree>> spatialIndexWriters,
final ArrayList<Map<String, IntBuffer>> dimConversions
) throws IOException ) throws IOException
{ {
final String section = "build inverted index"; final String section = "build inverted index";
@ -527,24 +526,33 @@ public class IndexMergerV9 extends IndexMerger
tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory); tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
} }
BitmapIndexSeeker[] bitmapIndexSeeker = new BitmapIndexSeeker[adapters.size()]; DictIdSeeker[] dictIdSeeker = new DictIdSeeker[adapters.size()];
for (int j = 0; j < adapters.size(); j++) { for (int j = 0; j < adapters.size(); j++) {
bitmapIndexSeeker[j] = adapters.get(j).getBitmapIndexSeeker(dimension); IntBuffer dimConversion = dimConversions.get(j).get(dimension);
if (dimConversion != null) {
dictIdSeeker[j] = new DictIdSeeker((IntBuffer)dimConversion.asReadOnlyBuffer().rewind());
} else {
dictIdSeeker[j] = new DictIdSeeker(null);
}
} }
ImmutableBitmap nullRowBitmap = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap( ImmutableBitmap nullRowBitmap = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(
nullRowsList.get(dimIndex) nullRowsList.get(dimIndex)
); );
for (String dimVal : IndexedIterable.create(dimVals)) { //Iterate all dim values's dictionary id in ascending order which in line with dim values's compare result.
for (int dictId = 0; dictId < dimVals.size(); dictId++) {
progress.progress(); progress.progress();
List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size()); List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(adapters.size());
for (int j = 0; j < adapters.size(); ++j) { for (int j = 0; j < adapters.size(); ++j) {
convertedInverteds.add( int seekedDictId = dictIdSeeker[j].seek(dictId);
new ConvertingIndexedInts( if (seekedDictId != DictIdSeeker.NOT_EXIST) {
bitmapIndexSeeker[j].seek(dimVal), rowNumConversions.get(j) convertedInverteds.add(
) new ConvertingIndexedInts(
); adapters.get(j).getBitmapIndex(dimension, seekedDictId), rowNumConversions.get(j)
)
);
}
} }
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap(); MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
@ -558,19 +566,22 @@ public class IndexMergerV9 extends IndexMerger
} }
ImmutableBitmap bitmapToWrite = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset); ImmutableBitmap bitmapToWrite = bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset);
if (dimVal == null) { if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
bitmapIndexWriters.get(dimIndex).write(nullRowBitmap.union(bitmapToWrite)); bitmapIndexWriters.get(dimIndex).write(nullRowBitmap.union(bitmapToWrite));
} else { } else {
bitmapIndexWriters.get(dimIndex).write(bitmapToWrite); bitmapIndexWriters.get(dimIndex).write(bitmapToWrite);
} }
if (spatialIndexWriter != null && dimVal != null) { if (spatialIndexWriter != null) {
List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal)); String dimVal = dimVals.get(dictId);
float[] coords = new float[stringCoords.size()]; if (dimVal != null) {
for (int j = 0; j < coords.length; j++) { List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
coords[j] = Float.valueOf(stringCoords.get(j)); float[] coords = new float[stringCoords.size()];
for (int j = 0; j < coords.length; j++) {
coords[j] = Float.valueOf(stringCoords.get(j));
}
tree.insert(coords, bitset);
} }
tree.insert(coords, bitset);
} }
} }
if (spatialIndexWriter != null) { if (spatialIndexWriter != null) {

View File

@ -19,7 +19,6 @@
package io.druid.segment; package io.druid.segment;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.data.Indexed; import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
@ -44,7 +43,7 @@ public interface IndexableAdapter
IndexedInts getBitmapIndex(String dimension, String value); IndexedInts getBitmapIndex(String dimension, String value);
BitmapIndexSeeker getBitmapIndexSeeker(String dimension); IndexedInts getBitmapIndex(String dimension, int dictId);
String getMetricType(String metric); String getMetricType(String metric);

View File

@ -27,12 +27,10 @@ import com.metamx.common.ISE;
import com.metamx.common.guava.CloseQuietly; import com.metamx.common.guava.CloseQuietly;
import com.metamx.common.logger.Logger; import com.metamx.common.logger.Logger;
import io.druid.segment.column.BitmapIndex; import io.druid.segment.column.BitmapIndex;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.Column; import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ComplexColumn; import io.druid.segment.column.ComplexColumn;
import io.druid.segment.column.DictionaryEncodedColumn; import io.druid.segment.column.DictionaryEncodedColumn;
import io.druid.segment.column.EmptyBitmapIndexSeeker;
import io.druid.segment.column.GenericColumn; import io.druid.segment.column.GenericColumn;
import io.druid.segment.column.IndexedFloatsGenericColumn; import io.druid.segment.column.IndexedFloatsGenericColumn;
import io.druid.segment.column.IndexedLongsGenericColumn; import io.druid.segment.column.IndexedLongsGenericColumn;
@ -40,7 +38,6 @@ import io.druid.segment.column.ValueType;
import io.druid.segment.data.ArrayBasedIndexedInts; import io.druid.segment.data.ArrayBasedIndexedInts;
import io.druid.segment.data.BitmapCompressedIndexedInts; import io.druid.segment.data.BitmapCompressedIndexedInts;
import io.druid.segment.data.EmptyIndexedInts; import io.druid.segment.data.EmptyIndexedInts;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.Indexed; import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
import io.druid.segment.data.IndexedIterable; import io.druid.segment.data.IndexedIterable;
@ -303,12 +300,12 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
final Column column = input.getColumn(dimension); final Column column = input.getColumn(dimension);
if (column == null) { if (column == null) {
return new EmptyIndexedInts(); return EmptyIndexedInts.EMPTY_INDEXED_INTS;
} }
final BitmapIndex bitmaps = column.getBitmapIndex(); final BitmapIndex bitmaps = column.getBitmapIndex();
if (bitmaps == null) { if (bitmaps == null) {
return new EmptyIndexedInts(); return EmptyIndexedInts.EMPTY_INDEXED_INTS;
} }
return new BitmapCompressedIndexedInts(bitmaps.getBitmap(value)); return new BitmapCompressedIndexedInts(bitmaps.getBitmap(value));
@ -339,79 +336,23 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
} }
@Override @Override
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension) public IndexedInts getBitmapIndex(String dimension, int dictId)
{ {
final Column column = input.getColumn(dimension); final Column column = input.getColumn(dimension);
if (column == null) { if (column == null) {
return new EmptyBitmapIndexSeeker(); return EmptyIndexedInts.EMPTY_INDEXED_INTS;
} }
final BitmapIndex bitmaps = column.getBitmapIndex(); final BitmapIndex bitmaps = column.getBitmapIndex();
if (bitmaps == null) { if (bitmaps == null) {
return new EmptyBitmapIndexSeeker(); return EmptyIndexedInts.EMPTY_INDEXED_INTS;
} }
final Indexed<String> dimSet = getDimValueLookup(dimension); if (dictId >= 0) {
return new BitmapCompressedIndexedInts(bitmaps.getBitmap(dictId));
// BitmapIndexSeeker is the main performance boost comes from. } else {
// In the previous version of index merge, during the creation of invert index, we do something like return EmptyIndexedInts.EMPTY_INDEXED_INTS;
// merge sort of multiply bitmap indexes. It simply iterator all the previous sorted values, }
// and "binary find" the id in each bitmap indexes, which involves disk IO and is really slow.
// Suppose we have N (which is 100 in our test) small segments, each have M (which is 50000 in our case) rows.
// In high cardinality scenario, we will almost have N * M uniq values. So the complexity will be O(N * M * M * LOG(M)).
// There are 2 properties we did not use during the merging:
// 1. We always travel the dimension values sequentially
// 2. One single dimension value is valid only in one index when cardinality is high enough
// So we introduced the BitmapIndexSeeker, which can only seek value sequentially and can never seek back.
// By using this and the help of "getDimValueLookup", we only need to translate all dimension value to its ID once,
// and the translation is done by self-increase of the integer. We only need to change the CACHED value once after
// previous value is hit, renew the value and increase the ID. The complexity now is O(N * M * LOG(M)).
return new BitmapIndexSeeker()
{
private int currIndex = 0;
private String currVal = null;
private String lastVal = null;
@Override
public IndexedInts seek(String value)
{
if (dimSet == null || dimSet.size() == 0) {
return new EmptyIndexedInts();
}
if (lastVal != null) {
if (GenericIndexed.STRING_STRATEGY.compare(value, lastVal) <= 0) {
throw new ISE(
"Value[%s] is less than the last value[%s] I have, cannot be.",
value, lastVal
);
}
return new EmptyIndexedInts();
}
if (currVal == null) {
currVal = dimSet.get(currIndex);
}
int compareResult = GenericIndexed.STRING_STRATEGY.compare(currVal, value);
if (compareResult == 0) {
IndexedInts ret = new BitmapCompressedIndexedInts(bitmaps.getBitmap(currIndex));
++currIndex;
if (currIndex == dimSet.size()) {
lastVal = value;
} else {
currVal = dimSet.get(currIndex);
}
return ret;
} else if (compareResult < 0) {
throw new ISE(
"Skipped currValue[%s], currIndex[%,d]; incoming value[%s]",
currVal, currIndex, value
);
} else {
return new EmptyIndexedInts();
}
}
};
} }
@Override @Override

View File

@ -21,7 +21,6 @@ package io.druid.segment;
import com.google.common.base.Predicate; import com.google.common.base.Predicate;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.data.Indexed; import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
@ -95,9 +94,9 @@ public class RowboatFilteringIndexAdapter implements IndexableAdapter
} }
@Override @Override
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension) public IndexedInts getBitmapIndex(String dimension, int dictId)
{ {
return baseAdapter.getBitmapIndexSeeker(dimension); return baseAdapter.getBitmapIndex(dimension, dictId);
} }
@Override @Override

View File

@ -1,30 +0,0 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.segment.column;
import io.druid.segment.data.IndexedInts;
/**
 * Looks up the {@link IndexedInts} for a value through {@link #seek(String)}.
 * Only support access in order.
 */
public interface BitmapIndexSeeker
{
  /**
   * @param value the value to seek to
   * @return the {@link IndexedInts} produced for {@code value}
   */
  IndexedInts seek(String value);
}

View File

@ -1,34 +0,0 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.segment.column;
import io.druid.segment.data.EmptyIndexedInts;
import io.druid.segment.data.IndexedInts;
/**
 * A {@link BitmapIndexSeeker} with nothing to find: every call to {@link #seek(String)}
 * returns a newly created {@link EmptyIndexedInts}, whatever value is requested.
 */
public class EmptyBitmapIndexSeeker implements BitmapIndexSeeker
{
/**
 * @param value ignored
 * @return a new {@link EmptyIndexedInts} on every call
 */
@Override
public IndexedInts seek(String value)
{
return new EmptyIndexedInts();
}
}

View File

@ -28,7 +28,11 @@ import java.util.Iterator;
*/ */
public class EmptyIndexedInts implements IndexedInts public class EmptyIndexedInts implements IndexedInts
{ {
public static EmptyIndexedInts instance = new EmptyIndexedInts(); public static final EmptyIndexedInts EMPTY_INDEXED_INTS = new EmptyIndexedInts();
private EmptyIndexedInts()
{
}
@Override @Override
public int size() public int size()

View File

@ -26,16 +26,12 @@ import com.google.common.collect.Maps;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import com.metamx.collections.bitmap.BitmapFactory; import com.metamx.collections.bitmap.BitmapFactory;
import com.metamx.collections.bitmap.MutableBitmap; import com.metamx.collections.bitmap.MutableBitmap;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger; import com.metamx.common.logger.Logger;
import io.druid.segment.IndexableAdapter; import io.druid.segment.IndexableAdapter;
import io.druid.segment.Metadata; import io.druid.segment.Metadata;
import io.druid.segment.Rowboat; import io.druid.segment.Rowboat;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.column.ColumnCapabilities; import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.EmptyBitmapIndexSeeker;
import io.druid.segment.data.EmptyIndexedInts; import io.druid.segment.data.EmptyIndexedInts;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.Indexed; import io.druid.segment.data.Indexed;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
import io.druid.segment.data.IndexedIterable; import io.druid.segment.data.IndexedIterable;
@ -276,13 +272,13 @@ public class IncrementalIndexAdapter implements IndexableAdapter
Map<String, MutableBitmap> dimInverted = invertedIndexes.get(dimension); Map<String, MutableBitmap> dimInverted = invertedIndexes.get(dimension);
if (dimInverted == null) { if (dimInverted == null) {
return new EmptyIndexedInts(); return EmptyIndexedInts.EMPTY_INDEXED_INTS;
} }
final MutableBitmap bitmapIndex = dimInverted.get(value); final MutableBitmap bitmapIndex = dimInverted.get(value);
if (bitmapIndex == null) { if (bitmapIndex == null) {
return new EmptyIndexedInts(); return EmptyIndexedInts.EMPTY_INDEXED_INTS;
} }
return new BitmapIndexedInts(bitmapIndex); return new BitmapIndexedInts(bitmapIndex);
@ -301,35 +297,17 @@ public class IncrementalIndexAdapter implements IndexableAdapter
} }
@Override @Override
public BitmapIndexSeeker getBitmapIndexSeeker(String dimension) public IndexedInts getBitmapIndex(String dimension, int dictId)
{ {
final Map<String, MutableBitmap> dimInverted = invertedIndexes.get(dimension); if (dictId >= 0) {
if (dimInverted == null) { final Indexed<String> dimValues = getDimValueLookup(dimension);
return new EmptyBitmapIndexSeeker(); //NullValueConverterDimDim will convert empty to null, we need convert it back to the actual values,
//because getBitmapIndex relies on the actual values stored in DimDim.
String value = Strings.nullToEmpty(dimValues.get(dictId));
return getBitmapIndex(dimension, value);
} else {
return EmptyIndexedInts.EMPTY_INDEXED_INTS;
} }
return new BitmapIndexSeeker()
{
private String lastVal = null;
@Override
public IndexedInts seek(String value)
{
if (value != null && GenericIndexed.STRING_STRATEGY.compare(value, lastVal) <= 0) {
throw new ISE(
"Value[%s] is less than the last value[%s] I have, cannot be.",
value, lastVal
);
}
value = Strings.nullToEmpty(value);
lastVal = value;
final MutableBitmap bitmapIndex = dimInverted.get(value);
if (bitmapIndex == null) {
return new EmptyIndexedInts();
}
return new BitmapIndexedInts(bitmapIndex);
}
};
} }
private boolean hasNullValue(String[] dimValues) private boolean hasNullValue(String[] dimValues)

View File

@ -26,8 +26,10 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;
import com.metamx.collections.bitmap.RoaringBitmapFactory; import com.metamx.collections.bitmap.RoaringBitmapFactory;
import com.metamx.common.IAE; import com.metamx.common.IAE;
import com.metamx.common.ISE;
import io.druid.data.input.MapBasedInputRow; import io.druid.data.input.MapBasedInputRow;
import io.druid.data.input.impl.DimensionsSpec; import io.druid.data.input.impl.DimensionsSpec;
import io.druid.granularity.QueryGranularity; import io.druid.granularity.QueryGranularity;
@ -60,6 +62,8 @@ import javax.annotation.Nullable;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
@ -1656,4 +1660,27 @@ public class IndexMergerTest
} }
return combiningAggregators; return combiningAggregators;
} }
/**
 * Walks a {@code DictIdSeeker} over a three-entry conversion buffer {0, 2, 4}: dictIds present
 * in the buffer resolve to their position, absent ones resolve to -1, and skipping over a
 * pending entry is rejected with an ISE without disturbing the seeker's state.
 */
@Test
public void testDictIdSeeker() throws Exception
{
  final int[] entries = {0, 2, 4};
  IntBuffer conversions = ByteBuffer.allocateDirect(entries.length * Ints.BYTES).asIntBuffer();
  for (int entry : entries) {
    conversions.put(entry);
  }
  IntBuffer readOnlyView = conversions.asReadOnlyBuffer();
  readOnlyView.rewind();
  IndexMerger.DictIdSeeker seeker = new IndexMerger.DictIdSeeker(readOnlyView);

  Assert.assertEquals(0, seeker.seek(0));
  Assert.assertEquals(-1, seeker.seek(1));
  Assert.assertEquals(1, seeker.seek(2));

  // Entry 4 is still pending, so asking for 5 would skip it and must be refused.
  boolean rejected = false;
  try {
    seeker.seek(5);
  }
  catch (ISE expected) {
    rejected = true;
  }
  Assert.assertTrue("Only support access in order", rejected);

  Assert.assertEquals(-1, seeker.seek(3));
  Assert.assertEquals(2, seeker.seek(4));
  Assert.assertEquals(-1, seeker.seek(5));
}
} }

View File

@ -19,14 +19,11 @@
package io.druid.segment; package io.druid.segment;
import com.metamx.common.ISE;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.data.CompressedObjectStrategy; import io.druid.segment.data.CompressedObjectStrategy;
import io.druid.segment.data.ConciseBitmapSerdeFactory; import io.druid.segment.data.ConciseBitmapSerdeFactory;
import io.druid.segment.data.IncrementalIndexTest; import io.druid.segment.data.IncrementalIndexTest;
import io.druid.segment.data.IndexedInts; import io.druid.segment.data.IndexedInts;
import io.druid.segment.incremental.IncrementalIndex; import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexAdapter;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Rule; import org.junit.Rule;
import org.junit.Test; import org.junit.Test;
@ -34,7 +31,8 @@ import org.junit.rules.TemporaryFolder;
import java.io.File; import java.io.File;
public class QueryableIndexIndexableAdapterTest { public class QueryableIndexIndexableAdapterTest
{
private final static IndexMerger INDEX_MERGER = TestHelper.getTestIndexMerger(); private final static IndexMerger INDEX_MERGER = TestHelper.getTestIndexMerger();
private final static IndexIO INDEX_IO = TestHelper.getTestIndexIO(); private final static IndexIO INDEX_IO = TestHelper.getTestIndexIO();
private static final IndexSpec INDEX_SPEC = IndexMergerTest.makeIndexSpec( private static final IndexSpec INDEX_SPEC = IndexMergerTest.makeIndexSpec(
@ -49,19 +47,13 @@ public class QueryableIndexIndexableAdapterTest {
public final CloserRule closer = new CloserRule(false); public final CloserRule closer = new CloserRule(false);
@Test @Test
public void testGetBitmapIndexSeeker() throws Exception public void testGetBitmapIndex() throws Exception
{ {
final long timestamp = System.currentTimeMillis(); final long timestamp = System.currentTimeMillis();
IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null); IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
IncrementalIndexTest.populateIndex(timestamp, toPersist); IncrementalIndexTest.populateIndex(timestamp, toPersist);
final File tempDir = temporaryFolder.newFolder(); final File tempDir = temporaryFolder.newFolder();
final IndexableAdapter incrementalAdapter = new IncrementalIndexAdapter(
toPersist.getInterval(),
toPersist,
INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory()
);
QueryableIndex index = closer.closeLater( QueryableIndex index = closer.closeLater(
INDEX_IO.loadIndex( INDEX_IO.loadIndex(
INDEX_MERGER.persist( INDEX_MERGER.persist(
@ -73,24 +65,12 @@ public class QueryableIndexIndexableAdapterTest {
); );
IndexableAdapter adapter = new QueryableIndexIndexableAdapter(index); IndexableAdapter adapter = new QueryableIndexIndexableAdapter(index);
BitmapIndexSeeker bitmapIndexSeeker = adapter.getBitmapIndexSeeker("dim1"); String dimension = "dim1";
IndexedInts indexedIntsNull = bitmapIndexSeeker.seek(null); //null is added to all dimensions with value
Assert.assertEquals(0, indexedIntsNull.size()); IndexedInts indexedInts = adapter.getBitmapIndex(dimension, 0);
IndexedInts indexedInts0 = bitmapIndexSeeker.seek("0"); for (int i = 0; i < adapter.getDimValueLookup(dimension).size(); i++) {
Assert.assertEquals(0, indexedInts0.size()); indexedInts = adapter.getBitmapIndex(dimension, i);
IndexedInts indexedInts1 = bitmapIndexSeeker.seek("1"); Assert.assertEquals(1, indexedInts.size());
Assert.assertEquals(1, indexedInts1.size());
try {
bitmapIndexSeeker.seek("4");
Assert.assertFalse("Only support access in order", true);
} catch(ISE ise) {
Assert.assertTrue("Only support access in order", true);
} }
IndexedInts indexedInts2 = bitmapIndexSeeker.seek("2");
Assert.assertEquals(0, indexedInts2.size());
IndexedInts indexedInts3 = bitmapIndexSeeker.seek("3");
Assert.assertEquals(1, indexedInts3.size());
IndexedInts indexedInts4 = bitmapIndexSeeker.seek("4");
Assert.assertEquals(0, indexedInts4.size());
} }
} }

View File

@ -22,7 +22,6 @@ package io.druid.segment.incremental;
import io.druid.segment.IndexSpec; import io.druid.segment.IndexSpec;
import io.druid.segment.IndexableAdapter; import io.druid.segment.IndexableAdapter;
import io.druid.segment.Rowboat; import io.druid.segment.Rowboat;
import io.druid.segment.column.BitmapIndexSeeker;
import io.druid.segment.data.CompressedObjectStrategy; import io.druid.segment.data.CompressedObjectStrategy;
import io.druid.segment.data.ConciseBitmapSerdeFactory; import io.druid.segment.data.ConciseBitmapSerdeFactory;
import io.druid.segment.data.IncrementalIndexTest; import io.druid.segment.data.IncrementalIndexTest;
@ -31,8 +30,6 @@ import io.druid.segment.data.IndexedInts;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import com.metamx.common.ISE;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -45,7 +42,7 @@ public class IncrementalIndexAdapterTest
); );
@Test @Test
public void testGetBitmapIndexSeeker() throws Exception public void testGetBitmapIndex() throws Exception
{ {
final long timestamp = System.currentTimeMillis(); final long timestamp = System.currentTimeMillis();
IncrementalIndex incrementalIndex = IncrementalIndexTest.createIndex(null); IncrementalIndex incrementalIndex = IncrementalIndexTest.createIndex(null);
@ -55,24 +52,11 @@ public class IncrementalIndexAdapterTest
incrementalIndex, incrementalIndex,
INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory() INDEX_SPEC.getBitmapSerdeFactory().getBitmapFactory()
); );
BitmapIndexSeeker bitmapIndexSeeker = adapter.getBitmapIndexSeeker("dim1"); String dimension = "dim1";
IndexedInts indexedInts0 = bitmapIndexSeeker.seek("0"); for (int i = 0; i < adapter.getDimValueLookup(dimension).size(); i++) {
Assert.assertEquals(0, indexedInts0.size()); IndexedInts indexedInts = adapter.getBitmapIndex(dimension, i);
IndexedInts indexedInts1 = bitmapIndexSeeker.seek("1"); Assert.assertEquals(1, indexedInts.size());
Assert.assertEquals(1, indexedInts1.size());
try {
bitmapIndexSeeker.seek("01");
Assert.assertFalse("Only support access in order", true);
} }
catch (ISE ise) {
Assert.assertTrue("Only support access in order", true);
}
IndexedInts indexedInts2 = bitmapIndexSeeker.seek("2");
Assert.assertEquals(0, indexedInts2.size());
IndexedInts indexedInts3 = bitmapIndexSeeker.seek("3");
Assert.assertEquals(1, indexedInts3.size());
IndexedInts indexedInts4 = bitmapIndexSeeker.seek("4");
Assert.assertEquals(0, indexedInts4.size());
} }
@Test @Test