segment metadata fallback analysis if no bitmaps (#7116)

* segment metadata fallback analysis if no bitmaps

* remove accidental line

* remove nonsense size estimation

* less ternary

* fix it

* do the thing
This commit is contained in:
Clint Wylie 2019-02-26 11:27:41 -08:00 committed by Fangjin Yang
parent b8f762037a
commit 9fa649b3bd
3 changed files with 189 additions and 123 deletions

View File

@ -42,6 +42,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ComplexColumn;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.serde.ComplexMetricSerde;
@ -194,30 +195,38 @@ public class SegmentAnalyzer
final ColumnHolder columnHolder
)
{
long size = 0;
Comparable min = null;
Comparable max = null;
long size = 0;
final int cardinality;
if (capabilities.hasBitmapIndexes()) {
final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
cardinality = bitmapIndex.getCardinality();
if (!capabilities.hasBitmapIndexes()) {
return ColumnAnalysis.error("string_no_bitmap");
}
final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
final int cardinality = bitmapIndex.getCardinality();
if (analyzingSize()) {
for (int i = 0; i < cardinality; ++i) {
String value = bitmapIndex.getValue(i);
if (value != null) {
size += StringUtils.estimatedBinaryLengthAsUTF8(value) * bitmapIndex.getBitmap(bitmapIndex.getIndex(value)).size();
if (analyzingSize()) {
for (int i = 0; i < cardinality; ++i) {
String value = bitmapIndex.getValue(i);
if (value != null) {
size += StringUtils.estimatedBinaryLengthAsUTF8(value) * bitmapIndex.getBitmap(bitmapIndex.getIndex(value))
.size();
}
}
}
}
if (analyzingMinMax() && cardinality > 0) {
min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
if (analyzingMinMax() && cardinality > 0) {
min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
}
} else if (capabilities.isDictionaryEncoded()) {
// fallback if no bitmap index
DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn();
cardinality = theColumn.getCardinality();
if (analyzingMinMax() && cardinality > 0) {
min = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(0));
max = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(cardinality - 1));
}
} else {
cardinality = 0;
}
return new ColumnAnalysis(

View File

@ -83,10 +83,16 @@ public class SegmentMetadataQueryTest
public static QueryRunner makeMMappedQueryRunner(
SegmentId segmentId,
boolean rollup,
boolean bitmaps,
QueryRunnerFactory factory
)
{
QueryableIndex index = rollup ? TestIndex.getMMappedTestIndex() : TestIndex.getNoRollupMMappedTestIndex();
QueryableIndex index;
if (bitmaps) {
index = rollup ? TestIndex.getMMappedTestIndex() : TestIndex.getNoRollupMMappedTestIndex();
} else {
index = TestIndex.getNoBitmapMMappedTestIndex();
}
return QueryRunnerTestHelper.makeQueryRunner(
factory,
segmentId,
@ -99,10 +105,16 @@ public class SegmentMetadataQueryTest
public static QueryRunner makeIncrementalIndexQueryRunner(
SegmentId segmentId,
boolean rollup,
boolean bitmaps,
QueryRunnerFactory factory
)
{
IncrementalIndex index = rollup ? TestIndex.getIncrementalTestIndex() : TestIndex.getNoRollupIncrementalTestIndex();
IncrementalIndex index;
if (bitmaps) {
index = rollup ? TestIndex.getIncrementalTestIndex() : TestIndex.getNoRollupIncrementalTestIndex();
} else {
index = TestIndex.getNoBitmapIncrementalTestIndex();
}
return QueryRunnerTestHelper.makeQueryRunner(
factory,
segmentId,
@ -121,17 +133,19 @@ public class SegmentMetadataQueryTest
private final SegmentMetadataQuery testQuery;
private final SegmentAnalysis expectedSegmentAnalysis1;
private final SegmentAnalysis expectedSegmentAnalysis2;
private final boolean bitmaps;
@Parameterized.Parameters(name = "mmap1 = {0}, mmap2 = {1}, rollup1 = {2}, rollup2 = {3}, differentIds = {4}")
@Parameterized.Parameters(name = "mmap1 = {0}, mmap2 = {1}, rollup1 = {2}, rollup2 = {3}, differentIds = {4}, bitmaps={5}")
public static Collection<Object[]> constructorFeeder()
{
return ImmutableList.of(
new Object[]{true, true, true, true, false},
new Object[]{true, false, true, false, false},
new Object[]{false, true, true, false, false},
new Object[]{false, false, false, false, false},
new Object[]{false, false, true, true, false},
new Object[]{false, false, false, true, true}
new Object[]{true, true, true, true, false, true},
new Object[]{true, false, true, false, false, true},
new Object[]{false, true, true, false, false, true},
new Object[]{false, false, false, false, false, true},
new Object[]{false, false, true, true, false, true},
new Object[]{false, false, false, true, true, true},
new Object[]{true, true, false, false, false, false}
);
}
@ -140,22 +154,24 @@ public class SegmentMetadataQueryTest
boolean mmap2,
boolean rollup1,
boolean rollup2,
boolean differentIds
boolean differentIds,
boolean bitmaps
)
{
final SegmentId id1 = SegmentId.dummy(differentIds ? "testSegment1" : "testSegment");
final SegmentId id2 = SegmentId.dummy(differentIds ? "testSegment2" : "testSegment");
this.runner1 = mmap1
? makeMMappedQueryRunner(id1, rollup1, FACTORY)
: makeIncrementalIndexQueryRunner(id1, rollup1, FACTORY);
? makeMMappedQueryRunner(id1, rollup1, bitmaps, FACTORY)
: makeIncrementalIndexQueryRunner(id1, rollup1, bitmaps, FACTORY);
this.runner2 = mmap2
? makeMMappedQueryRunner(id2, rollup2, FACTORY)
: makeIncrementalIndexQueryRunner(id2, rollup2, FACTORY);
? makeMMappedQueryRunner(id2, rollup2, bitmaps, FACTORY)
: makeIncrementalIndexQueryRunner(id2, rollup2, bitmaps, FACTORY);
this.mmap1 = mmap1;
this.mmap2 = mmap2;
this.rollup1 = rollup1;
this.rollup2 = rollup2;
this.differentIds = differentIds;
this.bitmaps = bitmaps;
testQuery = Druids.newSegmentMetadataQueryBuilder()
.dataSource("testing")
.intervals("2013/2014")
@ -169,6 +185,16 @@ public class SegmentMetadataQueryTest
.merge(true)
.build();
int preferedSize1 = 0;
int placementSize2 = 0;
int overallSize1 = 119691;
int overallSize2 = 119691;
if (bitmaps) {
preferedSize1 = mmap1 ? 10881 : 10764;
placementSize2 = mmap2 ? 10881 : 0;
overallSize1 = mmap1 ? 167493 : 168188;
overallSize2 = mmap2 ? 167493 : 168188;
}
expectedSegmentAnalysis1 = new SegmentAnalysis(
id1.toString(),
ImmutableList.of(Intervals.of("2011-01-12T00:00:00.000Z/2011-04-15T00:00:00.001Z")),
@ -187,7 +213,7 @@ public class SegmentMetadataQueryTest
new ColumnAnalysis(
ValueType.STRING.toString(),
false,
mmap1 ? 10881 : 10764,
preferedSize1,
1,
"preferred",
"preferred",
@ -203,7 +229,7 @@ public class SegmentMetadataQueryTest
null,
null
)
), mmap1 ? 167493 : 168188,
), overallSize1,
1209,
null,
null,
@ -228,7 +254,7 @@ public class SegmentMetadataQueryTest
new ColumnAnalysis(
ValueType.STRING.toString(),
false,
mmap2 ? 10881 : 0,
placementSize2,
1,
null,
null,
@ -245,7 +271,7 @@ public class SegmentMetadataQueryTest
null
)
// null_column will be included only for incremental index, which makes a little bigger result than expected
), mmap2 ? 167493 : 168188,
), overallSize2,
1209,
null,
null,
@ -470,10 +496,16 @@ public class SegmentMetadataQueryTest
@Test
public void testSegmentMetadataQueryWithDefaultAnalysisMerge()
{
int size1 = 0;
int size2 = 0;
if (bitmaps) {
size1 = mmap1 ? 10881 : 10764;
size2 = mmap2 ? 10881 : 10764;
}
ColumnAnalysis analysis = new ColumnAnalysis(
ValueType.STRING.toString(),
false,
(mmap1 ? 10881 : 10764) + (mmap2 ? 10881 : 10764),
size1 + size2,
1,
"preferred",
"preferred",
@ -485,10 +517,16 @@ public class SegmentMetadataQueryTest
@Test
public void testSegmentMetadataQueryWithDefaultAnalysisMerge2()
{
int size1 = 0;
int size2 = 0;
if (bitmaps) {
size1 = mmap1 ? 6882 : 6808;
size2 = mmap2 ? 6882 : 6808;
}
ColumnAnalysis analysis = new ColumnAnalysis(
ValueType.STRING.toString(),
false,
(mmap1 ? 6882 : 6808) + (mmap2 ? 6882 : 6808),
size1 + size2,
3,
"spot",
"upfront",
@ -500,10 +538,16 @@ public class SegmentMetadataQueryTest
@Test
public void testSegmentMetadataQueryWithDefaultAnalysisMerge3()
{
int size1 = 0;
int size2 = 0;
if (bitmaps) {
size1 = mmap1 ? 9765 : 9660;
size2 = mmap2 ? 9765 : 9660;
}
ColumnAnalysis analysis = new ColumnAnalysis(
ValueType.STRING.toString(),
false,
(mmap1 ? 9765 : 9660) + (mmap2 ? 9765 : 9660),
size1 + size2,
9,
"automotive",
"travel",

View File

@ -20,6 +20,7 @@
package org.apache.druid.segment;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables;
import com.google.common.io.CharSource;
import com.google.common.io.LineProcessor;
@ -111,12 +112,31 @@ public class TestIndex
new StringDimensionSchema("null_column")
);
/**
 * Dimension schemas used to build the "no bitmap" test indexes; wrapped by
 * {@link #DIMENSIONS_SPEC_NO_BITMAPS}.
 *
 * Every string dimension is created with the three-argument constructor, passing {@code null} and {@code false};
 * the numeric dimensions use the single-argument constructors.
 *
 * NOTE(review): the trailing {@code false} is presumably StringDimensionSchema's "create bitmap index" flag and the
 * {@code null} its multi-value handling (i.e. use the default) - confirm against StringDimensionSchema.
 */
public static final List<DimensionSchema> DIMENSION_SCHEMAS_NO_BITMAP = Arrays.asList(
new StringDimensionSchema("market", null, false),
new StringDimensionSchema("quality", null, false),
new LongDimensionSchema("qualityLong"),
new FloatDimensionSchema("qualityFloat"),
new DoubleDimensionSchema("qualityDouble"),
new StringDimensionSchema("qualityNumericString", null, false),
new StringDimensionSchema("placement", null, false),
new StringDimensionSchema("placementish", null, false),
new StringDimensionSchema("partial_null_column", null, false),
new StringDimensionSchema("null_column", null, false)
);
public static final DimensionsSpec DIMENSIONS_SPEC = new DimensionsSpec(
DIMENSION_SCHEMAS,
null,
null
);
/**
 * Dimensions spec built from {@link #DIMENSION_SCHEMAS_NO_BITMAP}; the two remaining constructor arguments are
 * left {@code null}. Used by the realtime index builder when it is asked for an index without bitmaps.
 */
public static final DimensionsSpec DIMENSIONS_SPEC_NO_BITMAPS = new DimensionsSpec(
DIMENSION_SCHEMAS_NO_BITMAP,
null,
null
);
public static final String[] DOUBLE_METRICS = new String[]{"index", "indexMin", "indexMaxPlusTen"};
public static final String[] FLOAT_METRICS = new String[]{"indexFloat", "indexMinFloat", "indexMaxFloat"};
private static final Logger log = new Logger(TestIndex.class);
@ -147,107 +167,95 @@ public class TestIndex
}
}
private static IncrementalIndex realtimeIndex = null;
private static IncrementalIndex noRollupRealtimeIndex = null;
private static QueryableIndex mmappedIndex = null;
private static QueryableIndex noRollupMmappedIndex = null;
private static QueryableIndex mergedRealtime = null;
// Shared test indexes. Each one is wrapped in Suppliers.memoize, so the index is built on the first get()
// and the same instance is handed back on later calls.

// Incremental index over "druid.sample.numeric.tsv", built via the single-argument makeRealtimeIndex overload.
private static Supplier<IncrementalIndex> realtimeIndex = Suppliers.memoize(
() -> makeRealtimeIndex("druid.sample.numeric.tsv")
);
// Same resource, with rollup = false.
private static Supplier<IncrementalIndex> noRollupRealtimeIndex = Suppliers.memoize(
() -> makeRealtimeIndex("druid.sample.numeric.tsv", false)
);
// Same resource, with rollup = false and bitmap = false.
private static Supplier<IncrementalIndex> noBitmapRealtimeIndex = Suppliers.memoize(
() -> makeRealtimeIndex("druid.sample.numeric.tsv", false, false)
);
// Memory-mapped counterparts: each persists the matching incremental index above and loads the result.
private static Supplier<QueryableIndex> mmappedIndex = Suppliers.memoize(
() -> persistRealtimeAndLoadMMapped(realtimeIndex.get())
);
private static Supplier<QueryableIndex> noRollupMmappedIndex = Suppliers.memoize(
() -> persistRealtimeAndLoadMMapped(noRollupRealtimeIndex.get())
);
private static Supplier<QueryableIndex> noBitmapMmappedIndex = Suppliers.memoize(
() -> persistRealtimeAndLoadMMapped(noBitmapRealtimeIndex.get())
);
/**
 * Memoized supplier of a merged, memory-mapped test index.
 *
 * On first access it builds two incremental indexes from the "druid.sample.numeric.tsv.top" and
 * "druid.sample.numeric.tsv.bottom" resources, persists each one to its own directory, merges the two persisted
 * indexes (rollup enabled, using {@code METRIC_AGGS}) into a third directory and loads the merged result.
 *
 * An {@link IOException} raised while persisting, merging or loading is rethrown wrapped in a
 * {@link RuntimeException}.
 */
private static Supplier<QueryableIndex> mergedRealtime = Suppliers.memoize(() -> {
  try {
    IncrementalIndex top = makeRealtimeIndex("druid.sample.numeric.tsv.top");
    IncrementalIndex bottom = makeRealtimeIndex("druid.sample.numeric.tsv.bottom");

    // Reserve a unique temp path, then delete the file so the same path can be re-created as a directory
    // (the mkdirs() calls on its children below create it).
    File tmpFile = File.createTempFile("yay", "who");
    tmpFile.delete();

    File topFile = new File(tmpFile, "top");
    File bottomFile = new File(tmpFile, "bottom");
    File mergedFile = new File(tmpFile, "merged");

    topFile.mkdirs();
    topFile.deleteOnExit();
    bottomFile.mkdirs();
    bottomFile.deleteOnExit();
    mergedFile.mkdirs();
    mergedFile.deleteOnExit();

    INDEX_MERGER.persist(top, DATA_INTERVAL, topFile, indexSpec, null);
    INDEX_MERGER.persist(bottom, DATA_INTERVAL, bottomFile, indexSpec, null);

    return INDEX_IO.loadIndex(
        INDEX_MERGER.mergeQueryableIndex(
            Arrays.asList(INDEX_IO.loadIndex(topFile), INDEX_IO.loadIndex(bottomFile)),
            true,
            METRIC_AGGS,
            mergedFile,
            indexSpec,
            null
        )
    );
  }
  catch (IOException e) {
    // Throwables.propagate is deprecated in Guava; for a checked exception it is equivalent to this wrap.
    throw new RuntimeException(e);
  }
});
public static IncrementalIndex getIncrementalTestIndex()
{
synchronized (log) {
if (realtimeIndex != null) {
return realtimeIndex;
}
}
return realtimeIndex = makeRealtimeIndex("druid.sample.numeric.tsv");
return realtimeIndex.get();
}
public static IncrementalIndex getNoRollupIncrementalTestIndex()
{
synchronized (log) {
if (noRollupRealtimeIndex != null) {
return noRollupRealtimeIndex;
}
}
return noRollupRealtimeIndex.get();
}
return noRollupRealtimeIndex = makeRealtimeIndex("druid.sample.numeric.tsv", false);
/**
 * Returns the shared incremental test index held by the {@code noBitmapRealtimeIndex} supplier.
 *
 * @return the incremental index produced by that supplier
 */
public static IncrementalIndex getNoBitmapIncrementalTestIndex()
{
  final IncrementalIndex noBitmapIndex = noBitmapRealtimeIndex.get();
  return noBitmapIndex;
}
public static QueryableIndex getMMappedTestIndex()
{
synchronized (log) {
if (mmappedIndex != null) {
return mmappedIndex;
}
}
IncrementalIndex incrementalIndex = getIncrementalTestIndex();
mmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
return mmappedIndex;
return mmappedIndex.get();
}
public static QueryableIndex getNoRollupMMappedTestIndex()
{
synchronized (log) {
if (noRollupMmappedIndex != null) {
return noRollupMmappedIndex;
}
}
return noRollupMmappedIndex.get();
}
IncrementalIndex incrementalIndex = getNoRollupIncrementalTestIndex();
noRollupMmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
return noRollupMmappedIndex;
/**
 * Returns the shared memory-mapped test index held by the {@code noBitmapMmappedIndex} supplier.
 *
 * @return the queryable index produced by that supplier
 */
public static QueryableIndex getNoBitmapMMappedTestIndex()
{
  final QueryableIndex noBitmapIndex = noBitmapMmappedIndex.get();
  return noBitmapIndex;
}
public static QueryableIndex mergedRealtimeIndex()
{
synchronized (log) {
if (mergedRealtime != null) {
return mergedRealtime;
}
try {
IncrementalIndex top = makeRealtimeIndex("druid.sample.numeric.tsv.top");
IncrementalIndex bottom = makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
File tmpFile = File.createTempFile("yay", "who");
tmpFile.delete();
File topFile = new File(tmpFile, "top");
File bottomFile = new File(tmpFile, "bottom");
File mergedFile = new File(tmpFile, "merged");
topFile.mkdirs();
topFile.deleteOnExit();
bottomFile.mkdirs();
bottomFile.deleteOnExit();
mergedFile.mkdirs();
mergedFile.deleteOnExit();
INDEX_MERGER.persist(top, DATA_INTERVAL, topFile, indexSpec, null);
INDEX_MERGER.persist(bottom, DATA_INTERVAL, bottomFile, indexSpec, null);
mergedRealtime = INDEX_IO.loadIndex(
INDEX_MERGER.mergeQueryableIndex(
Arrays.asList(INDEX_IO.loadIndex(topFile), INDEX_IO.loadIndex(bottomFile)),
true,
METRIC_AGGS,
mergedFile,
indexSpec,
null
)
);
return mergedRealtime;
}
catch (IOException e) {
throw Throwables.propagate(e);
}
}
return mergedRealtime.get();
}
public static IncrementalIndex makeRealtimeIndex(final String resourceFilename)
@ -256,6 +264,11 @@ public class TestIndex
}
/**
 * Builds an incremental index from a classpath resource, delegating to the three-argument overload with
 * the bitmap flag set to {@code true}.
 *
 * @param resourceFilename name of the resource to load rows from
 * @param rollup           rollup flag forwarded unchanged to the three-argument overload
 * @return the index returned by the three-argument overload
 */
public static IncrementalIndex makeRealtimeIndex(final String resourceFilename, boolean rollup)
{
  final boolean withBitmaps = true;
  return makeRealtimeIndex(resourceFilename, rollup, withBitmaps);
}
public static IncrementalIndex makeRealtimeIndex(final String resourceFilename, boolean rollup, boolean bitmap)
{
final URL resource = TestIndex.class.getClassLoader().getResource(resourceFilename);
if (resource == null) {
@ -263,20 +276,20 @@ public class TestIndex
}
log.info("Realtime loading index file[%s]", resource);
CharSource stream = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
return makeRealtimeIndex(stream, rollup);
return makeRealtimeIndex(stream, rollup, bitmap);
}
public static IncrementalIndex makeRealtimeIndex(final CharSource source)
{
return makeRealtimeIndex(source, true);
return makeRealtimeIndex(source, true, true);
}
public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup)
public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup, boolean bitmap)
{
final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
.withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
.withTimestampSpec(new TimestampSpec("ds", "auto", null))
.withDimensionsSpec(DIMENSIONS_SPEC)
.withDimensionsSpec(bitmap ? DIMENSIONS_SPEC : DIMENSIONS_SPEC_NO_BITMAPS)
.withVirtualColumns(VIRTUAL_COLUMNS)
.withMetrics(METRIC_AGGS)
.withRollup(rollup)