mirror of https://github.com/apache/druid.git
segment metadata fallback analysis if no bitmaps (#7116)
* segment metadata fallback analysis if no bitmaps
* remove accidental line
* remove nonsense size estimation
* less ternary
* fix it
* do the thing
This commit is contained in:
parent
b8f762037a
commit
9fa649b3bd
|
@ -42,6 +42,7 @@ import org.apache.druid.segment.column.ColumnCapabilities;
|
|||
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
|
||||
import org.apache.druid.segment.column.ColumnHolder;
|
||||
import org.apache.druid.segment.column.ComplexColumn;
|
||||
import org.apache.druid.segment.column.DictionaryEncodedColumn;
|
||||
import org.apache.druid.segment.column.ValueType;
|
||||
import org.apache.druid.segment.data.IndexedInts;
|
||||
import org.apache.druid.segment.serde.ComplexMetricSerde;
|
||||
|
@ -194,23 +195,20 @@ public class SegmentAnalyzer
|
|||
final ColumnHolder columnHolder
|
||||
)
|
||||
{
|
||||
long size = 0;
|
||||
|
||||
Comparable min = null;
|
||||
Comparable max = null;
|
||||
|
||||
if (!capabilities.hasBitmapIndexes()) {
|
||||
return ColumnAnalysis.error("string_no_bitmap");
|
||||
}
|
||||
|
||||
long size = 0;
|
||||
final int cardinality;
|
||||
if (capabilities.hasBitmapIndexes()) {
|
||||
final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
|
||||
final int cardinality = bitmapIndex.getCardinality();
|
||||
cardinality = bitmapIndex.getCardinality();
|
||||
|
||||
if (analyzingSize()) {
|
||||
for (int i = 0; i < cardinality; ++i) {
|
||||
String value = bitmapIndex.getValue(i);
|
||||
if (value != null) {
|
||||
size += StringUtils.estimatedBinaryLengthAsUTF8(value) * bitmapIndex.getBitmap(bitmapIndex.getIndex(value)).size();
|
||||
size += StringUtils.estimatedBinaryLengthAsUTF8(value) * bitmapIndex.getBitmap(bitmapIndex.getIndex(value))
|
||||
.size();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -219,6 +217,17 @@ public class SegmentAnalyzer
|
|||
min = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(0));
|
||||
max = NullHandling.nullToEmptyIfNeeded(bitmapIndex.getValue(cardinality - 1));
|
||||
}
|
||||
} else if (capabilities.isDictionaryEncoded()) {
|
||||
// fallback if no bitmap index
|
||||
DictionaryEncodedColumn<String> theColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn();
|
||||
cardinality = theColumn.getCardinality();
|
||||
if (analyzingMinMax() && cardinality > 0) {
|
||||
min = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(0));
|
||||
max = NullHandling.nullToEmptyIfNeeded(theColumn.lookupName(cardinality - 1));
|
||||
}
|
||||
} else {
|
||||
cardinality = 0;
|
||||
}
|
||||
|
||||
return new ColumnAnalysis(
|
||||
capabilities.getType().name(),
|
||||
|
|
|
@ -83,10 +83,16 @@ public class SegmentMetadataQueryTest
|
|||
public static QueryRunner makeMMappedQueryRunner(
|
||||
SegmentId segmentId,
|
||||
boolean rollup,
|
||||
boolean bitmaps,
|
||||
QueryRunnerFactory factory
|
||||
)
|
||||
{
|
||||
QueryableIndex index = rollup ? TestIndex.getMMappedTestIndex() : TestIndex.getNoRollupMMappedTestIndex();
|
||||
QueryableIndex index;
|
||||
if (bitmaps) {
|
||||
index = rollup ? TestIndex.getMMappedTestIndex() : TestIndex.getNoRollupMMappedTestIndex();
|
||||
} else {
|
||||
index = TestIndex.getNoBitmapMMappedTestIndex();
|
||||
}
|
||||
return QueryRunnerTestHelper.makeQueryRunner(
|
||||
factory,
|
||||
segmentId,
|
||||
|
@ -99,10 +105,16 @@ public class SegmentMetadataQueryTest
|
|||
public static QueryRunner makeIncrementalIndexQueryRunner(
|
||||
SegmentId segmentId,
|
||||
boolean rollup,
|
||||
boolean bitmaps,
|
||||
QueryRunnerFactory factory
|
||||
)
|
||||
{
|
||||
IncrementalIndex index = rollup ? TestIndex.getIncrementalTestIndex() : TestIndex.getNoRollupIncrementalTestIndex();
|
||||
IncrementalIndex index;
|
||||
if (bitmaps) {
|
||||
index = rollup ? TestIndex.getIncrementalTestIndex() : TestIndex.getNoRollupIncrementalTestIndex();
|
||||
} else {
|
||||
index = TestIndex.getNoBitmapIncrementalTestIndex();
|
||||
}
|
||||
return QueryRunnerTestHelper.makeQueryRunner(
|
||||
factory,
|
||||
segmentId,
|
||||
|
@ -121,17 +133,19 @@ public class SegmentMetadataQueryTest
|
|||
private final SegmentMetadataQuery testQuery;
|
||||
private final SegmentAnalysis expectedSegmentAnalysis1;
|
||||
private final SegmentAnalysis expectedSegmentAnalysis2;
|
||||
private final boolean bitmaps;
|
||||
|
||||
@Parameterized.Parameters(name = "mmap1 = {0}, mmap2 = {1}, rollup1 = {2}, rollup2 = {3}, differentIds = {4}")
|
||||
@Parameterized.Parameters(name = "mmap1 = {0}, mmap2 = {1}, rollup1 = {2}, rollup2 = {3}, differentIds = {4}, bitmaps={5}")
|
||||
public static Collection<Object[]> constructorFeeder()
|
||||
{
|
||||
return ImmutableList.of(
|
||||
new Object[]{true, true, true, true, false},
|
||||
new Object[]{true, false, true, false, false},
|
||||
new Object[]{false, true, true, false, false},
|
||||
new Object[]{false, false, false, false, false},
|
||||
new Object[]{false, false, true, true, false},
|
||||
new Object[]{false, false, false, true, true}
|
||||
new Object[]{true, true, true, true, false, true},
|
||||
new Object[]{true, false, true, false, false, true},
|
||||
new Object[]{false, true, true, false, false, true},
|
||||
new Object[]{false, false, false, false, false, true},
|
||||
new Object[]{false, false, true, true, false, true},
|
||||
new Object[]{false, false, false, true, true, true},
|
||||
new Object[]{true, true, false, false, false, false}
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -140,22 +154,24 @@ public class SegmentMetadataQueryTest
|
|||
boolean mmap2,
|
||||
boolean rollup1,
|
||||
boolean rollup2,
|
||||
boolean differentIds
|
||||
boolean differentIds,
|
||||
boolean bitmaps
|
||||
)
|
||||
{
|
||||
final SegmentId id1 = SegmentId.dummy(differentIds ? "testSegment1" : "testSegment");
|
||||
final SegmentId id2 = SegmentId.dummy(differentIds ? "testSegment2" : "testSegment");
|
||||
this.runner1 = mmap1
|
||||
? makeMMappedQueryRunner(id1, rollup1, FACTORY)
|
||||
: makeIncrementalIndexQueryRunner(id1, rollup1, FACTORY);
|
||||
? makeMMappedQueryRunner(id1, rollup1, bitmaps, FACTORY)
|
||||
: makeIncrementalIndexQueryRunner(id1, rollup1, bitmaps, FACTORY);
|
||||
this.runner2 = mmap2
|
||||
? makeMMappedQueryRunner(id2, rollup2, FACTORY)
|
||||
: makeIncrementalIndexQueryRunner(id2, rollup2, FACTORY);
|
||||
? makeMMappedQueryRunner(id2, rollup2, bitmaps, FACTORY)
|
||||
: makeIncrementalIndexQueryRunner(id2, rollup2, bitmaps, FACTORY);
|
||||
this.mmap1 = mmap1;
|
||||
this.mmap2 = mmap2;
|
||||
this.rollup1 = rollup1;
|
||||
this.rollup2 = rollup2;
|
||||
this.differentIds = differentIds;
|
||||
this.bitmaps = bitmaps;
|
||||
testQuery = Druids.newSegmentMetadataQueryBuilder()
|
||||
.dataSource("testing")
|
||||
.intervals("2013/2014")
|
||||
|
@ -169,6 +185,16 @@ public class SegmentMetadataQueryTest
|
|||
.merge(true)
|
||||
.build();
|
||||
|
||||
int preferedSize1 = 0;
|
||||
int placementSize2 = 0;
|
||||
int overallSize1 = 119691;
|
||||
int overallSize2 = 119691;
|
||||
if (bitmaps) {
|
||||
preferedSize1 = mmap1 ? 10881 : 10764;
|
||||
placementSize2 = mmap2 ? 10881 : 0;
|
||||
overallSize1 = mmap1 ? 167493 : 168188;
|
||||
overallSize2 = mmap2 ? 167493 : 168188;
|
||||
}
|
||||
expectedSegmentAnalysis1 = new SegmentAnalysis(
|
||||
id1.toString(),
|
||||
ImmutableList.of(Intervals.of("2011-01-12T00:00:00.000Z/2011-04-15T00:00:00.001Z")),
|
||||
|
@ -187,7 +213,7 @@ public class SegmentMetadataQueryTest
|
|||
new ColumnAnalysis(
|
||||
ValueType.STRING.toString(),
|
||||
false,
|
||||
mmap1 ? 10881 : 10764,
|
||||
preferedSize1,
|
||||
1,
|
||||
"preferred",
|
||||
"preferred",
|
||||
|
@ -203,7 +229,7 @@ public class SegmentMetadataQueryTest
|
|||
null,
|
||||
null
|
||||
)
|
||||
), mmap1 ? 167493 : 168188,
|
||||
), overallSize1,
|
||||
1209,
|
||||
null,
|
||||
null,
|
||||
|
@ -228,7 +254,7 @@ public class SegmentMetadataQueryTest
|
|||
new ColumnAnalysis(
|
||||
ValueType.STRING.toString(),
|
||||
false,
|
||||
mmap2 ? 10881 : 0,
|
||||
placementSize2,
|
||||
1,
|
||||
null,
|
||||
null,
|
||||
|
@ -245,7 +271,7 @@ public class SegmentMetadataQueryTest
|
|||
null
|
||||
)
|
||||
// null_column will be included only for incremental index, which makes a little bigger result than expected
|
||||
), mmap2 ? 167493 : 168188,
|
||||
), overallSize2,
|
||||
1209,
|
||||
null,
|
||||
null,
|
||||
|
@ -470,10 +496,16 @@ public class SegmentMetadataQueryTest
|
|||
@Test
|
||||
public void testSegmentMetadataQueryWithDefaultAnalysisMerge()
|
||||
{
|
||||
int size1 = 0;
|
||||
int size2 = 0;
|
||||
if (bitmaps) {
|
||||
size1 = mmap1 ? 10881 : 10764;
|
||||
size2 = mmap2 ? 10881 : 10764;
|
||||
}
|
||||
ColumnAnalysis analysis = new ColumnAnalysis(
|
||||
ValueType.STRING.toString(),
|
||||
false,
|
||||
(mmap1 ? 10881 : 10764) + (mmap2 ? 10881 : 10764),
|
||||
size1 + size2,
|
||||
1,
|
||||
"preferred",
|
||||
"preferred",
|
||||
|
@ -485,10 +517,16 @@ public class SegmentMetadataQueryTest
|
|||
@Test
|
||||
public void testSegmentMetadataQueryWithDefaultAnalysisMerge2()
|
||||
{
|
||||
int size1 = 0;
|
||||
int size2 = 0;
|
||||
if (bitmaps) {
|
||||
size1 = mmap1 ? 6882 : 6808;
|
||||
size2 = mmap2 ? 6882 : 6808;
|
||||
}
|
||||
ColumnAnalysis analysis = new ColumnAnalysis(
|
||||
ValueType.STRING.toString(),
|
||||
false,
|
||||
(mmap1 ? 6882 : 6808) + (mmap2 ? 6882 : 6808),
|
||||
size1 + size2,
|
||||
3,
|
||||
"spot",
|
||||
"upfront",
|
||||
|
@ -500,10 +538,16 @@ public class SegmentMetadataQueryTest
|
|||
@Test
|
||||
public void testSegmentMetadataQueryWithDefaultAnalysisMerge3()
|
||||
{
|
||||
int size1 = 0;
|
||||
int size2 = 0;
|
||||
if (bitmaps) {
|
||||
size1 = mmap1 ? 9765 : 9660;
|
||||
size2 = mmap2 ? 9765 : 9660;
|
||||
}
|
||||
ColumnAnalysis analysis = new ColumnAnalysis(
|
||||
ValueType.STRING.toString(),
|
||||
false,
|
||||
(mmap1 ? 9765 : 9660) + (mmap2 ? 9765 : 9660),
|
||||
size1 + size2,
|
||||
9,
|
||||
"automotive",
|
||||
"travel",
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.apache.druid.segment;
|
||||
|
||||
import com.google.common.base.Supplier;
|
||||
import com.google.common.base.Suppliers;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.io.CharSource;
|
||||
import com.google.common.io.LineProcessor;
|
||||
|
@ -111,12 +112,31 @@ public class TestIndex
|
|||
new StringDimensionSchema("null_column")
|
||||
);
|
||||
|
||||
public static final List<DimensionSchema> DIMENSION_SCHEMAS_NO_BITMAP = Arrays.asList(
|
||||
new StringDimensionSchema("market", null, false),
|
||||
new StringDimensionSchema("quality", null, false),
|
||||
new LongDimensionSchema("qualityLong"),
|
||||
new FloatDimensionSchema("qualityFloat"),
|
||||
new DoubleDimensionSchema("qualityDouble"),
|
||||
new StringDimensionSchema("qualityNumericString", null, false),
|
||||
new StringDimensionSchema("placement", null, false),
|
||||
new StringDimensionSchema("placementish", null, false),
|
||||
new StringDimensionSchema("partial_null_column", null, false),
|
||||
new StringDimensionSchema("null_column", null, false)
|
||||
);
|
||||
|
||||
public static final DimensionsSpec DIMENSIONS_SPEC = new DimensionsSpec(
|
||||
DIMENSION_SCHEMAS,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
public static final DimensionsSpec DIMENSIONS_SPEC_NO_BITMAPS = new DimensionsSpec(
|
||||
DIMENSION_SCHEMAS_NO_BITMAP,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
public static final String[] DOUBLE_METRICS = new String[]{"index", "indexMin", "indexMaxPlusTen"};
|
||||
public static final String[] FLOAT_METRICS = new String[]{"indexFloat", "indexMinFloat", "indexMaxFloat"};
|
||||
private static final Logger log = new Logger(TestIndex.class);
|
||||
|
@ -147,69 +167,25 @@ public class TestIndex
|
|||
}
|
||||
}
|
||||
|
||||
private static IncrementalIndex realtimeIndex = null;
|
||||
private static IncrementalIndex noRollupRealtimeIndex = null;
|
||||
private static QueryableIndex mmappedIndex = null;
|
||||
private static QueryableIndex noRollupMmappedIndex = null;
|
||||
private static QueryableIndex mergedRealtime = null;
|
||||
|
||||
public static IncrementalIndex getIncrementalTestIndex()
|
||||
{
|
||||
synchronized (log) {
|
||||
if (realtimeIndex != null) {
|
||||
return realtimeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
return realtimeIndex = makeRealtimeIndex("druid.sample.numeric.tsv");
|
||||
}
|
||||
|
||||
public static IncrementalIndex getNoRollupIncrementalTestIndex()
|
||||
{
|
||||
synchronized (log) {
|
||||
if (noRollupRealtimeIndex != null) {
|
||||
return noRollupRealtimeIndex;
|
||||
}
|
||||
}
|
||||
|
||||
return noRollupRealtimeIndex = makeRealtimeIndex("druid.sample.numeric.tsv", false);
|
||||
}
|
||||
|
||||
public static QueryableIndex getMMappedTestIndex()
|
||||
{
|
||||
synchronized (log) {
|
||||
if (mmappedIndex != null) {
|
||||
return mmappedIndex;
|
||||
}
|
||||
}
|
||||
|
||||
IncrementalIndex incrementalIndex = getIncrementalTestIndex();
|
||||
mmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
|
||||
|
||||
return mmappedIndex;
|
||||
}
|
||||
|
||||
public static QueryableIndex getNoRollupMMappedTestIndex()
|
||||
{
|
||||
synchronized (log) {
|
||||
if (noRollupMmappedIndex != null) {
|
||||
return noRollupMmappedIndex;
|
||||
}
|
||||
}
|
||||
|
||||
IncrementalIndex incrementalIndex = getNoRollupIncrementalTestIndex();
|
||||
noRollupMmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
|
||||
|
||||
return noRollupMmappedIndex;
|
||||
}
|
||||
|
||||
public static QueryableIndex mergedRealtimeIndex()
|
||||
{
|
||||
synchronized (log) {
|
||||
if (mergedRealtime != null) {
|
||||
return mergedRealtime;
|
||||
}
|
||||
|
||||
private static Supplier<IncrementalIndex> realtimeIndex = Suppliers.memoize(
|
||||
() -> makeRealtimeIndex("druid.sample.numeric.tsv")
|
||||
);
|
||||
private static Supplier<IncrementalIndex> noRollupRealtimeIndex = Suppliers.memoize(
|
||||
() -> makeRealtimeIndex("druid.sample.numeric.tsv", false)
|
||||
);
|
||||
private static Supplier<IncrementalIndex> noBitmapRealtimeIndex = Suppliers.memoize(
|
||||
() -> makeRealtimeIndex("druid.sample.numeric.tsv", false, false)
|
||||
);
|
||||
private static Supplier<QueryableIndex> mmappedIndex = Suppliers.memoize(
|
||||
() -> persistRealtimeAndLoadMMapped(realtimeIndex.get())
|
||||
);
|
||||
private static Supplier<QueryableIndex> noRollupMmappedIndex = Suppliers.memoize(
|
||||
() -> persistRealtimeAndLoadMMapped(noRollupRealtimeIndex.get())
|
||||
);
|
||||
private static Supplier<QueryableIndex> noBitmapMmappedIndex = Suppliers.memoize(
|
||||
() -> persistRealtimeAndLoadMMapped(noBitmapRealtimeIndex.get())
|
||||
);
|
||||
private static Supplier<QueryableIndex> mergedRealtime = Suppliers.memoize(() -> {
|
||||
try {
|
||||
IncrementalIndex top = makeRealtimeIndex("druid.sample.numeric.tsv.top");
|
||||
IncrementalIndex bottom = makeRealtimeIndex("druid.sample.numeric.tsv.bottom");
|
||||
|
@ -231,7 +207,7 @@ public class TestIndex
|
|||
INDEX_MERGER.persist(top, DATA_INTERVAL, topFile, indexSpec, null);
|
||||
INDEX_MERGER.persist(bottom, DATA_INTERVAL, bottomFile, indexSpec, null);
|
||||
|
||||
mergedRealtime = INDEX_IO.loadIndex(
|
||||
return INDEX_IO.loadIndex(
|
||||
INDEX_MERGER.mergeQueryableIndex(
|
||||
Arrays.asList(INDEX_IO.loadIndex(topFile), INDEX_IO.loadIndex(bottomFile)),
|
||||
true,
|
||||
|
@ -241,13 +217,45 @@ public class TestIndex
|
|||
null
|
||||
)
|
||||
);
|
||||
|
||||
return mergedRealtime;
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw Throwables.propagate(e);
|
||||
}
|
||||
});
|
||||
|
||||
public static IncrementalIndex getIncrementalTestIndex()
|
||||
{
|
||||
return realtimeIndex.get();
|
||||
}
|
||||
|
||||
public static IncrementalIndex getNoRollupIncrementalTestIndex()
|
||||
{
|
||||
return noRollupRealtimeIndex.get();
|
||||
}
|
||||
|
||||
public static IncrementalIndex getNoBitmapIncrementalTestIndex()
|
||||
{
|
||||
return noBitmapRealtimeIndex.get();
|
||||
}
|
||||
|
||||
public static QueryableIndex getMMappedTestIndex()
|
||||
{
|
||||
return mmappedIndex.get();
|
||||
}
|
||||
|
||||
public static QueryableIndex getNoRollupMMappedTestIndex()
|
||||
{
|
||||
return noRollupMmappedIndex.get();
|
||||
}
|
||||
|
||||
public static QueryableIndex getNoBitmapMMappedTestIndex()
|
||||
{
|
||||
return noBitmapMmappedIndex.get();
|
||||
}
|
||||
|
||||
public static QueryableIndex mergedRealtimeIndex()
|
||||
{
|
||||
return mergedRealtime.get();
|
||||
}
|
||||
|
||||
public static IncrementalIndex makeRealtimeIndex(final String resourceFilename)
|
||||
|
@ -256,6 +264,11 @@ public class TestIndex
|
|||
}
|
||||
|
||||
public static IncrementalIndex makeRealtimeIndex(final String resourceFilename, boolean rollup)
|
||||
{
|
||||
return makeRealtimeIndex(resourceFilename, rollup, true);
|
||||
}
|
||||
|
||||
public static IncrementalIndex makeRealtimeIndex(final String resourceFilename, boolean rollup, boolean bitmap)
|
||||
{
|
||||
final URL resource = TestIndex.class.getClassLoader().getResource(resourceFilename);
|
||||
if (resource == null) {
|
||||
|
@ -263,20 +276,20 @@ public class TestIndex
|
|||
}
|
||||
log.info("Realtime loading index file[%s]", resource);
|
||||
CharSource stream = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
|
||||
return makeRealtimeIndex(stream, rollup);
|
||||
return makeRealtimeIndex(stream, rollup, bitmap);
|
||||
}
|
||||
|
||||
public static IncrementalIndex makeRealtimeIndex(final CharSource source)
|
||||
{
|
||||
return makeRealtimeIndex(source, true);
|
||||
return makeRealtimeIndex(source, true, true);
|
||||
}
|
||||
|
||||
public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup)
|
||||
public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup, boolean bitmap)
|
||||
{
|
||||
final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
|
||||
.withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
|
||||
.withTimestampSpec(new TimestampSpec("ds", "auto", null))
|
||||
.withDimensionsSpec(DIMENSIONS_SPEC)
|
||||
.withDimensionsSpec(bitmap ? DIMENSIONS_SPEC : DIMENSIONS_SPEC_NO_BITMAPS)
|
||||
.withVirtualColumns(VIRTUAL_COLUMNS)
|
||||
.withMetrics(METRIC_AGGS)
|
||||
.withRollup(rollup)
|
||||
|
|
Loading…
Reference in New Issue