mirror of https://github.com/apache/druid.git

Added time ordering to the scan benchmark

commit 01b25ed112 (parent 12e51a2721)
@@ -91,7 +91,7 @@ import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 
-/* Works with 4GB heap size or greater. Otherwise there's a good chance of an OOME. */
+/* Works with 8GB heap size or greater. Otherwise there's a good chance of an OOME. */
 @State(Scope.Benchmark)
 @Fork(value = 1)
 @Warmup(iterations = 10)
@@ -104,15 +104,18 @@ public class ScanBenchmark
   @Param({"1", "2"})
   private int numProcessingThreads;
 
-  @Param({"250000"})
+  @Param({"750000"})
   private int rowsPerSegment;
 
   @Param({"basic.A"})
   private String schemaAndQuery;
 
-  @Param({"1000"})
+  @Param({"1000", "99999"})
   private int limit;
 
+  @Param({"none", "descending", "ascending"})
+  private static String timeOrdering;
+
   private static final Logger log = new Logger(ScanBenchmark.class);
   private static final ObjectMapper JSON_MAPPER;
   private static final IndexMergerV9 INDEX_MERGER_V9;
@@ -178,7 +181,7 @@ public class ScanBenchmark
     return Druids.newScanQueryBuilder()
                  .dataSource("blah")
                  .intervals(intervalSpec)
-                 .timeOrder("none");
+                 .timeOrder(timeOrdering);
   }
 
@@ -205,7 +208,7 @@ public class ScanBenchmark
     return Druids.newScanQueryBuilder()
                  .filters(filter)
                  .intervals(intervalSpec)
-                 .timeOrder("none");
+                 .timeOrder(timeOrdering);
   }
 
   private static Druids.ScanQueryBuilder basicC(final BenchmarkSchemaInfo basicSchema)
@@ -223,7 +226,7 @@ public class ScanBenchmark
     return Druids.newScanQueryBuilder()
                  .filters(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance()))
                  .intervals(intervalSpec)
-                 .timeOrder("none");
+                 .timeOrder(timeOrdering);
   }
 
   private static Druids.ScanQueryBuilder basicD(final BenchmarkSchemaInfo basicSchema)
@@ -245,7 +248,7 @@ public class ScanBenchmark
     return Druids.newScanQueryBuilder()
                  .filters(new BoundDimFilter(dimName, "100", "10000", true, true, true, null, null))
                  .intervals(intervalSpec)
-                 .timeOrder("none");
+                 .timeOrder(timeOrdering);
   }
 
   @Setup
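For anyone rerunning this benchmark, the sweep now covers every combination of limit and timeOrdering. A minimal sketch of the query a single trial builds; beyond the builder calls visible in this diff, the limit(...) and build() methods and the Intervals/MultipleIntervalSegmentSpec usage are assumptions about the standard Druid API, and the interval value is a placeholder (the benchmark derives its own from the schema):

    // Sketch only: how one JMH trial's parameters feed the ScanQuery under test.
    // Intervals.of(...), limit(...) and build() are assumptions here; "blah" and
    // timeOrder(...) come straight from the diff above.
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(
        Collections.singletonList(Intervals.of("2013-01-01/2013-01-02"))
    );
    ScanQuery query = Druids.newScanQueryBuilder()
                            .dataSource("blah")
                            .intervals(intervalSpec)
                            .timeOrder("descending")  // @Param sweeps "none", "descending", "ascending"
                            .limit(99999)             // @Param sweeps 1000 and 99999
                            .build();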
@@ -69,78 +69,71 @@ public class ScanQueryQueryToolChest extends QueryToolChest<ScanResultValue, ScanQuery>
   @Override
   public QueryRunner<ScanResultValue> mergeResults(final QueryRunner<ScanResultValue> runner)
   {
-    return new QueryRunner<ScanResultValue>()
-    {
-      @Override
-      public Sequence<ScanResultValue> run(
-          final QueryPlus<ScanResultValue> queryPlus, final Map<String, Object> responseContext
-      )
-      {
-        // Ensure "legacy" is a non-null value, such that all other nodes this query is forwarded to will treat it
-        // the same way, even if they have different default legacy values.
-        final ScanQuery scanQuery = ((ScanQuery) queryPlus.getQuery()).withNonNullLegacy(scanQueryConfig);
-        final QueryPlus<ScanResultValue> queryPlusWithNonNullLegacy = queryPlus.withQuery(scanQuery);
-        BaseSequence.IteratorMaker scanQueryLimitRowIteratorMaker =
-            new BaseSequence.IteratorMaker<ScanResultValue, ScanQueryLimitRowIterator>()
-            {
-              @Override
-              public ScanQueryLimitRowIterator make()
-              {
-                return new ScanQueryLimitRowIterator(runner, queryPlusWithNonNullLegacy, responseContext);
-              }
-
-              @Override
-              public void cleanup(ScanQueryLimitRowIterator iterFromMake)
-              {
-                CloseQuietly.close(iterFromMake);
-              }
-            };
-
-        if (scanQuery.getTimeOrder().equals(ScanQuery.TIME_ORDER_NONE) ||
-            scanQuery.getLimit() > maxRowsForInMemoryTimeOrdering) {
-          if (scanQuery.getLimit() == Long.MAX_VALUE) {
-            return runner.run(queryPlusWithNonNullLegacy, responseContext);
-          }
-          return new BaseSequence<>(scanQueryLimitRowIteratorMaker);
-        } else if (scanQuery.getTimeOrder().equals(ScanQuery.TIME_ORDER_ASCENDING) ||
-                   scanQuery.getTimeOrder().equals(ScanQuery.TIME_ORDER_DESCENDING)) {
-          Comparator priorityQComparator = new ScanResultValueTimestampComparator(scanQuery);
-
-          // Converting the limit from long to int could theoretically throw an ArithmeticException but this branch
-          // only runs if limit < MAX_LIMIT_FOR_IN_MEMORY_TIME_ORDERING (which should be < Integer.MAX_VALUE)
-          PriorityQueue q = new PriorityQueue<>(Math.toIntExact(scanQuery.getLimit()), priorityQComparator);
-          Iterator<ScanResultValue> scanResultIterator = scanQueryLimitRowIteratorMaker.make();
-
-          while (scanResultIterator.hasNext()) {
-            ScanResultValue next = scanResultIterator.next();
-            List<Object> events = (List<Object>) next.getEvents();
-            for (Object event : events) {
-              // Using an intermediate unbatched ScanResultValue is not that great memory-wise, but the column list
-              // needs to be preserved for queries using the compactedList result format
-              q.offer(new ScanResultValue(null, next.getColumns(), Collections.singletonList(event)));
-            }
-          }
-
-          Iterator queueIterator = q.iterator();
-
-          return new BaseSequence(
-              new BaseSequence.IteratorMaker<ScanResultValue, ScanBatchedTimeOrderedQueueIterator>()
-              {
-                @Override
-                public ScanQueryLimitRowIterator make()
-                {
-                  return new ScanQueryLimitRowIterator(runner, queryPlusWithNonNullLegacy, responseContext);
-                }
-
-                @Override
-                public void cleanup(ScanQueryLimitRowIterator iterFromMake)
-                {
-                  CloseQuietly.close(iterFromMake);
-                }
-              };
-        } else {
-          throw new UOE("Time ordering [%s] is not supported", scanQuery.getTimeOrder());
-        }
-      }
-    };
+    return (queryPlus, responseContext) -> {
+      // Ensure "legacy" is a non-null value, such that all other nodes this query is forwarded to will treat it
+      // the same way, even if they have different default legacy values.
+      final ScanQuery scanQuery = ((ScanQuery) queryPlus.getQuery()).withNonNullLegacy(scanQueryConfig);
+      final QueryPlus<ScanResultValue> queryPlusWithNonNullLegacy = queryPlus.withQuery(scanQuery);
+      BaseSequence.IteratorMaker scanQueryLimitRowIteratorMaker =
+          new BaseSequence.IteratorMaker<ScanResultValue, ScanQueryLimitRowIterator>()
+          {
+            @Override
+            public ScanQueryLimitRowIterator make()
+            {
+              return new ScanQueryLimitRowIterator(runner, queryPlusWithNonNullLegacy, responseContext);
+            }
+
+            @Override
+            public void cleanup(ScanQueryLimitRowIterator iterFromMake)
+            {
+              CloseQuietly.close(iterFromMake);
+            }
+          };
+
+      if (scanQuery.getTimeOrder().equals(ScanQuery.TIME_ORDER_NONE) ||
+          scanQuery.getLimit() > maxRowsForInMemoryTimeOrdering) {
+        if (scanQuery.getLimit() == Long.MAX_VALUE) {
+          return runner.run(queryPlusWithNonNullLegacy, responseContext);
+        }
+        return new BaseSequence<>(scanQueryLimitRowIteratorMaker);
+      } else if (scanQuery.getTimeOrder().equals(ScanQuery.TIME_ORDER_ASCENDING) ||
+                 scanQuery.getTimeOrder().equals(ScanQuery.TIME_ORDER_DESCENDING)) {
+        Comparator priorityQComparator = new ScanResultValueTimestampComparator(scanQuery);
+
+        // Converting the limit from long to int could theoretically throw an ArithmeticException but this branch
+        // only runs if limit < MAX_LIMIT_FOR_IN_MEMORY_TIME_ORDERING (which should be < Integer.MAX_VALUE)
+        PriorityQueue<Object> q = new PriorityQueue<>(Math.toIntExact(scanQuery.getLimit()), priorityQComparator);
+        Iterator<ScanResultValue> scanResultIterator = scanQueryLimitRowIteratorMaker.make();
+
+        while (scanResultIterator.hasNext()) {
+          ScanResultValue next = scanResultIterator.next();
+          List<Object> events = (List<Object>) next.getEvents();
+          for (Object event : events) {
+            // Using an intermediate unbatched ScanResultValue is not that great memory-wise, but the column list
+            // needs to be preserved for queries using the compactedList result format
+            q.offer(new ScanResultValue(null, next.getColumns(), Collections.singletonList(event)));
+          }
+        }
+
+        Iterator queueIterator = q.iterator();
+
+        return new BaseSequence(
+            new BaseSequence.IteratorMaker<ScanResultValue, ScanBatchedTimeOrderedQueueIterator>()
+            {
+              @Override
+              public ScanBatchedTimeOrderedQueueIterator make()
+              {
+                return new ScanBatchedTimeOrderedQueueIterator(queueIterator, scanQuery.getBatchSize());
+              }
+
+              @Override
+              public void cleanup(ScanBatchedTimeOrderedQueueIterator iterFromMake)
+              {
+                CloseQuietly.close(iterFromMake);
+              }
+            });
+      } else {
+        throw new UOE("Time ordering [%s] is not supported", scanQuery.getTimeOrder());
+      }
+    };
   }
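The else-if branch above implements a simple strategy: offer every single-event row to a priority queue keyed on its timestamp, then hand the rows back in batches. A self-contained sketch of that technique in plain Java; Row is a hypothetical stand-in for the single-event ScanResultValues built in the loop above, not a Druid class:

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.Iterator;
    import java.util.List;
    import java.util.PriorityQueue;

    public class TimeOrderedBatcher
    {
      // Hypothetical stand-in for a single-event ScanResultValue.
      static class Row
      {
        final long timestamp;
        final Object event;

        Row(long timestamp, Object event)
        {
          this.timestamp = timestamp;
          this.event = event;
        }
      }

      // Orders rows by timestamp in memory, then re-batches them in groups of
      // at most batchSize, mirroring the shape of the branch above.
      static List<List<Row>> orderAndBatch(Iterator<Row> rows, boolean descending, int batchSize)
      {
        Comparator<Row> cmp = Comparator.comparingLong(r -> r.timestamp);
        if (descending) {
          cmp = cmp.reversed();
        }
        PriorityQueue<Row> q = new PriorityQueue<>(cmp);
        rows.forEachRemaining(q::offer); // buffers everything, so only safe under a bounded limit

        List<List<Row>> batches = new ArrayList<>();
        while (!q.isEmpty()) {
          List<Row> batch = new ArrayList<>(batchSize);
          while (!q.isEmpty() && batch.size() < batchSize) {
            batch.add(q.poll()); // poll() yields sorted order; PriorityQueue.iterator() does not
          }
          batches.add(batch);
        }
        return batches;
      }
    }

One detail the sketch makes explicit: sorted output comes from repeatedly polling the queue, since PriorityQueue.iterator() is documented to traverse elements in no particular order; the queueIterator handed to ScanBatchedTimeOrderedQueueIterator in the diff is worth double-checking on that point.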