Merge remote-tracking branch 'apache/master' into quidem-kttm

Zoltan Haindrich 2024-09-20 11:02:10 +00:00
commit 6075163aaa
887 changed files with 26027 additions and 13338 deletions


@@ -117,6 +117,28 @@ jobs:
 MAVEN_PROJECTS: ${{ inputs.maven_projects }}
 run: ./.github/scripts/unit_tests_script.sh
+- name: Check for .hprof files on failure
+if: ${{ failure() }}
+id: check_for_heap_dump
+run: |
+if ls ${GITHUB_WORKSPACE}/target/*.hprof 1> /dev/null 2>&1; then
+echo "found_hprof=true" >> "$GITHUB_ENV"
+else
+echo "found_hprof=false" >> "$GITHUB_ENV"
+fi
+- name: Collect tarball hprof dumps if they exist on failure
+if: ${{ failure() && env.found_hprof == 'true' }}
+run: |
+tar cvzf ${RUNNER_TEMP}/hprof-dumps.tgz ${GITHUB_WORKSPACE}/target/*.hprof
+- name: Upload hprof dumps to GitHub if they exist on failure
+if: ${{ failure() && env.found_hprof == 'true' }}
+uses: actions/upload-artifact@master
+with:
+name: Hprof-${{ inputs.group }} hprof dumps (Compile=jdk${{ inputs.build_jdk }}, Run=jdk${{ inputs.runtime_jdk }})
+path: ${{ runner.temp }}/hprof-dumps.tgz
 - name: set outputs on failure
 id: set_outputs
 if: ${{ failure() }}


@@ -27,7 +27,7 @@
 <parent>
 <groupId>org.apache.druid</groupId>
 <artifactId>druid</artifactId>
-<version>31.0.0-SNAPSHOT</version>
+<version>32.0.0-SNAPSHOT</version>
 </parent>
 <dependencies>


@@ -132,7 +132,7 @@ public class DelimitedInputFormatBenchmark
 @Setup(Level.Trial)
 public void prepareFormat()
 {
-format = new DelimitedInputFormat(fromHeader ? null : COLUMNS, null, "\t", null, fromHeader, fromHeader ? 0 : 1);
+format = new DelimitedInputFormat(fromHeader ? null : COLUMNS, null, "\t", null, fromHeader, fromHeader ? 0 : 1, null);
 }
 @Benchmark


@@ -36,7 +36,7 @@ import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.CursorBuildSpec;
 import org.apache.druid.segment.CursorHolder;
 import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.generator.GeneratorColumnSchema;
 import org.apache.druid.segment.generator.GeneratorSchemaInfo;
@@ -161,9 +161,8 @@ public class ExpressionAggregationBenchmark
 private double compute(final Function<ColumnSelectorFactory, BufferAggregator> aggregatorFactory)
 {
-final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
-try (final CursorHolder cursorHolder = adapter.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 final BufferAggregator bufferAggregator = aggregatorFactory.apply(cursor.getColumnSelectorFactory());
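The dominant change in this commit is the swap from QueryableIndexStorageAdapter to QueryableIndexCursorFactory throughout the benchmarks. A minimal sketch of the resulting read pattern, based only on the calls visible in these hunks; Cursor.advance() and ColumnValueSelector.getDouble() are assumed from Druid's standard cursor interfaces, and the caller is assumed to supply an already-built QueryableIndex:

import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.CursorBuildSpec;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexCursorFactory;

public class CursorFactoryReadSketch
{
  // Full-scan a numeric column through the cursor-factory API and sum it.
  static double sumColumn(QueryableIndex index, String column)
  {
    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
    double sum = 0;
    // CursorHolder is closeable, so the scan is wrapped in try-with-resources just as in the benchmarks.
    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
      while (!cursor.isDone()) {
        sum += selector.getDouble();
        cursor.advance();
      }
    }
    return sum;
  }
}

The same shape repeats in the hunks below for filtered scans: build a CursorBuildSpec with a filter instead of using CursorBuildSpec.FULL_SCAN, then hand it to the same makeCursorHolder call.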


@@ -35,7 +35,7 @@ import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.CursorBuildSpec;
 import org.apache.druid.segment.CursorHolder;
 import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
 import org.apache.druid.segment.column.ValueType;
 import org.apache.druid.segment.generator.GeneratorColumnSchema;
 import org.apache.druid.segment.generator.GeneratorSchemaInfo;
@@ -148,7 +148,9 @@ public class ExpressionFilterBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setFilter(expressionFilter.toFilter())
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
@@ -166,7 +168,9 @@ public class ExpressionFilterBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setFilter(nativeFilter.toFilter())
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("x");
 while (!cursor.isDone()) {


@@ -42,9 +42,8 @@ import org.apache.druid.segment.CursorHolder;
 import org.apache.druid.segment.Cursors;
 import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
 import org.apache.druid.segment.QueryableIndexTimeBoundaryInspector;
-import org.apache.druid.segment.StorageAdapter;
 import org.apache.druid.segment.VirtualColumns;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.column.ColumnType;
@@ -160,7 +159,8 @@ public class ExpressionSelectorBenchmark
 )
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
@@ -171,7 +171,8 @@ public class ExpressionSelectorBenchmark
 @Benchmark
 public void timeFloorUsingExtractionFn(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 final DimensionSelector selector = cursor
@@ -190,15 +191,15 @@ public class ExpressionSelectorBenchmark
 @Benchmark
 public void timeFloorUsingCursor(Blackhole blackhole)
 {
-final StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
-try (final CursorHolder cursorHolder = adapter.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 final CursorGranularizer granularizer = CursorGranularizer.create(
 cursor,
 QueryableIndexTimeBoundaryInspector.create(index),
 Cursors.getTimeOrdering(index.getOrdering()),
 Granularities.HOUR,
-adapter.getInterval()
+index.getDataInterval()
 );
 final Sequence<Long> results =
 Sequences.simple(granularizer.getBucketIterable())
@@ -241,7 +242,8 @@ public class ExpressionSelectorBenchmark
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(
 DefaultDimensionSpec.of("v")
@@ -253,7 +255,8 @@ public class ExpressionSelectorBenchmark
 @Benchmark
 public void timeFormatUsingExtractionFn(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 final DimensionSelector selector = cursor
 .getColumnSelectorFactory()
@@ -284,7 +287,8 @@ public class ExpressionSelectorBenchmark
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);
@@ -307,7 +311,8 @@ public class ExpressionSelectorBenchmark
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final DimensionSelector selector = cursor
 .getColumnSelectorFactory()
@@ -320,7 +325,8 @@ public class ExpressionSelectorBenchmark
 @Benchmark
 public void strlenUsingExtractionFn(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 final DimensionSelector selector = cursor
 .getColumnSelectorFactory()
@@ -346,7 +352,8 @@ public class ExpressionSelectorBenchmark
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);
@@ -368,7 +375,8 @@ public class ExpressionSelectorBenchmark
 )
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);
@@ -390,7 +398,8 @@ public class ExpressionSelectorBenchmark
 )
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);
@@ -412,7 +421,8 @@ public class ExpressionSelectorBenchmark
 )
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);
@@ -447,7 +457,8 @@ public class ExpressionSelectorBenchmark
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);
@@ -476,7 +487,8 @@ public class ExpressionSelectorBenchmark
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);
@@ -513,7 +525,8 @@ public class ExpressionSelectorBenchmark
 )
 .build();
-try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
 consumeLong(cursor, selector, blackhole);


@@ -35,7 +35,7 @@ import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.CursorBuildSpec;
 import org.apache.druid.segment.CursorHolder;
 import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
 import org.apache.druid.segment.VirtualColumns;
 import org.apache.druid.segment.generator.GeneratorBasicSchemas;
 import org.apache.druid.segment.generator.GeneratorSchemaInfo;
@@ -155,7 +155,7 @@ public class ExpressionVectorSelectorBenchmark
 .setVirtualColumns(virtualColumns)
 .build();
 final CursorHolder cursorHolder = closer.register(
-new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)
+new QueryableIndexCursorFactory(index).makeCursorHolder(buildSpec)
 );
 if (vectorize) {
 VectorCursor cursor = cursorHolder.asVectorCursor();


@@ -46,14 +46,14 @@ import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.segment.BaseLongColumnValueSelector;
 import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.CursorBuildSpec;
+import org.apache.druid.segment.CursorFactory;
 import org.apache.druid.segment.CursorHolder;
 import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.IndexIO;
 import org.apache.druid.segment.IndexMergerV9;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
-import org.apache.druid.segment.StorageAdapter;
 import org.apache.druid.segment.column.ColumnConfig;
 import org.apache.druid.segment.column.ColumnHolder;
 import org.apache.druid.segment.data.IndexedInts;
@@ -231,8 +231,8 @@ public class FilterPartitionBenchmark
 @OutputTimeUnit(TimeUnit.MICROSECONDS)
 public void stringRead(Blackhole blackhole)
 {
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, null)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, null)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -243,8 +243,8 @@ public class FilterPartitionBenchmark
 @OutputTimeUnit(TimeUnit.MICROSECONDS)
 public void longRead(Blackhole blackhole)
 {
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, null)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, null)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursorLong(cursor, blackhole);
 }
@@ -255,8 +255,8 @@ public class FilterPartitionBenchmark
 @OutputTimeUnit(TimeUnit.MICROSECONDS)
 public void timeFilterNone(Blackhole blackhole)
 {
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (CursorHolder cursorHolder = makeCursorHolder(sa, timeFilterNone)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, timeFilterNone)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursorLong(cursor, blackhole);
 }
@@ -267,8 +267,8 @@ public class FilterPartitionBenchmark
 @OutputTimeUnit(TimeUnit.MICROSECONDS)
 public void timeFilterHalf(Blackhole blackhole)
 {
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, timeFilterHalf)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, timeFilterHalf)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursorLong(cursor, blackhole);
 }
@@ -279,8 +279,8 @@ public class FilterPartitionBenchmark
 @OutputTimeUnit(TimeUnit.MICROSECONDS)
 public void timeFilterAll(Blackhole blackhole)
 {
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, timeFilterAll)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, timeFilterAll)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursorLong(cursor, blackhole);
 }
@@ -293,8 +293,8 @@ public class FilterPartitionBenchmark
 {
 Filter filter = new SelectorFilter("dimSequential", "199");
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -307,8 +307,8 @@ public class FilterPartitionBenchmark
 {
 Filter filter = new NoBitmapSelectorFilter("dimSequential", "199");
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -321,8 +321,8 @@ public class FilterPartitionBenchmark
 {
 Filter filter = new SelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -335,8 +335,8 @@ public class FilterPartitionBenchmark
 {
 Filter filter = new NoBitmapSelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -354,8 +354,8 @@ public class FilterPartitionBenchmark
 )
 );
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, andFilter)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, andFilter)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -370,8 +370,8 @@ public class FilterPartitionBenchmark
 Filter filter2 = new AndFilter(Arrays.asList(new SelectorFilter("dimMultivalEnumerated2", "Corundum"), new NoBitmapSelectorFilter("dimMultivalEnumerated", "Bar")));
 Filter orFilter = new OrFilter(Arrays.asList(filter, filter2));
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, orFilter)) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, orFilter)) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -386,8 +386,8 @@ public class FilterPartitionBenchmark
 Filter filter2 = new AndFilter(Arrays.asList(new SelectorFilter("dimMultivalEnumerated2", "Corundum"), new NoBitmapSelectorFilter("dimMultivalEnumerated", "Bar")));
 Filter orFilter = new OrFilter(Arrays.asList(filter, filter2));
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, Filters.toCnf(orFilter))) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, Filters.toCnf(orFilter))) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -425,8 +425,8 @@ public class FilterPartitionBenchmark
 ))
 );
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, dimFilter3.toFilter())) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, dimFilter3.toFilter())) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
@@ -464,16 +464,16 @@ public class FilterPartitionBenchmark
 ))
 );
-StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
+final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
-try (final CursorHolder cursorHolder = makeCursorHolder(sa, Filters.toCnf(dimFilter3.toFilter()))) {
+try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, Filters.toCnf(dimFilter3.toFilter()))) {
 final Cursor cursor = cursorHolder.asCursor();
 readCursor(cursor, blackhole);
 }
 }
-private CursorHolder makeCursorHolder(StorageAdapter sa, Filter filter)
+private CursorHolder makeCursorHolder(CursorFactory factory, Filter filter)
 {
-return sa.makeCursorHolder(
+return factory.makeCursorHolder(
 CursorBuildSpec.builder()
 .setFilter(filter)
 .setInterval(schemaInfo.getDataInterval())


@@ -378,7 +378,6 @@ public class GroupByTypeInterfaceBenchmark
 final GroupingEngine groupingEngine = new GroupingEngine(
 druidProcessingConfig,
 configSupplier,
-bufferPool,
 groupByResourcesReservationPool,
 TestHelper.makeJsonMapper(),
 new ObjectMapper(new SmileFactory()),
@@ -387,7 +386,8 @@
 factory = new GroupByQueryRunnerFactory(
 groupingEngine,
-new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool)
+new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool),
+bufferPool
 );
 }


@@ -238,7 +238,7 @@ public class IndexedTableJoinCursorBenchmark
 private CursorHolder makeCursorHolder()
 {
-return hashJoinSegment.asStorageAdapter().makeCursorHolder(CursorBuildSpec.FULL_SCAN);
+return hashJoinSegment.asCursorFactory().makeCursorHolder(CursorBuildSpec.FULL_SCAN);
 }


@@ -369,7 +369,7 @@ public class JoinAndLookupBenchmark
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 public void baseSegment(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "countryIsoCode"));
 }
@@ -384,7 +384,7 @@ public class JoinAndLookupBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setFilter(filter)
 .build();
-try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(buildSpec)) {
+try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "countryIsoCode"));
 }
@@ -395,7 +395,7 @@ public class JoinAndLookupBenchmark
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 public void joinLookupStringKey(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.v"));
@@ -411,7 +411,7 @@ public class JoinAndLookupBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setFilter(filter)
 .build();
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.v"));
@@ -423,7 +423,7 @@ public class JoinAndLookupBenchmark
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 public void joinLookupLongKey(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.v"));
@@ -439,7 +439,7 @@ public class JoinAndLookupBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setFilter(filter)
 .build();
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.v"));
@@ -451,7 +451,7 @@ public class JoinAndLookupBenchmark
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 public void joinIndexedTableLongKey(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.countryName"));
@@ -467,7 +467,7 @@ public class JoinAndLookupBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setFilter(filter)
 .build();
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.countryName"));
@@ -479,7 +479,7 @@ public class JoinAndLookupBenchmark
 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 public void joinIndexedTableStringKey(Blackhole blackhole)
 {
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.countryName"));
@@ -495,7 +495,7 @@ public class JoinAndLookupBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setFilter(filter)
 .build();
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, "c.countryName"));
@@ -510,7 +510,7 @@ public class JoinAndLookupBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setVirtualColumns(lookupVirtualColumns)
 .build();
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_CODE_TO_NAME));
@@ -527,7 +527,7 @@ public class JoinAndLookupBenchmark
 .setFilter(filter)
 .setVirtualColumns(lookupVirtualColumns)
 .build();
-try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
 .makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_CODE_TO_NAME));
@@ -542,7 +542,7 @@ public class JoinAndLookupBenchmark
 final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
 .setVirtualColumns(lookupVirtualColumns)
 .build();
-try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(buildSpec)) {
+try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_NUMBER_TO_NAME));
 }
@@ -558,7 +558,7 @@ public class JoinAndLookupBenchmark
 .setVirtualColumns(lookupVirtualColumns)
 .setFilter(filter)
 .build();
-try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(buildSpec)) {
+try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(buildSpec)) {
 final Cursor cursor = cursorHolder.asCursor();
 blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_NUMBER_TO_NAME));
 }


@@ -23,7 +23,7 @@ import com.google.common.base.Supplier;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.MappedByteBufferHandler;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.query.QueryContexts;
 import org.apache.druid.segment.data.ColumnarLongs;
 import org.apache.druid.segment.data.CompressedColumnarLongsSupplier;
 import org.openjdk.jmh.annotations.Benchmark;
@@ -118,7 +118,7 @@ public class LongCompressionBenchmark
 @Benchmark
 public void readVectorizedContinuous(Blackhole bh)
 {
-long[] vector = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE];
+long[] vector = new long[QueryContexts.DEFAULT_VECTOR_SIZE];
 ColumnarLongs columnarLongs = supplier.get();
 int count = columnarLongs.size();
 for (int i = 0; i < count; i++) {


@@ -21,6 +21,7 @@ package org.apache.druid.benchmark.frame;
 import com.google.common.collect.ImmutableList;
 import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
 import com.google.common.util.concurrent.MoreExecutors;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.common.guava.FutureUtils;
@@ -203,6 +204,7 @@ public class FrameChannelMergerBenchmark
 private final List<KeyColumn> sortKey = ImmutableList.of(new KeyColumn(KEY, KeyOrder.ASCENDING));
 private List<List<Frame>> channelFrames;
+private ListeningExecutorService innerExec;
 private FrameProcessorExecutor exec;
 private List<BlockingQueueFrameChannel> channels;
@@ -226,7 +228,7 @@ public class FrameChannelMergerBenchmark
 frameReader = FrameReader.create(signature);
 exec = new FrameProcessorExecutor(
-MoreExecutors.listeningDecorator(
+innerExec = MoreExecutors.listeningDecorator(
 Execs.singleThreaded(StringUtils.encodeForFormat(getClass().getSimpleName()))
 )
 );
@@ -284,7 +286,7 @@ public class FrameChannelMergerBenchmark
 signature
 );
 final Sequence<Frame> frameSequence =
-FrameSequenceBuilder.fromAdapter(segment.asStorageAdapter())
+FrameSequenceBuilder.fromCursorFactory(segment.asCursorFactory())
 .allocator(ArenaMemoryAllocator.createOnHeap(10_000_000))
 .frameType(FrameType.ROW_BASED)
 .frames();
@@ -335,8 +337,8 @@ public class FrameChannelMergerBenchmark
 @TearDown(Level.Trial)
 public void tearDown() throws Exception
 {
-exec.getExecutorService().shutdownNow();
+innerExec.shutdownNow();
-if (!exec.getExecutorService().awaitTermination(1, TimeUnit.MINUTES)) {
+if (!innerExec.awaitTermination(1, TimeUnit.MINUTES)) {
 throw new ISE("Could not terminate executor after 1 minute");
 }
 }
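The teardown change above works because setup now keeps its own reference to the ListeningExecutorService handed to the FrameProcessorExecutor, so the benchmark can shut that service down directly instead of reaching through the wrapper. A minimal, self-contained sketch of the same shutdown pattern using only JDK executors (the FrameProcessorExecutor wrapper itself is left out):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class ExecutorTeardownSketch
{
  public static void main(String[] args) throws InterruptedException
  {
    // Keep a handle to the underlying executor so teardown can act on it directly.
    final ExecutorService innerExec = Executors.newSingleThreadExecutor();
    innerExec.submit(() -> System.out.println("benchmark work"));

    // Same shutdown discipline as the benchmark's tearDown(): stop now, then bound the wait.
    innerExec.shutdownNow();
    if (!innerExec.awaitTermination(1, TimeUnit.MINUTES)) {
      throw new IllegalStateException("Could not terminate executor after 1 minute");
    }
  }
}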


@@ -35,6 +35,7 @@ import org.apache.druid.query.ordering.StringComparators;
 import org.apache.druid.query.search.ContainsSearchQuerySpec;
 import org.apache.druid.segment.Cursor;
 import org.apache.druid.segment.CursorBuildSpec;
+import org.apache.druid.segment.CursorFactory;
 import org.apache.druid.segment.CursorHolder;
 import org.apache.druid.segment.DimensionSelector;
 import org.apache.druid.segment.data.IndexedInts;
@@ -44,8 +45,8 @@ import org.apache.druid.segment.generator.GeneratorSchemaInfo;
 import org.apache.druid.segment.incremental.AppendableIndexSpec;
 import org.apache.druid.segment.incremental.IncrementalIndex;
 import org.apache.druid.segment.incremental.IncrementalIndexCreator;
+import org.apache.druid.segment.incremental.IncrementalIndexCursorFactory;
 import org.apache.druid.segment.incremental.IncrementalIndexSchema;
-import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
 import org.apache.druid.segment.serde.ComplexMetrics;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -147,8 +148,8 @@ public class IncrementalIndexReadBenchmark
 @OutputTimeUnit(TimeUnit.MICROSECONDS)
 public void read(Blackhole blackhole)
 {
-IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
+final CursorFactory cursorFactory = new IncrementalIndexCursorFactory(incIndex);
-try (final CursorHolder cursorHolder = makeCursor(sa, null)) {
+try (final CursorHolder cursorHolder = makeCursor(cursorFactory, null)) {
 Cursor cursor = cursorHolder.asCursor();
 List<DimensionSelector> selectors = new ArrayList<>();
@@ -183,8 +184,8 @@ public class IncrementalIndexReadBenchmark
 )
 );
-IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
+IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(incIndex);
-try (final CursorHolder cursorHolder = makeCursor(sa, filter)) {
+try (final CursorHolder cursorHolder = makeCursor(cursorFactory, filter)) {
 Cursor cursor = cursorHolder.asCursor();
 List<DimensionSelector> selectors = new ArrayList<>();
@@ -204,14 +205,14 @@ public class IncrementalIndexReadBenchmark
 }
 }
-private CursorHolder makeCursor(IncrementalIndexStorageAdapter sa, DimFilter filter)
+private CursorHolder makeCursor(CursorFactory factory, DimFilter filter)
 {
 CursorBuildSpec.CursorBuildSpecBuilder builder = CursorBuildSpec.builder()
 .setInterval(schemaInfo.getDataInterval());
 if (filter != null) {
 builder.setFilter(filter.toFilter());
 }
-return sa.makeCursorHolder(builder.build());
+return factory.makeCursorHolder(builder.build());
 }
 private static DimensionSelector makeDimensionSelector(Cursor cursor, String name)


@@ -362,14 +362,13 @@ public class CachingClusteredClientBenchmark
 final GroupingEngine groupingEngine = new GroupingEngine(
 processingConfig,
 configSupplier,
-bufferPool,
 groupByResourcesReservationPool,
 mapper,
 mapper,
 QueryRunnerTestHelper.NOOP_QUERYWATCHER
 );
 final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool);
-return new GroupByQueryRunnerFactory(groupingEngine, toolChest);
+return new GroupByQueryRunnerFactory(groupingEngine, toolChest, bufferPool);
 }
 @TearDown(Level.Trial)


@@ -495,7 +495,6 @@ public class GroupByBenchmark
 final GroupingEngine groupingEngine = new GroupingEngine(
 druidProcessingConfig,
 configSupplier,
-bufferPool,
 groupByResourcesReservationPool,
 TestHelper.makeJsonMapper(),
 new ObjectMapper(new SmileFactory()),
@@ -504,7 +503,8 @@ public class GroupByBenchmark
 factory = new GroupByQueryRunnerFactory(
 groupingEngine,
-new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool)
+new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool),
+bufferPool
 );
 }


@@ -57,8 +57,8 @@ import org.apache.druid.query.lookup.LookupExtractor;
 import org.apache.druid.segment.AutoTypeColumnSchema;
 import org.apache.druid.segment.IndexSpec;
 import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
 import org.apache.druid.segment.QueryableIndexSegment;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
 import org.apache.druid.segment.column.StringEncodingStrategy;
 import org.apache.druid.segment.data.FrontCodedIndexed;
 import org.apache.druid.segment.generator.GeneratorBasicSchemas;
@@ -694,8 +694,8 @@ public class SqlBenchmark
 } else if (STORAGE_FRAME_ROW.equals(storageType)) {
 walker.add(
 descriptor,
-FrameTestUtil.adapterToFrameSegment(
+FrameTestUtil.cursorFactoryToFrameSegment(
-new QueryableIndexStorageAdapter(index),
+new QueryableIndexCursorFactory(index),
 FrameType.ROW_BASED,
 descriptor.getId()
 )
@@ -703,8 +703,8 @@ public class SqlBenchmark
 } else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) {
 walker.add(
 descriptor,
-FrameTestUtil.adapterToFrameSegment(
+FrameTestUtil.cursorFactoryToFrameSegment(
-new QueryableIndexStorageAdapter(index),
+new QueryableIndexCursorFactory(index),
 FrameType.COLUMNAR,
 descriptor.getId()
 )


@ -55,8 +55,8 @@ import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.AutoTypeColumnSchema; import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.QueryableIndexSegment; import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.QueryableIndexStorageAdapter;
import org.apache.druid.segment.column.StringEncodingStrategy; import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.generator.GeneratorBasicSchemas; import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo; import org.apache.druid.segment.generator.GeneratorSchemaInfo;
@ -159,6 +159,12 @@ public class SqlWindowFunctionsBenchmark
{ {
return 3; return 3;
} }
@Override
public int intermediateComputeSizeBytes()
{
return 200_000_000;
}
}; };
@Setup(Level.Trial) @Setup(Level.Trial)
@ -281,8 +287,8 @@ public class SqlWindowFunctionsBenchmark
} else if (STORAGE_FRAME_ROW.equals(storageType)) { } else if (STORAGE_FRAME_ROW.equals(storageType)) {
walker.add( walker.add(
descriptor, descriptor,
FrameTestUtil.adapterToFrameSegment( FrameTestUtil.cursorFactoryToFrameSegment(
new QueryableIndexStorageAdapter(index), new QueryableIndexCursorFactory(index),
FrameType.ROW_BASED, FrameType.ROW_BASED,
descriptor.getId() descriptor.getId()
) )
@ -290,8 +296,8 @@ public class SqlWindowFunctionsBenchmark
} else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) { } else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) {
walker.add( walker.add(
descriptor, descriptor,
FrameTestUtil.adapterToFrameSegment( FrameTestUtil.cursorFactoryToFrameSegment(
new QueryableIndexStorageAdapter(index), new QueryableIndexCursorFactory(index),
FrameType.COLUMNAR, FrameType.COLUMNAR,
descriptor.getId() descriptor.getId()
) )
@ -336,7 +342,8 @@ public class SqlWindowFunctionsBenchmark
{ {
final Map<String, Object> context = ImmutableMap.of( final Map<String, Object> context = ImmutableMap.of(
PlannerContext.CTX_ENABLE_WINDOW_FNS, true, PlannerContext.CTX_ENABLE_WINDOW_FNS, true,
QueryContexts.MAX_SUBQUERY_BYTES_KEY, "auto" QueryContexts.MAX_SUBQUERY_BYTES_KEY, "disabled",
QueryContexts.MAX_SUBQUERY_ROWS_KEY, -1
); );
try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) { try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
final PlannerResult plannerResult = planner.plan(); final PlannerResult plannerResult = planner.plan();
@ -420,4 +427,15 @@ public class SqlWindowFunctionsBenchmark
+ "GROUP BY dimUniform, dimSequential"; + "GROUP BY dimUniform, dimSequential";
querySql(sql, blackhole); querySql(sql, blackhole);
} }
@Benchmark
public void windowWithGroupbyTime(Blackhole blackhole)
{
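// Exercises a window function (LAG) over aggregated values while also grouping on __time.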
String sql = "SELECT "
+ "SUM(dimSequentialHalfNull) + SUM(dimHyperUnique), "
+ "LAG(SUM(dimSequentialHalfNull + dimHyperUnique)) OVER (PARTITION BY dimUniform ORDER BY dimSequential) "
+ "FROM foo "
+ "GROUP BY __time, dimUniform, dimSequential";
querySql(sql, blackhole);
}
} }


@ -28,7 +28,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>


@ -28,7 +28,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

dev/chmod-heap-dumps.sh (new executable file, 19 lines)

@ -0,0 +1,19 @@
#!/bin/bash -eux
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
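# Make any heap dump (.hprof) files under target/ at the repository root world-readable (mode 644).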
BASE_DIR=$(git rev-parse --show-toplevel)
chmod 644 ${BASE_DIR}/target/*.hprof


@ -30,7 +30,7 @@
<parent> <parent>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
</parent> </parent>
<dependencies> <dependencies>
@ -458,6 +458,8 @@
<argument>org.apache.druid.extensions.contrib:druid-spectator-histogram</argument> <argument>org.apache.druid.extensions.contrib:druid-spectator-histogram</argument>
<argument>-c</argument> <argument>-c</argument>
<argument>org.apache.druid.extensions.contrib:druid-rabbit-indexing-service</argument> <argument>org.apache.druid.extensions.contrib:druid-rabbit-indexing-service</argument>
<argument>-c</argument>
<argument>org.apache.druid.extensions.contrib:grpc-query</argument>
</arguments> </arguments>
</configuration> </configuration>
</execution> </execution>

File diff suppressed because one or more lines are too long

Binary image file changed (57 KiB before and after).


@ -616,9 +616,10 @@ the [HDFS input source](../ingestion/input-sources.md#hdfs-input-source).
You can set the following property to specify permissible protocols for You can set the following property to specify permissible protocols for
the [HTTP input source](../ingestion/input-sources.md#http-input-source). the [HTTP input source](../ingestion/input-sources.md#http-input-source).
|Property|Possible values|Description|Default| |Property| Possible values | Description |Default|
|--------|---------------|-----------|-------| |--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------|-------|
|`druid.ingestion.http.allowedProtocols`|List of protocols|Allowed protocols for the HTTP input source.|`["http", "https"]`| |`druid.ingestion.http.allowedProtocols`| List of protocols | Allowed protocols for the HTTP input source. |`["http", "https"]`|
|`druid.ingestion.http.allowedHeaders`| List of HTTP request headers | A list of permitted request headers for the HTTP input source. By default, the list is empty, which means no headers are allowed in the ingestion specification. |`[]`|
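For illustration only, the following sketch shows the kind of HTTP input source spec this allow list governs, with a custom request header supplied; the `requestHeaders` property name, the URI, and the header shown are assumptions for the example and should be checked against the HTTP input source reference.

```json
{
  "type": "http",
  "uris": ["https://example.com/events.json.gz"],
  "requestHeaders": {
    "x-api-key": "placeholder-value"
  }
}
```

With the default empty `druid.ingestion.http.allowedHeaders` list, a spec that supplies any header is rejected; the header name must first be added to the allow list.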
### External data access security configuration ### External data access security configuration
@ -1795,6 +1796,7 @@ This strategy can be enabled by setting `druid.query.scheduler.prioritization.st
|`druid.query.scheduler.prioritization.periodThreshold`|ISO duration threshold for how old data can be queried before automatically adjusting query priority.|none| |`druid.query.scheduler.prioritization.periodThreshold`|ISO duration threshold for how old data can be queried before automatically adjusting query priority.|none|
|`druid.query.scheduler.prioritization.durationThreshold`|ISO duration threshold for maximum duration a queries interval can span before the priority is automatically adjusted.|none| |`druid.query.scheduler.prioritization.durationThreshold`|ISO duration threshold for maximum duration a queries interval can span before the priority is automatically adjusted.|none|
|`druid.query.scheduler.prioritization.segmentCountThreshold`|Number threshold for maximum number of segments that can take part in a query before its priority is automatically adjusted.|none| |`druid.query.scheduler.prioritization.segmentCountThreshold`|Number threshold for maximum number of segments that can take part in a query before its priority is automatically adjusted.|none|
|`druid.query.scheduler.prioritization.segmentRangeThreshold`|ISO duration threshold for maximum segment range a query can span before the priority is automatically adjusted.|none|
|`druid.query.scheduler.prioritization.adjustment`|Amount to reduce the priority of queries which cross any threshold.|none| |`druid.query.scheduler.prioritization.adjustment`|Amount to reduce the priority of queries which cross any threshold.|none|
##### Laning strategies ##### Laning strategies


@ -51,9 +51,4 @@ java \
-c "org.apache.druid.extensions.contrib:druid-deltalake-extensions:<VERSION>" -c "org.apache.druid.extensions.contrib:druid-deltalake-extensions:<VERSION>"
``` ```
See [Loading community extensions](../../configuration/extensions.md#loading-community-extensions) for more information. See [Loading community extensions](../../configuration/extensions.md#loading-community-extensions) for more information.
## Known limitations
This extension relies on the Delta Kernel API and can only read from the latest Delta table snapshot. Ability to read from
arbitrary snapshots is tracked [here](https://github.com/delta-io/delta/issues/2581).


@ -31,9 +31,9 @@ This module can be used side to side with other lookup module like the global ca
To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `druid-lookups-cached-single` in the extensions load list. To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `druid-lookups-cached-single` in the extensions load list.
:::info :::info
If using JDBC, you will need to add your database's client JAR files to the extension's directory. To use JDBC, you must add your database client JAR files to the extension's directory.
For Postgres, the connector JAR is already included. For Postgres, the connector JAR is already included.
See the MySQL extension documentation for instructions to obtain [MySQL](./mysql.md#installing-the-mysql-connector-library) or [MariaDB](./mysql.md#alternative-installing-the-mariadb-connector-library) connector libraries. See the MySQL extension documentation for instructions to obtain [MySQL](./mysql.md#install-mysql-connectorj) or [MariaDB](./mysql.md#install-mariadb-connectorj) connector libraries.
Copy or symlink the downloaded file to `extensions/druid-lookups-cached-single` under the distribution root directory. Copy or symlink the downloaded file to `extensions/druid-lookups-cached-single` under the distribution root directory.
::: :::


@ -1,6 +1,6 @@
--- ---
id: mysql id: mysql
title: "MySQL Metadata Store" title: "MySQL metadata store"
--- ---
<!-- <!--
@ -25,41 +25,58 @@ title: "MySQL Metadata Store"
To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `mysql-metadata-storage` in the extensions load list. To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `mysql-metadata-storage` in the extensions load list.
:::info With the MySQL extension, you can use MySQL as a metadata store or ingest from a MySQL database.
The MySQL extension requires the MySQL Connector/J library or MariaDB Connector/J library, neither of which are included in the Druid distribution.
Refer to the following section for instructions on how to install this library.
:::
## Installing the MySQL connector library The extension requires a connector library that's not included with Druid.
See the [Prerequisites](#prerequisites) for installation instructions.
This extension can use Oracle's MySQL JDBC driver which is not included in the Druid distribution. You must ## Prerequisites
install it separately. There are a few ways to obtain this library:
- It can be downloaded from the MySQL site at: https://dev.mysql.com/downloads/connector/j/ To use the MySQL extension, you need to install one of the following libraries:
- It can be fetched from Maven Central at: https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.2.0/mysql-connector-j-8.2.0.jar * [MySQL Connector/J](#install-mysql-connectorj)
- It may be available through your package manager, e.g. as `libmysql-java` on APT for a Debian-based OS * [MariaDB Connector/J](#install-mariadb-connectorj)
This fetches the MySQL connector JAR file with a name like `mysql-connector-j-8.2.0.jar`. ### Install MySQL Connector/J
Copy or symlink this file inside the folder `extensions/mysql-metadata-storage` under the distribution root directory. The MySQL extension uses Oracle's MySQL JDBC driver.
The current version of Druid uses version 8.2.0.
Other versions may not work with this extension.
## Alternative: Installing the MariaDB connector library You can download the library from one of the following sources:
This extension also supports using the MariaDB connector jar, though it is also not included in the Druid distribution, so you must install it separately. - [MySQL website](https://dev.mysql.com/downloads/connector/j/)
Visit the archives page to access older product versions.
- [Maven Central (direct download)](https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.2.0/mysql-connector-j-8.2.0.jar)
- Your package manager. For example, `libmysql-java` on APT for a Debian-based OS.
- Download from the MariaDB site: https://mariadb.com/downloads/connector The download includes the MySQL connector JAR file with a name like `mysql-connector-j-8.2.0.jar`.
- Download from Maven Central: https://repo1.maven.org/maven2/org/mariadb/jdbc/mariadb-java-client/2.7.3/mariadb-java-client-2.7.3.jar Copy or create a symbolic link to this file inside the `lib` folder in the distribution root directory.
This fetches the MariaDB connector JAR file with a name like `maria-java-client-2.7.3.jar`. ### Install MariaDB Connector/J
Copy or symlink this file to `extensions/mysql-metadata-storage` under the distribution root directory. This extension also supports using the MariaDB connector jar.
The current version of Druid uses version 2.7.3.
Other versions may not work with this extension.
You can download the library from one of the following sources:
- [MariaDB website](https://mariadb.com/downloads/connectors/connectors-data-access/java8-connector)
Click **Show All Files** to access older product versions.
- [Maven Central (direct download)](https://repo1.maven.org/maven2/org/mariadb/jdbc/mariadb-java-client/2.7.3/mariadb-java-client-2.7.3.jar)
The download includes the MariaDB connector JAR file with a name like `mariadb-java-client-2.7.3.jar`.
Copy or create a symbolic link to this file inside the `lib` folder in the distribution root directory.
To configure the `mysql-metadata-storage` extension to use the MariaDB connector library instead of MySQL, set `druid.metadata.mysql.driver.driverClassName=org.mariadb.jdbc.Driver`. To configure the `mysql-metadata-storage` extension to use the MariaDB connector library instead of MySQL, set `druid.metadata.mysql.driver.driverClassName=org.mariadb.jdbc.Driver`.
Depending on the MariaDB client library version, the connector supports both `jdbc:mysql:` and `jdbc:mariadb:` connection URIs. However, the parameters to configure the connection vary between implementations, so be sure to [check the documentation](https://mariadb.com/kb/en/about-mariadb-connector-j/#connection-strings) for details. The protocol of the connection string is `jdbc:mysql:` or `jdbc:mariadb:`,
depending on your specific version of the MariaDB client library.
For more information on the parameters to configure a connection,
[see the MariaDB documentation](https://mariadb.com/kb/en/about-mariadb-connector-j/#connection-strings)
for your connector version.
## Setting up MySQL ## Set up MySQL
To avoid issues with upgrades that require schema changes to a large metadata table, consider a MySQL version that supports instant ADD COLUMN semantics. For example, MySQL 8. To avoid issues with upgrades that require schema changes to a large metadata table, consider a MySQL version that supports instant ADD COLUMN semantics. For example, MySQL 8.
@ -90,7 +107,7 @@ This extension also supports using MariaDB server, https://mariadb.org/download/
CREATE DATABASE druid DEFAULT CHARACTER SET utf8mb4; CREATE DATABASE druid DEFAULT CHARACTER SET utf8mb4;
-- create a druid user -- create a druid user
CREATE USER 'druid'@'localhost' IDENTIFIED BY 'diurd'; CREATE USER 'druid'@'localhost' IDENTIFIED BY 'password';
-- grant the user all the permissions on the database we just created -- grant the user all the permissions on the database we just created
GRANT ALL PRIVILEGES ON druid.* TO 'druid'@'localhost'; GRANT ALL PRIVILEGES ON druid.* TO 'druid'@'localhost';
@ -111,10 +128,11 @@ This extension also supports using MariaDB server, https://mariadb.org/download/
If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverClassName=org.mariadb.jdbc.Driver`. If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverClassName=org.mariadb.jdbc.Driver`.
## Encrypting MySQL connections ## Encrypt MySQL connections
This extension provides support for encrypting MySQL connections. To get more information about encrypting MySQL connections using TLS/SSL in general, please refer to this [guide](https://dev.mysql.com/doc/refman/5.7/en/using-encrypted-connections.html).
## Configuration This extension provides support for encrypting MySQL connections. To get more information about encrypting MySQL connections using TLS/SSL in general, please refer to this [guide](https://dev.mysql.com/doc/refman/5.7/en/using-encrypted-connections.html).
## Configuration properties
|Property|Description|Default|Required| |Property|Description|Default|Required|
|--------|-----------|-------|--------| |--------|-----------|-------|--------|
@ -129,7 +147,10 @@ If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverC
|`druid.metadata.mysql.ssl.enabledSSLCipherSuites`|Overrides the existing cipher suites with these cipher suites.|none|no| |`druid.metadata.mysql.ssl.enabledSSLCipherSuites`|Overrides the existing cipher suites with these cipher suites.|none|no|
|`druid.metadata.mysql.ssl.enabledTLSProtocols`|Overrides the TLS protocols with these protocols.|none|no| |`druid.metadata.mysql.ssl.enabledTLSProtocols`|Overrides the TLS protocols with these protocols.|none|no|
### MySQL InputSource ## MySQL input source
The MySQL extension provides an implementation of an SQL input source to ingest data into Druid from a MySQL database.
For more information on the input source parameters, see [SQL input source](../../ingestion/input-sources.md#sql-input-source).
```json ```json
{ {


@ -1,6 +1,6 @@
--- ---
id: postgresql id: postgresql
title: "PostgreSQL Metadata Store" title: "PostgreSQL metadata store"
--- ---
<!-- <!--
@ -25,7 +25,9 @@ title: "PostgreSQL Metadata Store"
To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `postgresql-metadata-storage` in the extensions load list. To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `postgresql-metadata-storage` in the extensions load list.
## Setting up PostgreSQL With the PostgreSQL extension, you can use PostgreSQL as a metadata store or ingest from a PostgreSQL database.
## Set up PostgreSQL
To avoid issues with upgrades that require schema changes to a large metadata table, consider a PostgreSQL version that supports instant ADD COLUMN semantics. To avoid issues with upgrades that require schema changes to a large metadata table, consider a PostgreSQL version that supports instant ADD COLUMN semantics.
@ -69,7 +71,7 @@ To avoid issues with upgrades that require schema changes to a large metadata ta
druid.metadata.storage.connector.password=diurd druid.metadata.storage.connector.password=diurd
``` ```
## Configuration ## Configuration properties
In most cases, the configuration options map directly to the [postgres JDBC connection options](https://jdbc.postgresql.org/documentation/use/#connecting-to-the-database). In most cases, the configuration options map directly to the [postgres JDBC connection options](https://jdbc.postgresql.org/documentation/use/#connecting-to-the-database).
@ -87,9 +89,10 @@ In most cases, the configuration options map directly to the [postgres JDBC conn
| `druid.metadata.postgres.ssl.sslPasswordCallback` | The classname of the SSL password provider. | none | no | | `druid.metadata.postgres.ssl.sslPasswordCallback` | The classname of the SSL password provider. | none | no |
| `druid.metadata.postgres.dbTableSchema` | druid meta table schema | `public` | no | | `druid.metadata.postgres.dbTableSchema` | druid meta table schema | `public` | no |
### PostgreSQL InputSource ## PostgreSQL input source
The PostgreSQL extension provides an implementation of an [SQL input source](../../ingestion/input-sources.md) which can be used to ingest data into Druid from a PostgreSQL database. The PostgreSQL extension provides an implementation of an SQL input source to ingest data into Druid from a PostgreSQL database.
For more information on the input source parameters, see [SQL input source](../../ingestion/input-sources.md#sql-input-source).
```json ```json
{ {


@ -125,6 +125,7 @@ Configure the CSV `inputFormat` to load CSV data as follows:
| columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing |
| findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) |
| skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) |
| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) |
For example: For example:
@ -150,6 +151,7 @@ Configure the TSV `inputFormat` to load TSV data as follows:
| columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing | | columns | JSON array | Specifies the columns of the data. The columns should be in the same order with the columns of your data. | yes if `findColumnsFromHeader` is false or missing |
| findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) | | findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) |
| skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) | | skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) |
| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) |
Be sure to change the `delimiter` to the appropriate delimiter for your data. Like CSV, you must specify the columns and which subset of the columns you want indexed. Be sure to change the `delimiter` to the appropriate delimiter for your data. Like CSV, you must specify the columns and which subset of the columns you want indexed.
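As a minimal sketch of the new `tryParseNumbers` option, the following TSV `inputFormat` reads column names from the header row and parses numeric-looking strings; the surrounding settings are assumed for illustration rather than taken from this document, and the same flag applies to the CSV format.

```json
{
  "type": "tsv",
  "delimiter": "\t",
  "findColumnsFromHeader": true,
  "tryParseNumbers": true
}
```

With this flag set, a value such as `"42"` is ingested as a long and `"3.14"` as a double, while anything that fails numeric parsing is kept as a string.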


@ -29,10 +29,8 @@ For general information on native batch indexing and parallel task indexing, see
## S3 input source ## S3 input source
:::info :::info Required extension
To use the S3 input source, load the extension [`druid-s3-extensions`](../development/extensions-core/s3.md) in your `common.runtime.properties` file.
You need to include the [`druid-s3-extensions`](../development/extensions-core/s3.md) as an extension to use the S3 input source.
::: :::
The S3 input source reads objects directly from S3. You can specify either: The S3 input source reads objects directly from S3. You can specify either:
@ -41,7 +39,7 @@ The S3 input source reads objects directly from S3. You can specify either:
* a list of S3 location prefixes that attempts to list the contents and ingest * a list of S3 location prefixes that attempts to list the contents and ingest
all objects contained within the locations. all objects contained within the locations.
The S3 input source is splittable. Therefore, you can use it with the [Parallel task](./native-batch.md). Each worker task of `index_parallel` reads one or multiple objects. The S3 input source is splittable. Therefore, you can use it with the [parallel task](./native-batch.md). Each worker task of `index_parallel` reads one or multiple objects.
Sample specs: Sample specs:
@ -219,16 +217,14 @@ If `accessKeyId` and `secretAccessKey` are not given, the default [S3 credential
## Google Cloud Storage input source ## Google Cloud Storage input source
:::info :::info Required extension
To use the Google Cloud Storage input source, load the extension [`druid-google-extensions`](../development/extensions-core/google.md) in your `common.runtime.properties` file.
You need to include the [`druid-google-extensions`](../development/extensions-core/google.md) as an extension to use the Google Cloud Storage input source.
::: :::
The Google Cloud Storage input source is to support reading objects directly The Google Cloud Storage input source is to support reading objects directly
from Google Cloud Storage. Objects can be specified as list of Google from Google Cloud Storage. Objects can be specified as list of Google
Cloud Storage URI strings. The Google Cloud Storage input source is splittable Cloud Storage URI strings. The Google Cloud Storage input source is splittable
and can be used by the [Parallel task](./native-batch.md), where each worker task of `index_parallel` will read and can be used by the [parallel task](./native-batch.md), where each worker task of `index_parallel` will read
one or multiple objects. one or multiple objects.
Sample specs: Sample specs:
@ -307,14 +303,12 @@ Google Cloud Storage object:
## Azure input source ## Azure input source
:::info :::info Required extension
To use the Azure input source, load the extension [`druid-azure-extensions`](../development/extensions-core/azure.md) in your `common.runtime.properties` file.
You need to include the [`druid-azure-extensions`](../development/extensions-core/azure.md) as an extension to use the Azure input source.
::: :::
The Azure input source (that uses the type `azureStorage`) reads objects directly from Azure Blob store or Azure Data Lake sources. You can The Azure input source (that uses the type `azureStorage`) reads objects directly from Azure Blob store or Azure Data Lake sources. You can
specify objects as a list of file URI strings or prefixes. You can split the Azure input source for use with [Parallel task](./native-batch.md) indexing and each worker task reads one chunk of the split data. specify objects as a list of file URI strings or prefixes. You can split the Azure input source for use with [parallel task](./native-batch.md) indexing and each worker task reads one chunk of the split data.
The `azureStorage` input source is a new schema for Azure input sources that allows you to specify which storage account files should be ingested from. We recommend that you update any specs that use the old `azure` schema to use the new `azureStorage` schema. The new schema provides more functionality than the older `azure` schema. The `azureStorage` input source is a new schema for Azure input sources that allows you to specify which storage account files should be ingested from. We recommend that you update any specs that use the old `azure` schema to use the new `azureStorage` schema. The new schema provides more functionality than the older `azure` schema.
@ -491,15 +485,13 @@ The `objects` property is:
## HDFS input source ## HDFS input source
:::info :::info Required extension
To use the HDFS input source, load the extension [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) in your `common.runtime.properties` file.
You need to include the [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) as an extension to use the HDFS input source.
::: :::
The HDFS input source is to support reading files directly The HDFS input source is to support reading files directly
from HDFS storage. File paths can be specified as an HDFS URI string or a list from HDFS storage. File paths can be specified as an HDFS URI string or a list
of HDFS URI strings. The HDFS input source is splittable and can be used by the [Parallel task](./native-batch.md), of HDFS URI strings. The HDFS input source is splittable and can be used by the [parallel task](./native-batch.md),
where each worker task of `index_parallel` will read one or multiple files. where each worker task of `index_parallel` will read one or multiple files.
Sample specs: Sample specs:
@ -593,7 +585,7 @@ The `http` input source is not limited to the HTTP or HTTPS protocols. It uses t
For more information about security best practices, see [Security overview](../operations/security-overview.md#best-practices). For more information about security best practices, see [Security overview](../operations/security-overview.md#best-practices).
The HTTP input source is _splittable_ and can be used by the [Parallel task](./native-batch.md), The HTTP input source is _splittable_ and can be used by the [parallel task](./native-batch.md),
where each worker task of `index_parallel` will read only one file. This input source does not support Split Hint Spec. where each worker task of `index_parallel` will read only one file. This input source does not support Split Hint Spec.
Sample specs: Sample specs:
@ -701,7 +693,7 @@ Sample spec:
The Local input source is to support reading files directly from local storage, The Local input source is to support reading files directly from local storage,
and is mainly intended for proof-of-concept testing. and is mainly intended for proof-of-concept testing.
The Local input source is _splittable_ and can be used by the [Parallel task](./native-batch.md), The Local input source is _splittable_ and can be used by the [parallel task](./native-batch.md),
where each worker task of `index_parallel` will read one or multiple files. where each worker task of `index_parallel` will read one or multiple files.
Sample spec: Sample spec:
@ -736,7 +728,7 @@ Sample spec:
The Druid input source is to support reading data directly from existing Druid segments, The Druid input source is to support reading data directly from existing Druid segments,
potentially using a new schema and changing the name, dimensions, metrics, rollup, etc. of the segment. potentially using a new schema and changing the name, dimensions, metrics, rollup, etc. of the segment.
The Druid input source is _splittable_ and can be used by the [Parallel task](./native-batch.md). The Druid input source is _splittable_ and can be used by the [parallel task](./native-batch.md).
This input source has a fixed input format for reading from Druid segments; This input source has a fixed input format for reading from Druid segments;
no `inputFormat` field needs to be specified in the ingestion spec when using this input source. no `inputFormat` field needs to be specified in the ingestion spec when using this input source.
@ -833,17 +825,29 @@ For more information on the `maxNumConcurrentSubTasks` field, see [Implementatio
## SQL input source ## SQL input source
:::info Required extension
To use the SQL input source, you must load the appropriate extension in your `common.runtime.properties` file.
* To connect to MySQL, load the extension [`mysql-metadata-storage`](../development/extensions-core/mysql.md).
* To connect to PostgreSQL, load the extension [`postgresql-metadata-storage`](../development/extensions-core/postgresql.md).
The MySQL extension requires a JDBC driver.
For more information, see [Install MySQL Connector/J](../development/extensions-core/mysql.md#install-mysql-connectorj).
:::
The SQL input source is used to read data directly from RDBMS. The SQL input source is used to read data directly from RDBMS.
The SQL input source is _splittable_ and can be used by the [Parallel task](./native-batch.md), where each worker task will read from one SQL query from the list of queries. You can _split_ the ingestion tasks for a SQL input source. When you use the [parallel task](./native-batch.md) type, each worker task reads the results of one SQL query from the list of queries.
This input source does not support Split Hint Spec. This input source does not support Split Hint Spec.
Since this input source has a fixed input format for reading events, no `inputFormat` field needs to be specified in the ingestion spec when using this input source.
Please refer to the Recommended practices section below before using this input source. The SQL input source has a fixed input format for reading events.
Don't specify `inputFormat` when using this input source.
Refer to the [recommended practices](#recommended-practices) before using this input source.
|Property|Description|Required| |Property|Description|Required|
|--------|-----------|---------| |--------|-----------|---------|
|type|Set the value to `sql`.|Yes| |type|Set the value to `sql`.|Yes|
|database|Specifies the database connection details. The database type corresponds to the extension that supplies the `connectorConfig` support. The specified extension must be loaded into Druid:<br/><br/><ul><li>[mysql-metadata-storage](../development/extensions-core/mysql.md) for `mysql`</li><li> [postgresql-metadata-storage](../development/extensions-core/postgresql.md) extension for `postgresql`.</li></ul><br/><br/>You can selectively allow JDBC properties in `connectURI`. See [JDBC connections security config](../configuration/index.md#jdbc-connections-to-external-databases) for more details.|Yes| |database|Specifies the database connection details. The database type corresponds to the extension that supplies the `connectorConfig` support.<br/><br/>You can selectively allow JDBC properties in `connectURI`. See [JDBC connections security config](../configuration/index.md#jdbc-connections-to-external-databases) for more details.|Yes|
|foldCase|Toggle case folding of database column names. This may be enabled in cases where the database returns case insensitive column names in query results.|No| |foldCase|Boolean to toggle case folding of database column names. For example, to ingest a database column named `Entry_Date` as `entry_date`, set `foldCase` to true and include `entry_date` in the [`dimensionsSpec`](ingestion-spec.md#dimensionsspec).|No|
|sqls|List of SQL queries where each SQL query would retrieve the data to be indexed.|Yes| |sqls|List of SQL queries where each SQL query would retrieve the data to be indexed.|Yes|
The following is an example of an SQL input source spec: The following is an example of an SQL input source spec:
@ -887,7 +891,7 @@ Compared to the other native batch input sources, SQL input source behaves diffe
The Combining input source lets you read data from multiple input sources. The Combining input source lets you read data from multiple input sources.
It identifies the splits from delegate input sources and uses a worker task to process each split. It identifies the splits from delegate input sources and uses a worker task to process each split.
Use the Combining input source only if all the delegates are splittable and can be used by the [Parallel task](./native-batch.md). Each delegate input source must be splittable and compatible with the [parallel task type](./native-batch.md).
Similar to other input sources, the Combining input source supports a single `inputFormat`. Similar to other input sources, the Combining input source supports a single `inputFormat`.
Delegate input sources that require an `inputFormat` must have the same format for input data. Delegate input sources that require an `inputFormat` must have the same format for input data.
@ -931,10 +935,8 @@ The following is an example of a Combining input source spec:
## Iceberg input source ## Iceberg input source
:::info :::info Required extension
To use the Iceberg input source, load the extension [`druid-iceberg-extensions`](../development/extensions-contrib/iceberg.md) in your `common.runtime.properties` file.
To use the Iceberg input source, load the extension [`druid-iceberg-extensions`](../development/extensions-contrib/iceberg.md).
::: :::
You use the Iceberg input source to read data stored in the Iceberg table format. For a given table, the input source scans up to the latest Iceberg snapshot from the configured Hive catalog. Druid ingests the underlying live data files using the existing input source formats. You use the Iceberg input source to read data stored in the Iceberg table format. For a given table, the input source scans up to the latest Iceberg snapshot from the configured Hive catalog. Druid ingests the underlying live data files using the existing input source formats.
@ -1138,20 +1140,19 @@ This input source provides the following filters: `and`, `equals`, `interval`, a
## Delta Lake input source ## Delta Lake input source
:::info :::info Required extension
To use the Delta Lake input source, load the extension [`druid-deltalake-extensions`](../development/extensions-contrib/delta-lake.md) in your `common.runtime.properties` file.
To use the Delta Lake input source, load the extension [`druid-deltalake-extensions`](../development/extensions-contrib/delta-lake.md).
::: :::
You can use the Delta input source to read data stored in a Delta Lake table. For a given table, the input source scans You can use the Delta input source to read data stored in a Delta Lake table. For a given table, the input source scans
the latest snapshot from the configured table. Druid ingests the underlying delta files from the table. the latest snapshot from the configured table. Druid ingests the underlying delta files from the table.
| Property|Description|Required| | Property|Description| Default|Required |
|---------|-----------|--------| |---------|-----------|--------|--------|
| type|Set this value to `delta`.|yes| |type|Set this value to `delta`.| None|yes|
| tablePath|The location of the Delta table.|yes| |tablePath|The location of the Delta table.|None|yes|
| filter|The JSON Object that filters data files within a snapshot.|no| |filter|The JSON Object that filters data files within a snapshot.|None|no|
|snapshotVersion|The snapshot version to read from the Delta table. An integer value must be specified.|Latest|no|
### Delta filter object ### Delta filter object
@ -1224,7 +1225,7 @@ filters on partitioned columns.
| column | The table column to apply the filter on. | yes | | column | The table column to apply the filter on. | yes |
| value | The value to use in the filter. | yes | | value | The value to use in the filter. | yes |
The following is a sample spec to read all records from the Delta table `/delta-table/foo`: The following is a sample spec to read all records from the latest snapshot from Delta table `/delta-table/foo`:
```json ```json
... ...
@ -1237,7 +1238,8 @@ The following is a sample spec to read all records from the Delta table `/delta-
} }
``` ```
The following is a sample spec to read records from the Delta table `/delta-table/foo` to select records where `name = 'Employee4' and age >= 30`: The following is a sample spec that reads records from snapshot version `3` of the Delta table `/delta-table/foo`, selecting records where
`name = 'Employee4' and age >= 30`:
```json ```json
... ...
@ -1260,7 +1262,8 @@ The following is a sample spec to read records from the Delta table `/delta-tabl
"value": "30" "value": "30"
} }
] ]
} },
"snapshotVersion": 3
}, },
} }
``` ```


@ -124,7 +124,7 @@ For configuration properties shared across all streaming ingestion methods, refe
|`topicPattern`|String|Multiple Kafka topics to read from, passed as a regex pattern. See [Ingest from multiple topics](#ingest-from-multiple-topics) for more information.|Yes if `topic` isn't set.|| |`topicPattern`|String|Multiple Kafka topics to read from, passed as a regex pattern. See [Ingest from multiple topics](#ingest-from-multiple-topics) for more information.|Yes if `topic` isn't set.||
|`consumerProperties`|String, Object|A map of properties to pass to the Kafka consumer. See [Consumer properties](#consumer-properties) for details.|Yes. At the minimum, you must set the `bootstrap.servers` property to establish the initial connection to the Kafka cluster.|| |`consumerProperties`|String, Object|A map of properties to pass to the Kafka consumer. See [Consumer properties](#consumer-properties) for details.|Yes. At the minimum, you must set the `bootstrap.servers` property to establish the initial connection to the Kafka cluster.||
|`pollTimeout`|Long|The length of time to wait for the Kafka consumer to poll records, in milliseconds.|No|100| |`pollTimeout`|Long|The length of time to wait for the Kafka consumer to poll records, in milliseconds.|No|100|
|`useEarliestOffset`|Boolean|If a supervisor manages a datasource for the first time, it obtains a set of starting offsets from Kafka. This flag determines whether it retrieves the earliest or latest offsets in Kafka. Under normal circumstances, subsequent tasks start from where the previous segments ended. Druid only uses `useEarliestOffset` on the first run.|No|`false`| |`useEarliestOffset`|Boolean|If a supervisor is managing a datasource for the first time, it obtains a set of starting offsets from Kafka. This flag determines whether the supervisor retrieves the earliest or latest offsets in Kafka. Under normal circumstances, subsequent tasks start from where the previous segments ended so this flag is only used on the first run.|No|`false`|
|`idleConfig`|Object|Defines how and when the Kafka supervisor can become idle. See [Idle configuration](#idle-configuration) for more details.|No|null| |`idleConfig`|Object|Defines how and when the Kafka supervisor can become idle. See [Idle configuration](#idle-configuration) for more details.|No|null|
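As a rough sketch, a Kafka supervisor `ioConfig` that combines several of the properties above might look like the following; the topic name and broker address are placeholders, not values taken from this document.

```json
{
  "type": "kafka",
  "topic": "example_topic",
  "consumerProperties": {
    "bootstrap.servers": "kafka01.example.com:9092"
  },
  "useEarliestOffset": false,
  "pollTimeout": 100
}
```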
#### Ingest from multiple topics #### Ingest from multiple topics


@ -128,7 +128,7 @@ For configuration properties shared across all streaming ingestion methods, refe
|--------|----|-----------|--------|-------| |--------|----|-----------|--------|-------|
|`stream`|String|The Kinesis stream to read.|Yes|| |`stream`|String|The Kinesis stream to read.|Yes||
|`endpoint`|String|The AWS Kinesis stream endpoint for a region. You can find a list of endpoints in the [AWS service endpoints](http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region) document.|No|`kinesis.us-east-1.amazonaws.com`| |`endpoint`|String|The AWS Kinesis stream endpoint for a region. You can find a list of endpoints in the [AWS service endpoints](http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region) document.|No|`kinesis.us-east-1.amazonaws.com`|
|`useEarliestSequenceNumber`|Boolean|If a supervisor is managing a datasource for the first time, it obtains a set of starting sequence numbers from Kinesis. This flag determines whether a supervisor retrieves the earliest or latest sequence numbers in Kinesis. Under normal circumstances, subsequent tasks start from where the previous segments ended so this flag is only used on the first run.|No|`false`| |`useEarliestSequenceNumber`|Boolean|If a supervisor is managing a datasource for the first time, it obtains a set of starting sequence numbers from Kinesis. This flag determines whether the supervisor retrieves the earliest or latest sequence numbers in Kinesis. Under normal circumstances, subsequent tasks start from where the previous segments ended so this flag is only used on the first run.|No|`false`|
|`fetchDelayMillis`|Integer|Time in milliseconds to wait between subsequent calls to fetch records from Kinesis. See [Determine fetch settings](#determine-fetch-settings).|No|0| |`fetchDelayMillis`|Integer|Time in milliseconds to wait between subsequent calls to fetch records from Kinesis. See [Determine fetch settings](#determine-fetch-settings).|No|0|
|`awsAssumedRoleArn`|String|The AWS assumed role to use for additional permissions.|No|| |`awsAssumedRoleArn`|String|The AWS assumed role to use for additional permissions.|No||
|`awsExternalId`|String|The AWS external ID to use for additional permissions.|No|| |`awsExternalId`|String|The AWS external ID to use for additional permissions.|No||
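Similarly, here is a hedged sketch of a Kinesis `ioConfig` built from these properties; the stream name is a placeholder and the endpoint simply repeats the documented default.

```json
{
  "type": "kinesis",
  "stream": "example_stream",
  "endpoint": "kinesis.us-east-1.amazonaws.com",
  "useEarliestSequenceNumber": false,
  "fetchDelayMillis": 0
}
```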
@ -155,7 +155,7 @@ For configuration properties shared across all streaming ingestion methods, refe
|Property|Type|Description|Required|Default| |Property|Type|Description|Required|Default|
|--------|----|-----------|--------|-------| |--------|----|-----------|--------|-------|
|`skipSequenceNumberAvailabilityCheck`|Boolean|Whether to enable checking if the current sequence number is still available in a particular Kinesis shard. If `false`, the indexing task attempts to reset the current sequence number, depending on the value of `resetOffsetAutomatically`.|No|`false`| |`skipSequenceNumberAvailabilityCheck`|Boolean|Whether to enable checking if the current sequence number is still available in a particular Kinesis shard. If `false`, the indexing task attempts to reset the current sequence number, depending on the value of `resetOffsetAutomatically`. For more information on the `resetOffsetAutomatically` property, see [Supervisor tuning configuration](supervisor.md#tuning-configuration).|No|`false`|
|`recordBufferSizeBytes`|Integer| The size of the buffer (heap memory bytes) Druid uses between the Kinesis fetch threads and the main ingestion thread.|No| See [Determine fetch settings](#determine-fetch-settings) for defaults.| |`recordBufferSizeBytes`|Integer| The size of the buffer (heap memory bytes) Druid uses between the Kinesis fetch threads and the main ingestion thread.|No| See [Determine fetch settings](#determine-fetch-settings) for defaults.|
|`recordBufferOfferTimeout`|Integer|The number of milliseconds to wait for space to become available in the buffer before timing out.|No|5000| |`recordBufferOfferTimeout`|Integer|The number of milliseconds to wait for space to become available in the buffer before timing out.|No|5000|
|`recordBufferFullWait`|Integer|The number of milliseconds to wait for the buffer to drain before Druid attempts to fetch records from Kinesis again.|No|5000| |`recordBufferFullWait`|Integer|The number of milliseconds to wait for the buffer to drain before Druid attempts to fetch records from Kinesis again.|No|5000|
@ -315,7 +315,7 @@ This window with early task shutdowns and possible task failures concludes when:
- All closed shards have been fully read and the Kinesis ingestion tasks have published the data from those shards, committing the "closed" state to metadata storage. - All closed shards have been fully read and the Kinesis ingestion tasks have published the data from those shards, committing the "closed" state to metadata storage.
- Any remaining tasks that had inactive shards in the assignment have been shut down. These tasks would have been created before the closed shards were completely drained. - Any remaining tasks that had inactive shards in the assignment have been shut down. These tasks would have been created before the closed shards were completely drained.
Note that when the supervisor is running and detects new partitions, tasks read new partitions from the earliest offsets, irrespective of the `useEarliestSequence` setting. This is because these new shards were immediately discovered and are therefore unlikely to experience a lag. Note that when the supervisor is running and detects new partitions, tasks read new partitions from the earliest sequence number, irrespective of the `useEarliestSequence` setting. This is because these new shards were immediately discovered and are therefore unlikely to experience a lag.
If resharding occurs when the supervisor is suspended and `useEarliestSequence` is set to `false`, resuming the supervisor causes tasks to read the new shards from the latest sequence. This is by design so that the consumer can catch up quickly with any lag accumulated while the supervisor was suspended. If resharding occurs when the supervisor is suspended and `useEarliestSequence` is set to `false`, resuming the supervisor causes tasks to read the new shards from the latest sequence. This is by design so that the consumer can catch up quickly with any lag accumulated while the supervisor was suspended.
@ -324,7 +324,7 @@ If resharding occurs when the supervisor is suspended and `useEarliestSequence`
Before you deploy the `druid-kinesis-indexing-service` extension to production, consider the following known issues: Before you deploy the `druid-kinesis-indexing-service` extension to production, consider the following known issues:
- Kinesis imposes a read throughput limit per shard. If you have multiple supervisors reading from the same Kinesis stream, consider adding more shards to ensure sufficient read throughput for all supervisors. - Kinesis imposes a read throughput limit per shard. If you have multiple supervisors reading from the same Kinesis stream, consider adding more shards to ensure sufficient read throughput for all supervisors.
- A Kinesis supervisor can sometimes compare the checkpoint offset to retention window of the stream to see if it has fallen behind. These checks fetch the earliest sequence number for Kinesis which can result in `IteratorAgeMilliseconds` becoming very high in AWS CloudWatch. - A Kinesis supervisor can sometimes compare the checkpoint sequence number to the retention window of the stream to see if it has fallen behind. These checks fetch the earliest sequence number for Kinesis which can result in `IteratorAgeMilliseconds` becoming very high in AWS CloudWatch.
## Learn more ## Learn more


@ -204,7 +204,7 @@ For configuration properties specific to Kafka and Kinesis, see [Kafka tuning co
|`indexSpecForIntermediatePersists`|Object|Defines segment storage format options to use at indexing time for intermediate persisted temporary segments. You can use `indexSpecForIntermediatePersists` to disable dimension/metric compression on intermediate segments to reduce memory required for final merging. However, disabling compression on intermediate segments might increase page cache use while they are used before getting merged into final segment published.|No|| |`indexSpecForIntermediatePersists`|Object|Defines segment storage format options to use at indexing time for intermediate persisted temporary segments. You can use `indexSpecForIntermediatePersists` to disable dimension/metric compression on intermediate segments to reduce memory required for final merging. However, disabling compression on intermediate segments might increase page cache use while they are used before getting merged into final segment published.|No||
|`reportParseExceptions`|Boolean|DEPRECATED. If `true`, Druid throws exceptions encountered during parsing causing ingestion to halt. If `false`, Druid skips unparseable rows and fields. Setting `reportParseExceptions` to `true` overrides existing configurations for `maxParseExceptions` and `maxSavedParseExceptions`, setting `maxParseExceptions` to 0 and limiting `maxSavedParseExceptions` to not more than 1.|No|`false`| |`reportParseExceptions`|Boolean|DEPRECATED. If `true`, Druid throws exceptions encountered during parsing causing ingestion to halt. If `false`, Druid skips unparseable rows and fields. Setting `reportParseExceptions` to `true` overrides existing configurations for `maxParseExceptions` and `maxSavedParseExceptions`, setting `maxParseExceptions` to 0 and limiting `maxSavedParseExceptions` to not more than 1.|No|`false`|
|`handoffConditionTimeout`|Long|Number of milliseconds to wait for segment handoff. Set to a value >= 0, where 0 means to wait indefinitely.|No|900000 (15 minutes) for Kafka. 0 for Kinesis.| |`handoffConditionTimeout`|Long|Number of milliseconds to wait for segment handoff. Set to a value >= 0, where 0 means to wait indefinitely.|No|900000 (15 minutes) for Kafka. 0 for Kinesis.|
|`resetOffsetAutomatically`|Boolean|Resets partitions when the sequence number is unavailable. If set to `true`, Druid resets partitions to the earliest or latest offset, based on the value of `useEarliestSequenceNumber` or `useEarliestOffset` (earliest if `true`, latest if `false`). If set to `false`, Druid surfaces the exception causing tasks to fail and ingestion to halt. If this occurs, manual intervention is required to correct the situation, potentially through [resetting the supervisor](../api-reference/supervisor-api.md#reset-a-supervisor).|No|`false`| |`resetOffsetAutomatically`|Boolean|Resets partitions when the offset is unavailable. If set to `true`, Druid resets partitions to the earliest or latest offset, based on the value of `useEarliestOffset` or `useEarliestSequenceNumber` (earliest if `true`, latest if `false`). If set to `false`, Druid surfaces the exception causing tasks to fail and ingestion to halt. If this occurs, manual intervention is required to correct the situation, potentially through [resetting the supervisor](../api-reference/supervisor-api.md#reset-a-supervisor).|No|`false`|
|`workerThreads`|Integer|The number of threads that the supervisor uses to handle requests/responses for worker tasks, along with any other internal asynchronous operation.|No|`min(10, taskCount)`| |`workerThreads`|Integer|The number of threads that the supervisor uses to handle requests/responses for worker tasks, along with any other internal asynchronous operation.|No|`min(10, taskCount)`|
|`chatRetries`|Integer|The number of times Druid retries HTTP requests to indexing tasks before considering tasks unresponsive.|No|8| |`chatRetries`|Integer|The number of times Druid retries HTTP requests to indexing tasks before considering tasks unresponsive.|No|8|
|`httpTimeout`|ISO 8601 period|The period of time to wait for a HTTP response from an indexing task.|No|`PT10S`| |`httpTimeout`|ISO 8601 period|The period of time to wait for a HTTP response from an indexing task.|No|`PT10S`|
@ -214,6 +214,7 @@ For configuration properties specific to Kafka and Kinesis, see [Kafka tuning co
|`logParseExceptions`|Boolean|If `true`, Druid logs an error message when a parsing exception occurs, containing information about the row where the error occurred.|No|`false`| |`logParseExceptions`|Boolean|If `true`, Druid logs an error message when a parsing exception occurs, containing information about the row where the error occurred.|No|`false`|
|`maxParseExceptions`|Integer|The maximum number of parse exceptions that can occur before the task halts ingestion and fails. Setting `reportParseExceptions` overrides this limit.|No|unlimited| |`maxParseExceptions`|Integer|The maximum number of parse exceptions that can occur before the task halts ingestion and fails. Setting `reportParseExceptions` overrides this limit.|No|unlimited|
|`maxSavedParseExceptions`|Integer|When a parse exception occurs, Druid keeps track of the most recent parse exceptions. `maxSavedParseExceptions` limits the number of saved exception instances. These saved exceptions are available after the task finishes in the [task completion report](../ingestion/tasks.md#task-reports). Setting `reportParseExceptions` overrides this limit.|No|0| |`maxSavedParseExceptions`|Integer|When a parse exception occurs, Druid keeps track of the most recent parse exceptions. `maxSavedParseExceptions` limits the number of saved exception instances. These saved exceptions are available after the task finishes in the [task completion report](../ingestion/tasks.md#task-reports). Setting `reportParseExceptions` overrides this limit.|No|0|
|`maxColumnsToMerge`|Integer|Limit of the number of segments to merge in a single phase when merging segments for publishing. This limit affects the total number of columns present in a set of segments to merge. If the limit is exceeded, segment merging occurs in multiple phases. Druid merges at least 2 segments per phase, regardless of this setting.|No|-1|
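To tie several of these properties together, here is a sketch of a Kafka `tuningConfig`; the values shown are illustrative and mostly echo the documented defaults rather than recommending settings.

```json
{
  "type": "kafka",
  "handoffConditionTimeout": 900000,
  "resetOffsetAutomatically": false,
  "logParseExceptions": true,
  "maxSavedParseExceptions": 10,
  "maxColumnsToMerge": -1
}
```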
## Start a supervisor ## Start a supervisor
@ -395,7 +396,7 @@ For information on how to terminate a supervisor by API, see [Supervisors: Termi
Indexing tasks run on Middle Managers and are limited by the resources available in the Middle Manager cluster. In particular, you should make sure that you have sufficient worker capacity, configured using the Indexing tasks run on Middle Managers and are limited by the resources available in the Middle Manager cluster. In particular, you should make sure that you have sufficient worker capacity, configured using the
`druid.worker.capacity` property, to handle the configuration in the supervisor spec. Note that worker capacity is `druid.worker.capacity` property, to handle the configuration in the supervisor spec. Note that worker capacity is
shared across all types of indexing tasks, so you should plan your worker capacity to handle your total indexing load, such as batch processing, streaming tasks, and merging tasks. If your workers run out of capacity, indexing tasks queue and wait for the next available worker. This may cause queries to return partial results but will not result in data loss, assuming the tasks run before the stream purges those sequence numbers. shared across all types of indexing tasks, so you should plan your worker capacity to handle your total indexing load, such as batch processing, streaming tasks, and merging tasks. If your workers run out of capacity, indexing tasks queue and wait for the next available worker. This may cause queries to return partial results but will not result in data loss, assuming the tasks run before the stream purges those offsets.
A running task can be in one of two states: reading or publishing. A task remains in reading state for the period defined in `taskDuration`, at which point it transitions to publishing state. A task remains in publishing state for as long as it takes to generate segments, push segments to deep storage, and have them loaded and served by a Historical service or until `completionTimeout` elapses. A running task can be in one of two states: reading or publishing. A task remains in reading state for the period defined in `taskDuration`, at which point it transitions to publishing state. A task remains in publishing state for as long as it takes to generate segments, push segments to deep storage, and have them loaded and served by a Historical service or until `completionTimeout` elapses.

View File

@ -36,8 +36,8 @@ Task APIs are available in two main places:
- The [Overlord](../design/overlord.md) process offers HTTP APIs to submit tasks, cancel tasks, check their status, - The [Overlord](../design/overlord.md) process offers HTTP APIs to submit tasks, cancel tasks, check their status,
review logs and reports, and more. Refer to the [Tasks API reference](../api-reference/tasks-api.md) for a review logs and reports, and more. Refer to the [Tasks API reference](../api-reference/tasks-api.md) for a
full list. full list.
- Druid SQL includes a [`sys.tasks`](../querying/sql-metadata-tables.md#tasks-table) table that provides information about currently - Druid SQL includes a [`sys.tasks`](../querying/sql-metadata-tables.md#tasks-table) table that provides information about active
running tasks. This table is read-only, and has a limited (but useful!) subset of the full information available through and recently completed tasks. This table is read-only and has a subset of the full task report available through
the Overlord APIs. the Overlord APIs.
<a name="reports"></a> <a name="reports"></a>

View File

@ -508,6 +508,8 @@ These metrics are only available if the `OshiSysMonitor` module is included.
|`sys/tcpv4/out/rsts`|Total "out reset" packets sent to reset the connection||Generally 0| |`sys/tcpv4/out/rsts`|Total "out reset" packets sent to reset the connection||Generally 0|
|`sys/tcpv4/retrans/segs`|Total segments re-transmitted||Varies| |`sys/tcpv4/retrans/segs`|Total segments re-transmitted||Varies|
If you want to enable only some of these metric categories, specify `druid.monitoring.sys.categories`.
Possible values are `mem`, `swap`, `fs`, `disk`, `net`, `cpu`, `sys`, and `tcp`.
## S3 multi-part upload ## S3 multi-part upload

View File

@ -377,7 +377,7 @@ The JDBC lookups will poll a database to populate its local cache. If the `tsCol
:::info :::info
If using JDBC, you will need to add your database's client JAR files to the extension's directory. If using JDBC, you will need to add your database's client JAR files to the extension's directory.
For Postgres, the connector JAR is already included. For Postgres, the connector JAR is already included.
See the MySQL extension documentation for instructions to obtain [MySQL](../development/extensions-core/mysql.md#installing-the-mysql-connector-library) or [MariaDB](../development/extensions-core/mysql.md#alternative-installing-the-mariadb-connector-library) connector libraries. See the MySQL extension documentation for instructions to obtain [MySQL](../development/extensions-core/mysql.md#install-mysql-connectorj) or [MariaDB](../development/extensions-core/mysql.md#install-mariadb-connectorj) connector libraries.
The connector JAR should reside in the classpath of Druid's main class loader. The connector JAR should reside in the classpath of Druid's main class loader.
To add the connector JAR to the classpath, you can copy the downloaded file to `lib/` under the distribution root directory. Alternatively, create a symbolic link to the connector in the `lib` directory. To add the connector JAR to the classpath, you can copy the downloaded file to `lib/` under the distribution root directory. Alternatively, create a symbolic link to the connector in the `lib` directory.
::: :::

View File

@ -246,6 +246,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX<jso
| to_json_string(expr) | Convert `expr` into a JSON `STRING` value | | to_json_string(expr) | Convert `expr` into a JSON `STRING` value |
| json_keys(expr, path) | Get array of field names from `expr` at the specified JSONPath `path`, or null if the data does not exist or have any fields | | json_keys(expr, path) | Get array of field names from `expr` at the specified JSONPath `path`, or null if the data does not exist or have any fields |
| json_paths(expr) | Get array of all JSONPath paths available from `expr` | | json_paths(expr) | Get array of all JSONPath paths available from `expr` |
| json_merge(expr1, expr2[, expr3 ...]) | Merges two or more JSON `STRING` or `COMPLEX<json>` values into one. Preserves the rightmost value when keys overlap. |
### JSONPath syntax ### JSONPath syntax

File diff suppressed because it is too large

View File

@ -38,6 +38,7 @@ You can use the following JSON functions to extract, transform, and create `COMP
| --- | --- | | --- | --- |
|`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the specified `path`.| |`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the specified `path`.|
|`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to string types. The `VALUE` expressions can be composed of any input type, including other `COMPLEX<json>` values. `JSON_OBJECT` can accept colon-separated key-value pairs. The following syntax is equivalent: `JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.| |`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to string types. The `VALUE` expressions can be composed of any input type, including other `COMPLEX<json>` values. `JSON_OBJECT` can accept colon-separated key-value pairs. The following syntax is equivalent: `JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.|
|`JSON_MERGE(expr1, expr2[, expr3 ...])`| Merges two or more JSON `STRING` or `COMPLEX<json>` values into one. Preserves the rightmost value when keys overlap. Always returns a `COMPLEX<json>` type. See the example after this table.|
|`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal values in `expr` in JSONPath format. | |`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal values in `expr` in JSONPath format. |
|`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr`, at the specified `path`. | |`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr`, at the specified `path`. |
|`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from `expr` at the specified `path`. If value is not an `ARRAY`, it gets translated into a single element `ARRAY` containing the value at `path`. The primary use of this function is to extract arrays of objects to use as inputs to other [array functions](./sql-array-functions.md).| |`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from `expr` at the specified `path`. If value is not an `ARRAY`, it gets translated into a single element `ARRAY` containing the value at `path`. The primary use of this function is to extract arrays of objects to use as inputs to other [array functions](./sql-array-functions.md).|
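As an illustration of the new `JSON_MERGE` function (not part of the patch above), here is a minimal sketch that runs it through Druid's JDBC (Avatica) endpoint; the Router URL and the literal JSON arguments are assumptions for the example.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class JsonMergeExample
{
  public static void main(String[] args) throws Exception
  {
    // Assumed local Router address; adjust for your deployment and put the Avatica driver on the classpath.
    final String url = "jdbc:avatica:remote:url=http://localhost:8888/druid/v2/sql/avatica/";
    try (Connection connection = DriverManager.getConnection(url);
         Statement statement = connection.createStatement();
         ResultSet resultSet = statement.executeQuery(
             "SELECT JSON_MERGE('{\"a\":1,\"b\":2}', '{\"b\":3,\"c\":4}') AS merged"
         )) {
      while (resultSet.next()) {
        // Rightmost value wins on the overlapping key "b".
        System.out.println(resultSet.getString("merged"));
      }
    }
  }
}
```

With the rightmost-wins rule, the overlapping key `b` takes the value from the second argument, so the result should be equivalent to `{"a":1,"b":3,"c":4}`.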

View File

@ -266,7 +266,7 @@ GROUP BY servers.server;
### TASKS table ### TASKS table
The tasks table provides information about active and recently-completed indexing tasks. For more information The tasks table provides information about active and recently completed tasks. For more information
check out the documentation for [ingestion tasks](../ingestion/tasks.md). check out the documentation for [ingestion tasks](../ingestion/tasks.md).
|Column|Type|Notes| |Column|Type|Notes|

View File

@ -173,10 +173,10 @@ overhead.
|`MILLIS_TO_TIMESTAMP(millis_expr)`|Converts a number of milliseconds since the epoch (1970-01-01 00:00:00 UTC) into a timestamp.| |`MILLIS_TO_TIMESTAMP(millis_expr)`|Converts a number of milliseconds since the epoch (1970-01-01 00:00:00 UTC) into a timestamp.|
|`TIMESTAMP_TO_MILLIS(timestamp_expr)`|Converts a timestamp into a number of milliseconds since the epoch.| |`TIMESTAMP_TO_MILLIS(timestamp_expr)`|Converts a timestamp into a number of milliseconds since the epoch.|
|`EXTRACT(unit FROM timestamp_expr)`|Extracts a time part from `expr`, returning it as a number. Unit can be EPOCH, MICROSECOND, MILLISECOND, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), ISODOW (ISO day of week), DOY (day of year), WEEK (week of year), MONTH, QUARTER, YEAR, ISOYEAR, DECADE, CENTURY or MILLENNIUM. Units must be provided unquoted, like `EXTRACT(HOUR FROM __time)`.| |`EXTRACT(unit FROM timestamp_expr)`|Extracts a time part from `expr`, returning it as a number. Unit can be EPOCH, MICROSECOND, MILLISECOND, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), ISODOW (ISO day of week), DOY (day of year), WEEK (week of year), MONTH, QUARTER, YEAR, ISOYEAR, DECADE, CENTURY or MILLENNIUM. Units must be provided unquoted, like `EXTRACT(HOUR FROM __time)`.|
|`FLOOR(timestamp_expr TO unit)`|Rounds down a timestamp, returning it as a new timestamp. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.| |`FLOOR(timestamp_expr TO unit)`|Rounds down a timestamp, returning it as a new timestamp. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|`CEIL(timestamp_expr TO unit)`|Rounds up a timestamp, returning it as a new timestamp. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.| |`CEIL(timestamp_expr TO unit)`|Rounds up a timestamp, returning it as a new timestamp. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|`TIMESTAMPADD(unit, count, timestamp)`|Equivalent to `timestamp + count * INTERVAL '1' UNIT`.| |`TIMESTAMPADD(unit, count, timestamp)`|Adds a `count` number of time `unit` to timestamp, equivalent to `timestamp + count * unit`. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|`TIMESTAMPDIFF(unit, timestamp1, timestamp2)`|Returns the (signed) number of `unit` between `timestamp1` and `timestamp2`. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.| |`TIMESTAMPDIFF(unit, timestamp1, timestamp2)`|Returns a signed number of `unit` between `timestamp1` and `timestamp2`. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
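To make the unquoted-unit rule concrete, a small sketch (an illustration, not part of the patch) that evaluates `FLOOR`, `TIMESTAMPADD`, and `TIMESTAMPDIFF` over the same JDBC (Avatica) endpoint assumed in the earlier example; the timestamp literals are arbitrary sample values.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class TimeFunctionExample
{
  public static void main(String[] args) throws Exception
  {
    // Assumed local Router address; adjust for your deployment.
    final String url = "jdbc:avatica:remote:url=http://localhost:8888/druid/v2/sql/avatica/";
    // Units are unquoted keywords, per the table above.
    final String sql = "SELECT"
                       + " FLOOR(TIMESTAMP '2024-06-15 10:30:00' TO MONTH) AS month_start,"           // 2024-06-01T00:00:00
                       + " TIMESTAMPADD(DAY, 7, TIMESTAMP '2024-06-15 10:30:00') AS one_week_later,"  // 2024-06-22T10:30:00
                       + " TIMESTAMPDIFF(HOUR, TIMESTAMP '2024-06-15 00:00:00',"
                       + "               TIMESTAMP '2024-06-16 06:00:00') AS hours_between";          // 30
    try (Connection connection = DriverManager.getConnection(url);
         Statement statement = connection.createStatement();
         ResultSet resultSet = statement.executeQuery(sql)) {
      while (resultSet.next()) {
        System.out.println(
            resultSet.getString("month_start") + " | "
            + resultSet.getString("one_week_later") + " | "
            + resultSet.getLong("hours_between")
        );
      }
    }
  }
}
```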
## Reduction functions ## Reduction functions

View File

@ -1,6 +1,5 @@
-server -server
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms15g -Xms15g
-Xmx15g -Xmx15g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -1,7 +1,6 @@
-server -server
-Xms1g -Xms1g
-Xmx1g -Xmx1g
-XX:+UseG1GC
-XX:MaxDirectMemorySize=128m -XX:MaxDirectMemorySize=128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-Duser.timezone=UTC -Duser.timezone=UTC

View File

@ -3,7 +3,6 @@
-Xmx12g -Xmx12g
-XX:MaxDirectMemorySize=11g -XX:MaxDirectMemorySize=11g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms15g -Xms15g
-Xmx15g -Xmx15g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -3,7 +3,6 @@
-Xmx16g -Xmx16g
-XX:MaxDirectMemorySize=25g -XX:MaxDirectMemorySize=25g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms256m -Xms256m
-Xmx256m -Xmx256m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -1,7 +1,6 @@
-server -server
-Xms1g -Xms1g
-Xmx1g -Xmx1g
-XX:+UseG1GC
-XX:MaxDirectMemorySize=128m -XX:MaxDirectMemorySize=128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-Duser.timezone=UTC -Duser.timezone=UTC

View File

@ -3,7 +3,6 @@
-Xmx8g -Xmx8g
-XX:MaxDirectMemorySize=5g -XX:MaxDirectMemorySize=5g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms9g -Xms9g
-Xmx9g -Xmx9g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -3,7 +3,6 @@
-Xmx8g -Xmx8g
-XX:MaxDirectMemorySize=13g -XX:MaxDirectMemorySize=13g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms256m -Xms256m
-Xmx256m -Xmx256m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -1,7 +1,6 @@
-server -server
-Xms512m -Xms512m
-Xmx512m -Xmx512m
-XX:+UseG1GC
-XX:MaxDirectMemorySize=128m -XX:MaxDirectMemorySize=128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-Duser.timezone=UTC -Duser.timezone=UTC

View File

@ -3,7 +3,6 @@
-Xmx512m -Xmx512m
-XX:MaxDirectMemorySize=768m -XX:MaxDirectMemorySize=768m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms256m -Xms256m
-Xmx256m -Xmx256m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -3,7 +3,6 @@
-Xmx512m -Xmx512m
-XX:MaxDirectMemorySize=1280m -XX:MaxDirectMemorySize=1280m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms64m -Xms64m
-Xmx64m -Xmx64m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -1,7 +1,6 @@
-server -server
-Xms128m -Xms128m
-Xmx128m -Xmx128m
-XX:+UseG1GC
-XX:MaxDirectMemorySize=128m -XX:MaxDirectMemorySize=128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-Duser.timezone=UTC -Duser.timezone=UTC

View File

@ -3,7 +3,6 @@
-Xmx512m -Xmx512m
-XX:MaxDirectMemorySize=400m -XX:MaxDirectMemorySize=400m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms256m -Xms256m
-Xmx256m -Xmx256m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -3,7 +3,6 @@
-Xmx512m -Xmx512m
-XX:MaxDirectMemorySize=400m -XX:MaxDirectMemorySize=400m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms64m -Xms64m
-Xmx64m -Xmx64m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -1,7 +1,6 @@
-server -server
-Xms128m -Xms128m
-Xmx128m -Xmx128m
-XX:+UseG1GC
-XX:MaxDirectMemorySize=128m -XX:MaxDirectMemorySize=128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-Duser.timezone=UTC -Duser.timezone=UTC

View File

@ -3,7 +3,6 @@
-Xmx4g -Xmx4g
-XX:MaxDirectMemorySize=3g -XX:MaxDirectMemorySize=3g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms4500m -Xms4500m
-Xmx4500m -Xmx4500m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -3,7 +3,6 @@
-Xmx4g -Xmx4g
-XX:MaxDirectMemorySize=8g -XX:MaxDirectMemorySize=8g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms128m -Xms128m
-Xmx128m -Xmx128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -1,10 +1,8 @@
-server -server
-Xms512m -Xms512m
-Xmx512m -Xmx512m
-XX:+UseG1GC
-XX:MaxDirectMemorySize=128m -XX:MaxDirectMemorySize=128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -3,7 +3,6 @@
-Xmx16g -Xmx16g
-XX:MaxDirectMemorySize=12g -XX:MaxDirectMemorySize=12g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms18g -Xms18g
-Xmx18g -Xmx18g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -3,7 +3,6 @@
-Xmx24g -Xmx24g
-XX:MaxDirectMemorySize=44g -XX:MaxDirectMemorySize=44g
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -2,7 +2,6 @@
-Xms256m -Xms256m
-Xmx256m -Xmx256m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-XX:+UseG1GC
-Duser.timezone=UTC -Duser.timezone=UTC
-Dfile.encoding=UTF-8 -Dfile.encoding=UTF-8
-Djava.io.tmpdir=var/tmp -Djava.io.tmpdir=var/tmp

View File

@ -1,7 +1,6 @@
-server -server
-Xms1g -Xms1g
-Xmx1g -Xmx1g
-XX:+UseG1GC
-XX:MaxDirectMemorySize=128m -XX:MaxDirectMemorySize=128m
-XX:+ExitOnOutOfMemoryError -XX:+ExitOnOutOfMemoryError
-Duser.timezone=UTC -Duser.timezone=UTC

View File

@ -28,7 +28,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

View File

@ -537,7 +537,7 @@ public class OssInputSourceTest extends InitializedNullHandlingTest
InputSourceReader reader = inputSource.reader( InputSourceReader reader = inputSource.reader(
someSchema, someSchema,
new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null),
temporaryFolder.newFolder() temporaryFolder.newFolder()
); );
@ -584,7 +584,7 @@ public class OssInputSourceTest extends InitializedNullHandlingTest
InputSourceReader reader = inputSource.reader( InputSourceReader reader = inputSource.reader(
someSchema, someSchema,
new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0), new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null),
temporaryFolder.newFolder() temporaryFolder.newFolder()
); );

View File

@ -24,7 +24,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

View File

@ -29,7 +29,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

View File

@ -29,7 +29,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

View File

@ -25,7 +25,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

View File

@ -22,7 +22,7 @@
<parent> <parent>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -29,7 +29,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

View File

@ -35,8 +35,8 @@ import org.apache.druid.query.timeseries.TimeseriesResultValue;
import org.apache.druid.segment.IncrementalIndexTimeBoundaryInspector; import org.apache.druid.segment.IncrementalIndexTimeBoundaryInspector;
import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexCursorFactory;
import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex; import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.testing.InitializedNullHandlingTest;
import org.joda.time.DateTime; import org.joda.time.DateTime;
@ -104,7 +104,7 @@ public class DistinctCountTimeseriesQueryTest extends InitializedNullHandlingTes
final Iterable<Result<TimeseriesResultValue>> results = final Iterable<Result<TimeseriesResultValue>> results =
engine.process( engine.process(
query, query,
new IncrementalIndexStorageAdapter(index), new IncrementalIndexCursorFactory(index),
new IncrementalIndexTimeBoundaryInspector(index), new IncrementalIndexTimeBoundaryInspector(index),
new DefaultTimeseriesQueryMetrics() new DefaultTimeseriesQueryMetrics()
).toList(); ).toList();

View File

@ -33,13 +33,13 @@ import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder; import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.query.topn.TopNQueryEngine; import org.apache.druid.query.topn.TopNQueryEngine;
import org.apache.druid.query.topn.TopNResultValue; import org.apache.druid.query.topn.TopNResultValue;
import org.apache.druid.segment.IncrementalIndexTimeBoundaryInspector; import org.apache.druid.segment.IncrementalIndexSegment;
import org.apache.druid.segment.TestHelper; import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.incremental.IncrementalIndex; import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex; import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.testing.InitializedNullHandlingTest;
import org.apache.druid.timeline.SegmentId;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -133,8 +133,7 @@ public class DistinctCountTopNQueryTest extends InitializedNullHandlingTest
final Iterable<Result<TopNResultValue>> results = final Iterable<Result<TopNResultValue>> results =
engine.query( engine.query(
query, query,
new IncrementalIndexStorageAdapter(index), new IncrementalIndexSegment(index, SegmentId.dummy(QueryRunnerTestHelper.DATA_SOURCE)),
new IncrementalIndexTimeBoundaryInspector(index),
null null
).toList(); ).toList();

View File

@ -24,7 +24,7 @@
<parent> <parent>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>

View File

@ -29,7 +29,7 @@
<parent> <parent>
<artifactId>druid</artifactId> <artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId> <groupId>org.apache.druid</groupId>
<version>31.0.0-SNAPSHOT</version> <version>32.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -67,9 +67,9 @@ import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
/** /**
* Input source to ingest data from a Delta Lake. This input source reads the latest snapshot from a Delta table * Input source to ingest data from a Delta Lake. This input source reads the given {@code snapshotVersion} from a Delta
* specified by {@code tablePath} parameter. If {@code filter} is specified, it's used at the Kernel level * table specified by {@code tablePath} parameter, or the latest snapshot if it's not specified.
* for data pruning. The filtering behavior is as follows: * If {@code filter} is specified, it's used at the Kernel level for data pruning. The filtering behavior is as follows:
* <ul> * <ul>
* <li> When a filter is applied on a partitioned table using the partitioning columns, the filtering is guaranteed. </li> * <li> When a filter is applied on a partitioned table using the partitioning columns, the filtering is guaranteed. </li>
* <li> When a filter is applied on non-partitioned columns, the filtering is best-effort as the Delta * <li> When a filter is applied on non-partitioned columns, the filtering is best-effort as the Delta
@ -79,7 +79,6 @@ import java.util.stream.Stream;
* <p> * <p>
* We leverage the Delta Kernel APIs to interact with a Delta table. The Kernel API abstracts away the * We leverage the Delta Kernel APIs to interact with a Delta table. The Kernel API abstracts away the
* complexities of the Delta protocol itself. * complexities of the Delta protocol itself.
* Note: currently, the Kernel table API only supports reading from the latest snapshot.
* </p> * </p>
*/ */
public class DeltaInputSource implements SplittableInputSource<DeltaSplit> public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
@ -97,11 +96,15 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
@Nullable @Nullable
private final DeltaFilter filter; private final DeltaFilter filter;
@JsonProperty
private final Long snapshotVersion;
@JsonCreator @JsonCreator
public DeltaInputSource( public DeltaInputSource(
@JsonProperty("tablePath") final String tablePath, @JsonProperty("tablePath") final String tablePath,
@JsonProperty("deltaSplit") @Nullable final DeltaSplit deltaSplit, @JsonProperty("deltaSplit") @Nullable final DeltaSplit deltaSplit,
@JsonProperty("filter") @Nullable final DeltaFilter filter @JsonProperty("filter") @Nullable final DeltaFilter filter,
@JsonProperty("snapshotVersion") @Nullable final Long snapshotVersion
) )
{ {
if (tablePath == null) { if (tablePath == null) {
@ -110,6 +113,7 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
this.tablePath = tablePath; this.tablePath = tablePath;
this.deltaSplit = deltaSplit; this.deltaSplit = deltaSplit;
this.filter = filter; this.filter = filter;
this.snapshotVersion = snapshotVersion;
} }
@Override @Override
@ -152,15 +156,15 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
} }
} else { } else {
final Table table = Table.forPath(engine, tablePath); final Table table = Table.forPath(engine, tablePath);
final Snapshot latestSnapshot = getLatestSnapshotForTable(table, engine); final Snapshot snapshot = getSnapshotForTable(table, engine);
final StructType fullSnapshotSchema = latestSnapshot.getSchema(engine); final StructType fullSnapshotSchema = snapshot.getSchema(engine);
final StructType prunedSchema = pruneSchema( final StructType prunedSchema = pruneSchema(
fullSnapshotSchema, fullSnapshotSchema,
inputRowSchema.getColumnsFilter() inputRowSchema.getColumnsFilter()
); );
final ScanBuilder scanBuilder = latestSnapshot.getScanBuilder(engine); final ScanBuilder scanBuilder = snapshot.getScanBuilder(engine);
if (filter != null) { if (filter != null) {
scanBuilder.withFilter(engine, filter.getFilterPredicate(fullSnapshotSchema)); scanBuilder.withFilter(engine, filter.getFilterPredicate(fullSnapshotSchema));
} }
@ -206,17 +210,17 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
} }
final Engine engine = createDeltaEngine(); final Engine engine = createDeltaEngine();
final Snapshot latestSnapshot; final Snapshot snapshot;
final Table table = Table.forPath(engine, tablePath); final Table table = Table.forPath(engine, tablePath);
try { try {
latestSnapshot = getLatestSnapshotForTable(table, engine); snapshot = getSnapshotForTable(table, engine);
} }
catch (TableNotFoundException e) { catch (TableNotFoundException e) {
throw InvalidInput.exception(e, "tablePath[%s] not found.", tablePath); throw InvalidInput.exception(e, "tablePath[%s] not found.", tablePath);
} }
final StructType fullSnapshotSchema = latestSnapshot.getSchema(engine); final StructType fullSnapshotSchema = snapshot.getSchema(engine);
final ScanBuilder scanBuilder = latestSnapshot.getScanBuilder(engine); final ScanBuilder scanBuilder = snapshot.getScanBuilder(engine);
if (filter != null) { if (filter != null) {
scanBuilder.withFilter(engine, filter.getFilterPredicate(fullSnapshotSchema)); scanBuilder.withFilter(engine, filter.getFilterPredicate(fullSnapshotSchema));
} }
@ -254,7 +258,8 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
return new DeltaInputSource( return new DeltaInputSource(
tablePath, tablePath,
split.get(), split.get(),
filter filter,
snapshotVersion
); );
} }
@ -333,7 +338,7 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
); );
} }
private Snapshot getLatestSnapshotForTable(final Table table, final Engine engine) private Snapshot getSnapshotForTable(final Table table, final Engine engine)
{ {
// Setting the LogStore class loader before calling the Delta Kernel snapshot API is required as a workaround with // Setting the LogStore class loader before calling the Delta Kernel snapshot API is required as a workaround with
// the 3.2.0 Delta Kernel because the Kernel library cannot instantiate the LogStore class otherwise. Please see // the 3.2.0 Delta Kernel because the Kernel library cannot instantiate the LogStore class otherwise. Please see
@ -341,7 +346,11 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
final ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader(); final ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader();
try { try {
Thread.currentThread().setContextClassLoader(LogStore.class.getClassLoader()); Thread.currentThread().setContextClassLoader(LogStore.class.getClassLoader());
return table.getLatestSnapshot(engine); if (snapshotVersion != null) {
return table.getSnapshotAsOfVersion(engine, snapshotVersion);
} else {
return table.getLatestSnapshot(engine);
}
} }
finally { finally {
Thread.currentThread().setContextClassLoader(currCtxCl); Thread.currentThread().setContextClassLoader(currCtxCl);
@ -359,4 +368,10 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
{ {
return filter; return filter;
} }
@VisibleForTesting
Long getSnapshotVersion()
{
return snapshotVersion;
}
} }
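For context, a minimal sketch (not part of the patch) showing how the new `snapshotVersion` parameter is passed to the constructor above; the table path and version number are placeholders, and the package name is assumed to match the tests (`org.apache.druid.delta.input`).

```java
import org.apache.druid.delta.input.DeltaInputSource;

public class DeltaSnapshotVersionExample
{
  public static void main(String[] args)
  {
    // Pin ingestion to snapshot 2 of a (placeholder) Delta table.
    final DeltaInputSource pinned = new DeltaInputSource(
        "/data/delta/employee-table",  // tablePath (placeholder)
        null,                          // deltaSplit: null for the initial, unsplit source
        null,                          // filter: no Kernel-level pruning
        2L                             // snapshotVersion: read snapshot 2
    );

    // Passing null for snapshotVersion keeps the previous behavior of reading the latest snapshot.
    final DeltaInputSource latest = new DeltaInputSource(
        "/data/delta/employee-table",
        null,
        null,
        null
    );

    System.out.println("Created input sources: " + pinned + ", " + latest);
  }
}
```

The equivalent JSON spec is the one exercised in `DeltaInputSourceSerdeTest` below: `{"type": "delta", "tablePath": "foo/bar", "snapshotVersion": 56}`.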

View File

@ -55,7 +55,8 @@ public class DeltaInputRowTest
Object[][] data = new Object[][]{ Object[][] data = new Object[][]{
{NonPartitionedDeltaTable.DELTA_TABLE_PATH, NonPartitionedDeltaTable.FULL_SCHEMA, NonPartitionedDeltaTable.DIMENSIONS, NonPartitionedDeltaTable.EXPECTED_ROWS}, {NonPartitionedDeltaTable.DELTA_TABLE_PATH, NonPartitionedDeltaTable.FULL_SCHEMA, NonPartitionedDeltaTable.DIMENSIONS, NonPartitionedDeltaTable.EXPECTED_ROWS},
{PartitionedDeltaTable.DELTA_TABLE_PATH, PartitionedDeltaTable.FULL_SCHEMA, PartitionedDeltaTable.DIMENSIONS, PartitionedDeltaTable.EXPECTED_ROWS}, {PartitionedDeltaTable.DELTA_TABLE_PATH, PartitionedDeltaTable.FULL_SCHEMA, PartitionedDeltaTable.DIMENSIONS, PartitionedDeltaTable.EXPECTED_ROWS},
{ComplexTypesDeltaTable.DELTA_TABLE_PATH, ComplexTypesDeltaTable.FULL_SCHEMA, ComplexTypesDeltaTable.DIMENSIONS, ComplexTypesDeltaTable.EXPECTED_ROWS} {ComplexTypesDeltaTable.DELTA_TABLE_PATH, ComplexTypesDeltaTable.FULL_SCHEMA, ComplexTypesDeltaTable.DIMENSIONS, ComplexTypesDeltaTable.EXPECTED_ROWS},
{SnapshotDeltaTable.DELTA_TABLE_PATH, SnapshotDeltaTable.FULL_SCHEMA, SnapshotDeltaTable.DIMENSIONS, SnapshotDeltaTable.LATEST_SNAPSHOT_EXPECTED_ROWS}
}; };
return Arrays.asList(data); return Arrays.asList(data);
} }
@ -124,7 +125,7 @@ public class DeltaInputRowTest
@ParameterizedTest(name = "{index}:with context {0}") @ParameterizedTest(name = "{index}:with context {0}")
public void testReadNonExistentTable() public void testReadNonExistentTable()
{ {
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null); final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null, null);
MatcherAssert.assertThat( MatcherAssert.assertThat(
Assert.assertThrows( Assert.assertThrows(

View File

@ -139,4 +139,18 @@ public class DeltaInputSourceSerdeTest
exception.getCause().getMessage() exception.getCause().getMessage()
); );
} }
@Test
public void testDeltaInputSourceWithSnapshotVersion() throws JsonProcessingException
{
final String payload = "{\n"
+ " \"type\": \"delta\",\n"
+ " \"tablePath\": \"foo/bar\",\n"
+ " \"snapshotVersion\": 56\n"
+ " }";
final DeltaInputSource deltaInputSource = OBJECT_MAPPER.readValue(payload, DeltaInputSource.class);
Assert.assertEquals("foo/bar", deltaInputSource.getTablePath());
Assert.assertEquals((Long) 56L, deltaInputSource.getSnapshotVersion());
}
} }

View File

@ -19,6 +19,7 @@
package org.apache.druid.delta.input; package org.apache.druid.delta.input;
import io.delta.kernel.exceptions.KernelException;
import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues; import org.apache.druid.data.input.InputRowListPlusRawValues;
import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.InputRowSchema;
@ -68,27 +69,62 @@ public class DeltaInputSourceTest
{ {
NonPartitionedDeltaTable.DELTA_TABLE_PATH, NonPartitionedDeltaTable.DELTA_TABLE_PATH,
NonPartitionedDeltaTable.FULL_SCHEMA, NonPartitionedDeltaTable.FULL_SCHEMA,
null,
NonPartitionedDeltaTable.EXPECTED_ROWS NonPartitionedDeltaTable.EXPECTED_ROWS
}, },
{ {
NonPartitionedDeltaTable.DELTA_TABLE_PATH, NonPartitionedDeltaTable.DELTA_TABLE_PATH,
NonPartitionedDeltaTable.SCHEMA_1, NonPartitionedDeltaTable.SCHEMA_1,
null,
NonPartitionedDeltaTable.EXPECTED_ROWS NonPartitionedDeltaTable.EXPECTED_ROWS
}, },
{ {
NonPartitionedDeltaTable.DELTA_TABLE_PATH, NonPartitionedDeltaTable.DELTA_TABLE_PATH,
NonPartitionedDeltaTable.SCHEMA_2, NonPartitionedDeltaTable.SCHEMA_2,
null,
NonPartitionedDeltaTable.EXPECTED_ROWS NonPartitionedDeltaTable.EXPECTED_ROWS
}, },
{ {
PartitionedDeltaTable.DELTA_TABLE_PATH, PartitionedDeltaTable.DELTA_TABLE_PATH,
PartitionedDeltaTable.FULL_SCHEMA, PartitionedDeltaTable.FULL_SCHEMA,
null,
PartitionedDeltaTable.EXPECTED_ROWS PartitionedDeltaTable.EXPECTED_ROWS
}, },
{ {
ComplexTypesDeltaTable.DELTA_TABLE_PATH, ComplexTypesDeltaTable.DELTA_TABLE_PATH,
ComplexTypesDeltaTable.FULL_SCHEMA, ComplexTypesDeltaTable.FULL_SCHEMA,
null,
ComplexTypesDeltaTable.EXPECTED_ROWS ComplexTypesDeltaTable.EXPECTED_ROWS
},
{
SnapshotDeltaTable.DELTA_TABLE_PATH,
SnapshotDeltaTable.FULL_SCHEMA,
0L,
SnapshotDeltaTable.V0_SNAPSHOT_EXPECTED_ROWS
},
{
SnapshotDeltaTable.DELTA_TABLE_PATH,
SnapshotDeltaTable.FULL_SCHEMA,
1L,
SnapshotDeltaTable.V1_SNAPSHOT_EXPECTED_ROWS
},
{
SnapshotDeltaTable.DELTA_TABLE_PATH,
SnapshotDeltaTable.FULL_SCHEMA,
2L,
SnapshotDeltaTable.V2_SNAPSHOT_EXPECTED_ROWS
},
{
SnapshotDeltaTable.DELTA_TABLE_PATH,
SnapshotDeltaTable.FULL_SCHEMA,
3L,
SnapshotDeltaTable.LATEST_SNAPSHOT_EXPECTED_ROWS
},
{
SnapshotDeltaTable.DELTA_TABLE_PATH,
SnapshotDeltaTable.FULL_SCHEMA,
null,
SnapshotDeltaTable.LATEST_SNAPSHOT_EXPECTED_ROWS
} }
}; };
} }
@ -98,12 +134,14 @@ public class DeltaInputSourceTest
@Parameterized.Parameter(1) @Parameterized.Parameter(1)
public InputRowSchema schema; public InputRowSchema schema;
@Parameterized.Parameter(2) @Parameterized.Parameter(2)
public Long snapshotVersion;
@Parameterized.Parameter(3)
public List<Map<String, Object>> expectedRows; public List<Map<String, Object>> expectedRows;
@Test @Test
public void testSampleDeltaTable() throws IOException public void testSampleDeltaTable() throws IOException
{ {
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null); final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null, snapshotVersion);
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
List<InputRowListPlusRawValues> actualSampledRows = sampleAllRows(inputSourceReader); List<InputRowListPlusRawValues> actualSampledRows = sampleAllRows(inputSourceReader);
@ -137,7 +175,7 @@ public class DeltaInputSourceTest
@Test @Test
public void testReadDeltaTable() throws IOException public void testReadDeltaTable() throws IOException
{ {
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null); final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null, snapshotVersion);
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
final List<InputRow> actualReadRows = readAllRows(inputSourceReader); final List<InputRow> actualReadRows = readAllRows(inputSourceReader);
validateRows(expectedRows, actualReadRows, schema); validateRows(expectedRows, actualReadRows, schema);
@ -269,7 +307,7 @@ public class DeltaInputSourceTest
@Test @Test
public void testSampleDeltaTable() throws IOException public void testSampleDeltaTable() throws IOException
{ {
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter); final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter, null);
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
List<InputRowListPlusRawValues> actualSampledRows = sampleAllRows(inputSourceReader); List<InputRowListPlusRawValues> actualSampledRows = sampleAllRows(inputSourceReader);
@ -311,7 +349,7 @@ public class DeltaInputSourceTest
@Test @Test
public void testReadDeltaTable() throws IOException public void testReadDeltaTable() throws IOException
{ {
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter); final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter, null);
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null); final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
final List<InputRow> actualReadRows = readAllRows(inputSourceReader); final List<InputRow> actualReadRows = readAllRows(inputSourceReader);
validateRows(expectedRows, actualReadRows, schema); validateRows(expectedRows, actualReadRows, schema);
@ -326,7 +364,7 @@ public class DeltaInputSourceTest
MatcherAssert.assertThat( MatcherAssert.assertThat(
Assert.assertThrows( Assert.assertThrows(
DruidException.class, DruidException.class,
() -> new DeltaInputSource(null, null, null) () -> new DeltaInputSource(null, null, null, null)
), ),
DruidExceptionMatcher.invalidInput().expectMessageIs( DruidExceptionMatcher.invalidInput().expectMessageIs(
"tablePath cannot be null." "tablePath cannot be null."
@ -337,7 +375,7 @@ public class DeltaInputSourceTest
@Test @Test
public void testSplitNonExistentTable() public void testSplitNonExistentTable()
{ {
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null); final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null, null);
MatcherAssert.assertThat( MatcherAssert.assertThat(
Assert.assertThrows( Assert.assertThrows(
@ -353,7 +391,7 @@ public class DeltaInputSourceTest
@Test @Test
public void testReadNonExistentTable() public void testReadNonExistentTable()
{ {
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null); final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null, null);
MatcherAssert.assertThat( MatcherAssert.assertThat(
Assert.assertThrows( Assert.assertThrows(
@ -365,6 +403,22 @@ public class DeltaInputSourceTest
) )
); );
} }
@Test
public void testReadNonExistentSnapshot()
{
final DeltaInputSource deltaInputSource = new DeltaInputSource(
SnapshotDeltaTable.DELTA_TABLE_PATH,
null,
null,
100L
);
Assert.assertThrows(
KernelException.class,
() -> deltaInputSource.reader(null, null, null)
);
}
} }
private static List<InputRowListPlusRawValues> sampleAllRows(InputSourceReader reader) throws IOException private static List<InputRowListPlusRawValues> sampleAllRows(InputSourceReader reader) throws IOException

View File

@ -37,7 +37,9 @@ public class RowSerdeTest
{ {
Object[][] data = new Object[][]{ Object[][] data = new Object[][]{
{NonPartitionedDeltaTable.DELTA_TABLE_PATH}, {NonPartitionedDeltaTable.DELTA_TABLE_PATH},
{PartitionedDeltaTable.DELTA_TABLE_PATH} {PartitionedDeltaTable.DELTA_TABLE_PATH},
{ComplexTypesDeltaTable.DELTA_TABLE_PATH},
{SnapshotDeltaTable.DELTA_TABLE_PATH}
}; };
return Arrays.asList(data); return Arrays.asList(data);
} }

View File

@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.delta.input;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.ColumnsFilter;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.segment.AutoTypeColumnSchema;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* Refer to extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md to generate the
* sample Delta Lake table with multiple snapshots used in the unit tests.
*
*/
public class SnapshotDeltaTable
{
/**
* The Delta table path used by unit tests.
*/
public static final String DELTA_TABLE_PATH = "src/test/resources/snapshot-table";
/**
* The list of dimensions in the Delta table {@link #DELTA_TABLE_PATH}.
*/
public static final List<String> DIMENSIONS = ImmutableList.of("id", "map_info");
public static final List<Map<String, Object>> V0_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
ImmutableList.of(
ImmutableMap.of(
"id", 0L,
"map_info", ImmutableMap.of("snapshotVersion", 0)
),
ImmutableMap.of(
"id", 1L,
"map_info", ImmutableMap.of("snapshotVersion", 0)
),
ImmutableMap.of(
"id", 2L,
"map_info", ImmutableMap.of("snapshotVersion", 0)
)
)
);
public static final List<Map<String, Object>> V1_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
ImmutableList.of(
ImmutableMap.of(
"id", 0L,
"map_info", ImmutableMap.of("snapshotVersion", 0)
),
ImmutableMap.of(
"id", 2L,
"map_info", ImmutableMap.of("snapshotVersion", 0)
)
)
);
public static final List<Map<String, Object>> V2_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
ImmutableList.of(
ImmutableMap.of(
"id", 2L,
"map_info", ImmutableMap.of("snapshotVersion", 2)
),
ImmutableMap.of(
"id", 0L,
"map_info", ImmutableMap.of("snapshotVersion", 0)
)
)
);
public static final List<Map<String, Object>> LATEST_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
ImmutableList.of(
ImmutableMap.of(
"id", 1L,
"map_info", ImmutableMap.of("snapshotVersion", 3)
),
ImmutableMap.of(
"id", 4L,
"map_info", ImmutableMap.of("snapshotVersion", 3)
),
ImmutableMap.of(
"id", 2L,
"map_info", ImmutableMap.of("snapshotVersion", 2)
),
ImmutableMap.of(
"id", 0L,
"map_info", ImmutableMap.of("snapshotVersion", 0)
)
)
);
/**
* The Druid schema used for ingestion of {@link #DELTA_TABLE_PATH}.
*/
public static final InputRowSchema FULL_SCHEMA = new InputRowSchema(
new TimestampSpec("na", "posix", DateTimes.of("2024-01-01")),
new DimensionsSpec(
ImmutableList.of(
new AutoTypeColumnSchema("id", null),
new AutoTypeColumnSchema("map_info", null)
)
),
ColumnsFilter.all()
);
}

View File

@ -44,18 +44,20 @@ Delta table to `resources/employee-delta-table`. You can override the defaults b
```shell ```shell
python3 create_delta_table.py -h python3 create_delta_table.py -h
usage: create_delta_table.py [-h] --save_path SAVE_PATH [--save_mode {append,overwrite}] [--partitioned_by {date,name}] [--num_records NUM_RECORDS] usage: create_delta_table.py [-h] [--delta_table_type {TableType.SIMPLE,TableType.COMPLEX,TableType.SNAPSHOTS}] --save_path SAVE_PATH [--save_mode {append,overwrite}] [--partitioned_by {date,name,id}] [--num_records NUM_RECORDS]
Script to write a Delta Lake table. Script to write a Delta Lake table.
options: options:
-h, --help show this help message and exit -h, --help show this help message and exit
--delta_table_type {TableType.SIMPLE,TableType.COMPLEX,TableType.SNAPSHOTS}
Choose a Delta table type to generate. (default: TableType.SIMPLE)
--save_path SAVE_PATH --save_path SAVE_PATH
Save path for Delta table (default: None) Save path for Delta table (default: None)
--save_mode {append,overwrite} --save_mode {append,overwrite}
Specify write mode (append/overwrite) (default: append) Specify write mode (append/overwrite) (default: append)
--partitioned_by {date,name} --partitioned_by {date,name,id}
Partitioned by columns (default: None) Column to partition the Delta table (default: None)
--num_records NUM_RECORDS --num_records NUM_RECORDS
Specify number of Delta records to write (default: 5) Specify number of Delta records to write (default: 5)
``` ```
@ -88,10 +90,21 @@ The resulting Delta table is checked in to the repo. The expectated rows to be u
### Complex types table `complex-types-table`: ### Complex types table `complex-types-table`:
The test data in `resources/complex-types-table` contains 5 Delta records generated with 1 snapshot. The test data in `resources/complex-types-table` contains 5 Delta records generated with 1 snapshot.
The table was generated by running the following commands: The table was generated by running the following command:
```shell ```shell
python3 create_delta_table.py --save_path=complex-types-table --num_records=5 --gen_complex_types=True python3 create_delta_table.py --save_path=complex-types-table --delta_table_type=complex
``` ```
The resulting Delta table is checked into the repo. The expected rows to be used in tests are updated in The resulting Delta table is checked into the repo. The expected rows to be used in tests are updated in
`ComplexTypesDeltaTable.java` accordingly. `ComplexTypesDeltaTable.java` accordingly.
### Snapshots table `snapshot-table`:
The test data in `resources/snapshot-table` contains 4 Delta snapshots with deletion, update, and insertion of records across
snapshots. The table was generated by running the following command:
```shell
python3 create_delta_table.py --save_path=snapshot-table --partitioned_by=id --delta_table_type=snapshots --num_records=3
```
The resulting Delta table is checked into the repo. The expected rows to be used in tests are updated in
`SnapshotDeltaTable.java` accordingly.

View File

@ -16,11 +16,20 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
from enum import Enum
from delta import * from delta import *
import pyspark import pyspark
from pyspark.sql.types import MapType, StructType, StructField, ShortType, StringType, TimestampType, LongType, IntegerType, DoubleType, FloatType, DateType, BooleanType, ArrayType from pyspark.sql.types import MapType, StructType, StructField, ShortType, StringType, TimestampType, LongType, IntegerType, DoubleType, FloatType, DateType, BooleanType, ArrayType
from pyspark.sql.functions import expr
from datetime import datetime, timedelta from datetime import datetime, timedelta
import random import random
from delta.tables import DeltaTable
class TableType(Enum):
SIMPLE = "simple"
COMPLEX = "complex"
SNAPSHOTS = "snapshots"
def config_spark_with_delta_lake(): def config_spark_with_delta_lake():
@ -40,15 +49,12 @@ def config_spark_with_delta_lake():
def create_dataset_with_complex_types(num_records): def create_dataset_with_complex_types(num_records):
""" """
Create a mock dataset with records containing complex types like arrays, structs and maps. Create a mock dataset with records containing complex types like arrays, structs and maps.
Parameters: Parameters:
- num_records (int): Number of records to generate. - num_records (int): Number of records to generate.
Returns: Returns:
- Tuple: A tuple containing a list of records and the corresponding schema. - Tuple: A tuple containing a list of records and the corresponding schema.
- List of Records: Each record is a tuple representing a row of data. - List of Records: Each record is a tuple representing a row of data.
- StructType: The schema defining the structure of the records. - StructType: The schema defining the structure of the records.
Example: Example:
```python ```python
data, schema = create_dataset_with_complex_types(10) data, schema = create_dataset_with_complex_types(10)
@ -86,6 +92,59 @@ def create_dataset_with_complex_types(num_records):
return data, schema return data, schema
def create_snapshots_table(num_records):
"""
Create a mock dataset for snapshots.
Parameters:
- num_records (int): Number of records to generate.
Returns:
- Tuple: A tuple containing a list of records and the corresponding schema pertaining to a single snapshot.
Example:
```python
data, schema = create_snapshots_table(5)
```
"""
schema = StructType([
StructField("id", LongType(), False),
StructField("map_info", MapType(StringType(), IntegerType()))
])
data = []
for idx in range(num_records):
record = (
idx,
{"snapshotVersion": 0}
)
data.append(record)
return data, schema
def update_table(spark, schema, delta_table_path):
"""
Update table at the specified delta path with updates: deletion, partial upsert, and insertion.
Each update generates a distinct snapshot for the Delta table.
"""
delta_table = DeltaTable.forPath(spark, delta_table_path)
# Snapshot 1: remove the record with id = 1; result : (id=0, id=2)
delta_table.delete(condition="id=1")
# Snapshot 2: do a partial update of the map_info column (snapshotVersion key) for id = 2; result : (id=2, id=0)
delta_table.update(
condition="id=2",
set={"map_info": expr("map('snapshotVersion', 2)")}
)
# Snapshot 3: New records to be appended; result : (id=1, id=4, id=2, id=0)
append_data = [
(1, {"snapshotVersion": 3}),
(4, {"snapshotVersion": 3})
]
append_df = spark.createDataFrame(append_data, schema)
append_df.write.format("delta").mode("append").save(delta_table_path)
def create_dataset(num_records): def create_dataset(num_records):
""" """
Generate a mock employee dataset with different datatypes for testing purposes. Generate a mock employee dataset with different datatypes for testing purposes.
@ -141,19 +200,18 @@ def main():
     parser = argparse.ArgumentParser(description="Script to write a Delta Lake table.",
                                      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--gen_complex_types", type=bool, default=False, help="Generate a Delta table with records"
-                        " containing complex types like structs,"
-                        " maps and arrays.")
+    parser.add_argument('--delta_table_type', type=lambda t: TableType[t.upper()], choices=TableType,
+                        default=TableType.SIMPLE, help='Choose a Delta table type to generate.')
     parser.add_argument('--save_path', default=None, required=True, help="Save path for Delta table")
     parser.add_argument('--save_mode', choices=('append', 'overwrite'), default="append",
                         help="Specify write mode (append/overwrite)")
-    parser.add_argument('--partitioned_by', choices=("date", "name"), default=None,
+    parser.add_argument('--partitioned_by', choices=("date", "name", "id"), default=None,
                         help="Column to partition the Delta table")
     parser.add_argument('--num_records', type=int, default=5, help="Specify number of Delta records to write")
     args = parser.parse_args()
-    is_gen_complex_types = args.gen_complex_types
+    delta_table_type = args.delta_table_type
     save_mode = args.save_mode
     save_path = args.save_path
     num_records = args.num_records
@@ -161,21 +219,29 @@ def main():
     spark = config_spark_with_delta_lake()
-    if is_gen_complex_types:
-        data, schema = create_dataset_with_complex_types(num_records=num_records)
-    else:
+    if delta_table_type == TableType.SIMPLE:
         data, schema = create_dataset(num_records=num_records)
+    elif delta_table_type == TableType.COMPLEX:
+        data, schema = create_dataset_with_complex_types(num_records=num_records)
+    elif delta_table_type == TableType.SNAPSHOTS:
+        data, schema = create_snapshots_table(num_records)
+    else:
+        parser.print_help()
+        raise Exception("Unknown value specified for --delta_table_type")
     df = spark.createDataFrame(data, schema=schema)
     if not partitioned_by:
         df.write.format("delta").mode(save_mode).save(save_path)
     else:
-        df.write.format("delta").partitionBy("name").mode(save_mode).save(save_path)
+        df.write.format("delta").partitionBy(partitioned_by).mode(save_mode).save(save_path)
     df.show()
     print(f"Generated Delta table records partitioned by {partitioned_by} in {save_path} in {save_mode} mode"
-          f" with {num_records} records.")
+          f" with {num_records} records with {delta_table_type}.")
+    if delta_table_type == TableType.SNAPSHOTS:
+        update_table(spark, schema, save_path)
 if __name__ == "__main__":
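The main() change replaces the boolean --gen_complex_types flag with --delta_table_type, which leans on argparse accepting an Enum member both as the result of the type converter and as an element of choices. A small self-contained sketch of that pattern (the enum is re-declared here only for illustration):

```python
import argparse
from enum import Enum

class TableType(Enum):
    SIMPLE = "simple"
    COMPLEX = "complex"
    SNAPSHOTS = "snapshots"

parser = argparse.ArgumentParser()
# The lambda upper-cases the user's input so "snapshots" resolves to TableType.SNAPSHOTS;
# choices=TableType makes argparse reject any value that is not a member of the enum.
parser.add_argument("--delta_table_type", type=lambda t: TableType[t.upper()],
                    choices=TableType, default=TableType.SIMPLE)

args = parser.parse_args(["--delta_table_type", "snapshots"])
assert args.delta_table_type is TableType.SNAPSHOTS
```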

View File

@@ -0,0 +1,6 @@
{"commitInfo":{"timestamp":1725465348581,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"id\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"2607"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.1.0","txnId":"d52bcd81-2310-417a-acb2-e206a4882383"}}
{"metaData":{"id":"5a4682fa-c3d8-4f49-8825-b8540e20ce93","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"map_info\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"integer\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["id"],"configuration":{},"createdTime":1725465346226}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
{"add":{"path":"id=0/part-00003-8610110f-f5a0-4856-a5a8-516e5b35ef44.c000.snappy.parquet","partitionValues":{"id":"0"},"size":869,"modificationTime":1725465348507,"dataChange":true,"stats":"{\"numRecords\":1,\"nullCount\":{\"map_info\":0}}"}}
{"add":{"path":"id=1/part-00006-120df0a3-1c7a-4a2e-81aa-7bc8140b0f09.c000.snappy.parquet","partitionValues":{"id":"1"},"size":869,"modificationTime":1725465348507,"dataChange":true,"stats":"{\"numRecords\":1,\"nullCount\":{\"map_info\":0}}"}}
{"add":{"path":"id=2/part-00009-246861b8-01b0-446c-b4f1-ab0c2e762044.c000.snappy.parquet","partitionValues":{"id":"2"},"size":869,"modificationTime":1725465348506,"dataChange":true,"stats":"{\"numRecords\":1,\"nullCount\":{\"map_info\":0}}"}}

View File

@@ -0,0 +1,2 @@
{"commitInfo":{"timestamp":1725465352088,"operation":"DELETE","operationParameters":{"predicate":"[\"(id#852L = 1)\"]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"1","numRemovedBytes":"869","numCopiedRows":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"426","numDeletionVectorsUpdated":"0","numDeletedRows":"1","scanTimeMs":"421","numAddedFiles":"0","numAddedBytes":"0","rewriteTimeMs":"0"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.1.0","txnId":"5af91bc8-feb5-40e2-b7d0-76acd1038ba7"}}
{"remove":{"path":"id=1/part-00006-120df0a3-1c7a-4a2e-81aa-7bc8140b0f09.c000.snappy.parquet","deletionTimestamp":1725465351650,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"id":"1"},"size":869,"stats":"{\"numRecords\":1}"}}

Some files were not shown because too many files have changed in this diff.