mirror of https://github.com/apache/druid.git
Merge remote-tracking branch 'apache/master' into quidem-kttm
commit 6075163aaa
@@ -117,6 +117,28 @@ jobs:
          MAVEN_PROJECTS: ${{ inputs.maven_projects }}
        run: ./.github/scripts/unit_tests_script.sh

+     - name: Check for .hprof files on failure
+       if: ${{ failure() }}
+       id: check_for_heap_dump
+       run: |
+         if ls ${GITHUB_WORKSPACE}/target/*.hprof 1> /dev/null 2>&1; then
+           echo "found_hprof=true" >> "$GITHUB_ENV"
+         else
+           echo "found_hprof=false" >> "$GITHUB_ENV"
+         fi
+
+     - name: Collect tarball hprof dumps if they exist on failure
+       if: ${{ failure() && env.found_hprof == 'true' }}
+       run: |
+         tar cvzf ${RUNNER_TEMP}/hprof-dumps.tgz ${GITHUB_WORKSPACE}/target/*.hprof
+
+     - name: Upload hprof dumps to GitHub if they exist on failure
+       if: ${{ failure() && env.found_hprof == 'true' }}
+       uses: actions/upload-artifact@master
+       with:
+         name: Hprof-${{ inputs.group }} hprof dumps (Compile=jdk${{ inputs.build_jdk }}, Run=jdk${{ inputs.runtime_jdk }})
+         path: ${{ runner.temp }}/hprof-dumps.tgz
+
      - name: set outputs on failure
        id: set_outputs
        if: ${{ failure() }}
@@ -27,7 +27,7 @@
  <parent>
    <groupId>org.apache.druid</groupId>
    <artifactId>druid</artifactId>
-    <version>31.0.0-SNAPSHOT</version>
+    <version>32.0.0-SNAPSHOT</version>
  </parent>

  <dependencies>

@@ -132,7 +132,7 @@ public class DelimitedInputFormatBenchmark
  @Setup(Level.Trial)
  public void prepareFormat()
  {
-    format = new DelimitedInputFormat(fromHeader ? null : COLUMNS, null, "\t", null, fromHeader, fromHeader ? 0 : 1);
+    format = new DelimitedInputFormat(fromHeader ? null : COLUMNS, null, "\t", null, fromHeader, fromHeader ? 0 : 1, null);
  }

  @Benchmark
@@ -36,7 +36,7 @@ import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.CursorBuildSpec;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.generator.GeneratorColumnSchema;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;

@@ -161,9 +161,8 @@ public class ExpressionAggregationBenchmark

  private double compute(final Function<ColumnSelectorFactory, BufferAggregator> aggregatorFactory)
  {
-    final QueryableIndexStorageAdapter adapter = new QueryableIndexStorageAdapter(index);
-
-    try (final CursorHolder cursorHolder = adapter.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();

      final BufferAggregator bufferAggregator = aggregatorFactory.apply(cursor.getColumnSelectorFactory());
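The benchmark hunks that follow all repeat the same mechanical migration: `QueryableIndexStorageAdapter` (and the `StorageAdapter` type) gives way to `QueryableIndexCursorFactory` / `CursorFactory` as the entry point for building cursors. As a reading aid only, here is a minimal sketch of the new call shape assembled from the calls visible in these hunks; the wrapper class and method name are illustrative, and `index` is assumed to be an already-built `QueryableIndex`, as in the benchmarks.

```java
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.CursorBuildSpec;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.QueryableIndexCursorFactory;

public class CursorFactoryMigrationSketch
{
  // Old shape: new QueryableIndexStorageAdapter(index).makeCursorHolder(spec)
  // New shape: new QueryableIndexCursorFactory(index).makeCursorHolder(spec)
  static void scanAllRows(QueryableIndex index)
  {
    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      while (!cursor.isDone()) {
        // Column selectors come from cursor.getColumnSelectorFactory(), exactly as in the benchmarks below.
        cursor.advance();
      }
    }
  }
}
```

The same substitution appears later in this diff for incremental indexes (`IncrementalIndexCursorFactory`) and for segments (`segment.asCursorFactory()` instead of `segment.asStorageAdapter()`).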
@@ -35,7 +35,7 @@ import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.CursorBuildSpec;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.generator.GeneratorColumnSchema;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;

@@ -148,7 +148,9 @@ public class ExpressionFilterBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setFilter(expressionFilter.toFilter())
        .build();
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();

@@ -166,7 +168,9 @@ public class ExpressionFilterBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setFilter(nativeFilter.toFilter())
        .build();
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("x");
      while (!cursor.isDone()) {
@@ -42,9 +42,8 @@ import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.Cursors;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.QueryableIndexTimeBoundaryInspector;
-import org.apache.druid.segment.StorageAdapter;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;

@@ -160,7 +159,8 @@ public class ExpressionSelectorBenchmark
            )
        )
        .build();
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();

      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");

@@ -171,7 +171,8 @@ public class ExpressionSelectorBenchmark
  @Benchmark
  public void timeFloorUsingExtractionFn(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();

      final DimensionSelector selector = cursor

@@ -190,15 +191,15 @@ public class ExpressionSelectorBenchmark
  @Benchmark
  public void timeFloorUsingCursor(Blackhole blackhole)
  {
-    final StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
-    try (final CursorHolder cursorHolder = adapter.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      final CursorGranularizer granularizer = CursorGranularizer.create(
          cursor,
          QueryableIndexTimeBoundaryInspector.create(index),
          Cursors.getTimeOrdering(index.getOrdering()),
          Granularities.HOUR,
-          adapter.getInterval()
+          index.getDataInterval()
      );
      final Sequence<Long> results =
          Sequences.simple(granularizer.getBucketIterable())

@@ -241,7 +242,8 @@ public class ExpressionSelectorBenchmark
        )
        .build();

-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final DimensionSelector selector = cursor.getColumnSelectorFactory().makeDimensionSelector(
          DefaultDimensionSpec.of("v")

@@ -253,7 +255,8 @@ public class ExpressionSelectorBenchmark
  @Benchmark
  public void timeFormatUsingExtractionFn(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      final DimensionSelector selector = cursor
          .getColumnSelectorFactory()

@@ -284,7 +287,8 @@ public class ExpressionSelectorBenchmark
        )
        .build();

-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);

@@ -307,7 +311,8 @@ public class ExpressionSelectorBenchmark
        )
        .build();

-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final DimensionSelector selector = cursor
          .getColumnSelectorFactory()

@@ -320,7 +325,8 @@ public class ExpressionSelectorBenchmark
  @Benchmark
  public void strlenUsingExtractionFn(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      final DimensionSelector selector = cursor
          .getColumnSelectorFactory()

@@ -346,7 +352,8 @@ public class ExpressionSelectorBenchmark
        )
        .build();

-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);

@@ -368,7 +375,8 @@ public class ExpressionSelectorBenchmark
        )
    )
    .build();
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);

@@ -390,7 +398,8 @@ public class ExpressionSelectorBenchmark
        )
    )
    .build();
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);

@@ -412,7 +421,8 @@ public class ExpressionSelectorBenchmark
        )
    )
    .build();
-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);

@@ -447,7 +457,8 @@ public class ExpressionSelectorBenchmark
        )
        .build();

-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);

@@ -476,7 +487,8 @@ public class ExpressionSelectorBenchmark
        )
        .build();

-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);

@@ -513,7 +525,8 @@ public class ExpressionSelectorBenchmark
        )
        .build();

-    try (final CursorHolder cursorHolder = new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(index);
+    try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
      consumeLong(cursor, selector, blackhole);
@@ -35,7 +35,7 @@ import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.CursorBuildSpec;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;

@@ -155,7 +155,7 @@ public class ExpressionVectorSelectorBenchmark
        .setVirtualColumns(virtualColumns)
        .build();
    final CursorHolder cursorHolder = closer.register(
-        new QueryableIndexStorageAdapter(index).makeCursorHolder(buildSpec)
+        new QueryableIndexCursorFactory(index).makeCursorHolder(buildSpec)
    );
    if (vectorize) {
      VectorCursor cursor = cursorHolder.asVectorCursor();
@@ -46,14 +46,14 @@ import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.CursorBuildSpec;
+import org.apache.druid.segment.CursorFactory;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.IndexMergerV9;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
-import org.apache.druid.segment.StorageAdapter;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.data.IndexedInts;

@@ -231,8 +231,8 @@ public class FilterPartitionBenchmark
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void stringRead(Blackhole blackhole)
  {
-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, null)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, null)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -243,8 +243,8 @@ public class FilterPartitionBenchmark
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void longRead(Blackhole blackhole)
  {
-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, null)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, null)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursorLong(cursor, blackhole);
    }

@@ -255,8 +255,8 @@ public class FilterPartitionBenchmark
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void timeFilterNone(Blackhole blackhole)
  {
-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (CursorHolder cursorHolder = makeCursorHolder(sa, timeFilterNone)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, timeFilterNone)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursorLong(cursor, blackhole);
    }

@@ -267,8 +267,8 @@ public class FilterPartitionBenchmark
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void timeFilterHalf(Blackhole blackhole)
  {
-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, timeFilterHalf)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, timeFilterHalf)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursorLong(cursor, blackhole);
    }

@@ -279,8 +279,8 @@ public class FilterPartitionBenchmark
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void timeFilterAll(Blackhole blackhole)
  {
-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, timeFilterAll)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, timeFilterAll)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursorLong(cursor, blackhole);
    }

@@ -293,8 +293,8 @@ public class FilterPartitionBenchmark
  {
    Filter filter = new SelectorFilter("dimSequential", "199");

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -307,8 +307,8 @@ public class FilterPartitionBenchmark
  {
    Filter filter = new NoBitmapSelectorFilter("dimSequential", "199");

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -321,8 +321,8 @@ public class FilterPartitionBenchmark
  {
    Filter filter = new SelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -335,8 +335,8 @@ public class FilterPartitionBenchmark
  {
    Filter filter = new NoBitmapSelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, filter)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, filter)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -354,8 +354,8 @@ public class FilterPartitionBenchmark
        )
    );

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, andFilter)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, andFilter)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -370,8 +370,8 @@ public class FilterPartitionBenchmark
    Filter filter2 = new AndFilter(Arrays.asList(new SelectorFilter("dimMultivalEnumerated2", "Corundum"), new NoBitmapSelectorFilter("dimMultivalEnumerated", "Bar")));
    Filter orFilter = new OrFilter(Arrays.asList(filter, filter2));

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, orFilter)) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, orFilter)) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -386,8 +386,8 @@ public class FilterPartitionBenchmark
    Filter filter2 = new AndFilter(Arrays.asList(new SelectorFilter("dimMultivalEnumerated2", "Corundum"), new NoBitmapSelectorFilter("dimMultivalEnumerated", "Bar")));
    Filter orFilter = new OrFilter(Arrays.asList(filter, filter2));

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, Filters.toCnf(orFilter))) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, Filters.toCnf(orFilter))) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -425,8 +425,8 @@ public class FilterPartitionBenchmark
        ))
    );

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, dimFilter3.toFilter())) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, dimFilter3.toFilter())) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }

@@ -464,16 +464,16 @@ public class FilterPartitionBenchmark
        ))
    );

-    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
-    try (final CursorHolder cursorHolder = makeCursorHolder(sa, Filters.toCnf(dimFilter3.toFilter()))) {
+    final QueryableIndexCursorFactory cursorFactory = new QueryableIndexCursorFactory(qIndex);
+    try (final CursorHolder cursorHolder = makeCursorHolder(cursorFactory, Filters.toCnf(dimFilter3.toFilter()))) {
      final Cursor cursor = cursorHolder.asCursor();
      readCursor(cursor, blackhole);
    }
  }

-  private CursorHolder makeCursorHolder(StorageAdapter sa, Filter filter)
+  private CursorHolder makeCursorHolder(CursorFactory factory, Filter filter)
  {
-    return sa.makeCursorHolder(
+    return factory.makeCursorHolder(
        CursorBuildSpec.builder()
            .setFilter(filter)
            .setInterval(schemaInfo.getDataInterval())
@@ -378,7 +378,6 @@ public class GroupByTypeInterfaceBenchmark
    final GroupingEngine groupingEngine = new GroupingEngine(
        druidProcessingConfig,
        configSupplier,
-        bufferPool,
        groupByResourcesReservationPool,
        TestHelper.makeJsonMapper(),
        new ObjectMapper(new SmileFactory()),

@@ -387,7 +386,8 @@ public class GroupByTypeInterfaceBenchmark

    factory = new GroupByQueryRunnerFactory(
        groupingEngine,
-        new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool)
+        new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool),
+        bufferPool
    );
  }
@@ -238,7 +238,7 @@ public class IndexedTableJoinCursorBenchmark

  private CursorHolder makeCursorHolder()
  {
-    return hashJoinSegment.asStorageAdapter().makeCursorHolder(CursorBuildSpec.FULL_SCAN);
+    return hashJoinSegment.asCursorFactory().makeCursorHolder(CursorBuildSpec.FULL_SCAN);
  }
@@ -369,7 +369,7 @@ public class JoinAndLookupBenchmark
  @OutputTimeUnit(TimeUnit.MILLISECONDS)
  public void baseSegment(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
+    try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "countryIsoCode"));
    }

@@ -384,7 +384,7 @@ public class JoinAndLookupBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setFilter(filter)
        .build();
-    try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(buildSpec)) {
+    try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "countryIsoCode"));
    }

@@ -395,7 +395,7 @@ public class JoinAndLookupBenchmark
  @OutputTimeUnit(TimeUnit.MILLISECONDS)
  public void joinLookupStringKey(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.v"));

@@ -411,7 +411,7 @@ public class JoinAndLookupBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setFilter(filter)
        .build();
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.v"));

@@ -423,7 +423,7 @@ public class JoinAndLookupBenchmark
  @OutputTimeUnit(TimeUnit.MILLISECONDS)
  public void joinLookupLongKey(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.v"));

@@ -439,7 +439,7 @@ public class JoinAndLookupBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setFilter(filter)
        .build();
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.v"));

@@ -451,7 +451,7 @@ public class JoinAndLookupBenchmark
  @OutputTimeUnit(TimeUnit.MILLISECONDS)
  public void joinIndexedTableLongKey(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.countryName"));

@@ -467,7 +467,7 @@ public class JoinAndLookupBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setFilter(filter)
        .build();
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.countryName"));

@@ -479,7 +479,7 @@ public class JoinAndLookupBenchmark
  @OutputTimeUnit(TimeUnit.MILLISECONDS)
  public void joinIndexedTableStringKey(Blackhole blackhole)
  {
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(CursorBuildSpec.FULL_SCAN)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.countryName"));

@@ -495,7 +495,7 @@ public class JoinAndLookupBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setFilter(filter)
        .build();
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, "c.countryName"));

@@ -510,7 +510,7 @@ public class JoinAndLookupBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setVirtualColumns(lookupVirtualColumns)
        .build();
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_CODE_TO_NAME));

@@ -527,7 +527,7 @@ public class JoinAndLookupBenchmark
        .setFilter(filter)
        .setVirtualColumns(lookupVirtualColumns)
        .build();
-    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asStorageAdapter()
+    try (final CursorHolder cursorHolder = hashJoinLookupStringKeySegment.asCursorFactory()
        .makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_CODE_TO_NAME));

@@ -542,7 +542,7 @@ public class JoinAndLookupBenchmark
    final CursorBuildSpec buildSpec = CursorBuildSpec.builder()
        .setVirtualColumns(lookupVirtualColumns)
        .build();
-    try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(buildSpec)) {
+    try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_NUMBER_TO_NAME));
    }

@@ -558,7 +558,7 @@ public class JoinAndLookupBenchmark
        .setVirtualColumns(lookupVirtualColumns)
        .setFilter(filter)
        .build();
-    try (final CursorHolder cursorHolder = baseSegment.asStorageAdapter().makeCursorHolder(buildSpec)) {
+    try (final CursorHolder cursorHolder = baseSegment.asCursorFactory().makeCursorHolder(buildSpec)) {
      final Cursor cursor = cursorHolder.asCursor();
      blackhole.consume(getLastValue(cursor, LOOKUP_COUNTRY_NUMBER_TO_NAME));
    }
@@ -23,7 +23,7 @@ import com.google.common.base.Supplier;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.MappedByteBufferHandler;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.query.QueryContexts;
import org.apache.druid.segment.data.ColumnarLongs;
import org.apache.druid.segment.data.CompressedColumnarLongsSupplier;
import org.openjdk.jmh.annotations.Benchmark;

@@ -118,7 +118,7 @@ public class LongCompressionBenchmark
  @Benchmark
  public void readVectorizedContinuous(Blackhole bh)
  {
-    long[] vector = new long[QueryableIndexStorageAdapter.DEFAULT_VECTOR_SIZE];
+    long[] vector = new long[QueryContexts.DEFAULT_VECTOR_SIZE];
    ColumnarLongs columnarLongs = supplier.get();
    int count = columnarLongs.size();
    for (int i = 0; i < count; i++) {
@@ -21,6 +21,7 @@ package org.apache.druid.benchmark.frame;

import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ListenableFuture;
+import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.common.guava.FutureUtils;

@@ -203,6 +204,7 @@ public class FrameChannelMergerBenchmark
  private final List<KeyColumn> sortKey = ImmutableList.of(new KeyColumn(KEY, KeyOrder.ASCENDING));

  private List<List<Frame>> channelFrames;
+  private ListeningExecutorService innerExec;
  private FrameProcessorExecutor exec;
  private List<BlockingQueueFrameChannel> channels;

@@ -226,7 +228,7 @@ public class FrameChannelMergerBenchmark
    frameReader = FrameReader.create(signature);

    exec = new FrameProcessorExecutor(
-        MoreExecutors.listeningDecorator(
+        innerExec = MoreExecutors.listeningDecorator(
            Execs.singleThreaded(StringUtils.encodeForFormat(getClass().getSimpleName()))
        )
    );

@@ -284,7 +286,7 @@ public class FrameChannelMergerBenchmark
        signature
    );
    final Sequence<Frame> frameSequence =
-        FrameSequenceBuilder.fromAdapter(segment.asStorageAdapter())
+        FrameSequenceBuilder.fromCursorFactory(segment.asCursorFactory())
            .allocator(ArenaMemoryAllocator.createOnHeap(10_000_000))
            .frameType(FrameType.ROW_BASED)
            .frames();

@@ -335,8 +337,8 @@ public class FrameChannelMergerBenchmark
  @TearDown(Level.Trial)
  public void tearDown() throws Exception
  {
-    exec.getExecutorService().shutdownNow();
-    if (!exec.getExecutorService().awaitTermination(1, TimeUnit.MINUTES)) {
+    innerExec.shutdownNow();
+    if (!innerExec.awaitTermination(1, TimeUnit.MINUTES)) {
      throw new ISE("Could not terminate executor after 1 minute");
    }
  }
@@ -35,6 +35,7 @@ import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.search.ContainsSearchQuerySpec;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.CursorBuildSpec;
+import org.apache.druid.segment.CursorFactory;
import org.apache.druid.segment.CursorHolder;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.data.IndexedInts;

@@ -44,8 +45,8 @@ import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.incremental.AppendableIndexSpec;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexCreator;
+import org.apache.druid.segment.incremental.IncrementalIndexCursorFactory;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
-import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
import org.apache.druid.segment.serde.ComplexMetrics;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;

@@ -147,8 +148,8 @@ public class IncrementalIndexReadBenchmark
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void read(Blackhole blackhole)
  {
-    IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
-    try (final CursorHolder cursorHolder = makeCursor(sa, null)) {
+    final CursorFactory cursorFactory = new IncrementalIndexCursorFactory(incIndex);
+    try (final CursorHolder cursorHolder = makeCursor(cursorFactory, null)) {
      Cursor cursor = cursorHolder.asCursor();

      List<DimensionSelector> selectors = new ArrayList<>();

@@ -183,8 +184,8 @@ public class IncrementalIndexReadBenchmark
        )
    );

-    IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
-    try (final CursorHolder cursorHolder = makeCursor(sa, filter)) {
+    IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(incIndex);
+    try (final CursorHolder cursorHolder = makeCursor(cursorFactory, filter)) {
      Cursor cursor = cursorHolder.asCursor();

      List<DimensionSelector> selectors = new ArrayList<>();

@@ -204,14 +205,14 @@ public class IncrementalIndexReadBenchmark
    }
  }

-  private CursorHolder makeCursor(IncrementalIndexStorageAdapter sa, DimFilter filter)
+  private CursorHolder makeCursor(CursorFactory factory, DimFilter filter)
  {
    CursorBuildSpec.CursorBuildSpecBuilder builder = CursorBuildSpec.builder()
        .setInterval(schemaInfo.getDataInterval());
    if (filter != null) {
      builder.setFilter(filter.toFilter());
    }
-    return sa.makeCursorHolder(builder.build());
+    return factory.makeCursorHolder(builder.build());
  }

  private static DimensionSelector makeDimensionSelector(Cursor cursor, String name)
@@ -362,14 +362,13 @@ public class CachingClusteredClientBenchmark
    final GroupingEngine groupingEngine = new GroupingEngine(
        processingConfig,
        configSupplier,
-        bufferPool,
        groupByResourcesReservationPool,
        mapper,
        mapper,
        QueryRunnerTestHelper.NOOP_QUERYWATCHER
    );
    final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool);
-    return new GroupByQueryRunnerFactory(groupingEngine, toolChest);
+    return new GroupByQueryRunnerFactory(groupingEngine, toolChest, bufferPool);
  }

  @TearDown(Level.Trial)
@@ -495,7 +495,6 @@ public class GroupByBenchmark
    final GroupingEngine groupingEngine = new GroupingEngine(
        druidProcessingConfig,
        configSupplier,
-        bufferPool,
        groupByResourcesReservationPool,
        TestHelper.makeJsonMapper(),
        new ObjectMapper(new SmileFactory()),

@@ -504,7 +503,8 @@ public class GroupByBenchmark

    factory = new GroupByQueryRunnerFactory(
        groupingEngine,
-        new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool)
+        new GroupByQueryQueryToolChest(groupingEngine, groupByResourcesReservationPool),
+        bufferPool
    );
  }
@@ -57,8 +57,8 @@ import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.QueryableIndexSegment;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.data.FrontCodedIndexed;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;

@@ -694,8 +694,8 @@ public class SqlBenchmark
    } else if (STORAGE_FRAME_ROW.equals(storageType)) {
      walker.add(
          descriptor,
-          FrameTestUtil.adapterToFrameSegment(
-              new QueryableIndexStorageAdapter(index),
+          FrameTestUtil.cursorFactoryToFrameSegment(
+              new QueryableIndexCursorFactory(index),
              FrameType.ROW_BASED,
              descriptor.getId()
          )

@@ -703,8 +703,8 @@ public class SqlBenchmark
    } else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) {
      walker.add(
          descriptor,
-          FrameTestUtil.adapterToFrameSegment(
-              new QueryableIndexStorageAdapter(index),
+          FrameTestUtil.cursorFactoryToFrameSegment(
+              new QueryableIndexCursorFactory(index),
              FrameType.COLUMNAR,
              descriptor.getId()
          )
@@ -55,8 +55,8 @@ import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.QueryableIndexCursorFactory;
import org.apache.druid.segment.QueryableIndexSegment;
-import org.apache.druid.segment.QueryableIndexStorageAdapter;
import org.apache.druid.segment.column.StringEncodingStrategy;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;

@@ -159,6 +159,12 @@ public class SqlWindowFunctionsBenchmark
    {
      return 3;
    }
+
+    @Override
+    public int intermediateComputeSizeBytes()
+    {
+      return 200_000_000;
+    }
  };

  @Setup(Level.Trial)

@@ -281,8 +287,8 @@ public class SqlWindowFunctionsBenchmark
    } else if (STORAGE_FRAME_ROW.equals(storageType)) {
      walker.add(
          descriptor,
-          FrameTestUtil.adapterToFrameSegment(
-              new QueryableIndexStorageAdapter(index),
+          FrameTestUtil.cursorFactoryToFrameSegment(
+              new QueryableIndexCursorFactory(index),
              FrameType.ROW_BASED,
              descriptor.getId()
          )

@@ -290,8 +296,8 @@ public class SqlWindowFunctionsBenchmark
    } else if (STORAGE_FRAME_COLUMNAR.equals(storageType)) {
      walker.add(
          descriptor,
-          FrameTestUtil.adapterToFrameSegment(
-              new QueryableIndexStorageAdapter(index),
+          FrameTestUtil.cursorFactoryToFrameSegment(
+              new QueryableIndexCursorFactory(index),
              FrameType.COLUMNAR,
              descriptor.getId()
          )

@@ -336,7 +342,8 @@ public class SqlWindowFunctionsBenchmark
  {
    final Map<String, Object> context = ImmutableMap.of(
        PlannerContext.CTX_ENABLE_WINDOW_FNS, true,
-        QueryContexts.MAX_SUBQUERY_BYTES_KEY, "auto"
+        QueryContexts.MAX_SUBQUERY_BYTES_KEY, "disabled",
+        QueryContexts.MAX_SUBQUERY_ROWS_KEY, -1
    );
    try (final DruidPlanner planner = plannerFactory.createPlannerForTesting(engine, sql, context)) {
      final PlannerResult plannerResult = planner.plan();

@@ -420,4 +427,15 @@ public class SqlWindowFunctionsBenchmark
        + "GROUP BY dimUniform, dimSequential";
    querySql(sql, blackhole);
  }
+
+  @Benchmark
+  public void windowWithGroupbyTime(Blackhole blackhole)
+  {
+    String sql = "SELECT "
+        + "SUM(dimSequentialHalfNull) + SUM(dimHyperUnique), "
+        + "LAG(SUM(dimSequentialHalfNull + dimHyperUnique)) OVER (PARTITION BY dimUniform ORDER BY dimSequential) "
+        + "FROM foo "
+        + "GROUP BY __time, dimUniform, dimSequential";
+    querySql(sql, blackhole);
+  }
}
@@ -28,7 +28,7 @@
  <parent>
    <groupId>org.apache.druid</groupId>
    <artifactId>druid</artifactId>
-    <version>31.0.0-SNAPSHOT</version>
+    <version>32.0.0-SNAPSHOT</version>
    <relativePath>../../pom.xml</relativePath>
  </parent>

@@ -28,7 +28,7 @@
  <parent>
    <groupId>org.apache.druid</groupId>
    <artifactId>druid</artifactId>
-    <version>31.0.0-SNAPSHOT</version>
+    <version>32.0.0-SNAPSHOT</version>
    <relativePath>../../pom.xml</relativePath>
  </parent>

@@ -0,0 +1,19 @@
+#!/bin/bash -eux
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+BASE_DIR=$(git rev-parse --show-toplevel)
+chmod 644 ${BASE_DIR}/target/*.hprof

@@ -30,7 +30,7 @@
  <parent>
    <artifactId>druid</artifactId>
    <groupId>org.apache.druid</groupId>
-    <version>31.0.0-SNAPSHOT</version>
+    <version>32.0.0-SNAPSHOT</version>
  </parent>

  <dependencies>

@@ -458,6 +458,8 @@
            <argument>org.apache.druid.extensions.contrib:druid-spectator-histogram</argument>
            <argument>-c</argument>
            <argument>org.apache.druid.extensions.contrib:druid-rabbit-indexing-service</argument>
+            <argument>-c</argument>
+            <argument>org.apache.druid.extensions.contrib:grpc-query</argument>
          </arguments>
        </configuration>
      </execution>
[One file diff suppressed because one or more lines are too long.]
[Image file changed: 57 KiB before, 57 KiB after.]
@@ -616,9 +616,10 @@ the [HDFS input source](../ingestion/input-sources.md#hdfs-input-source).
You can set the following property to specify permissible protocols for
the [HTTP input source](../ingestion/input-sources.md#http-input-source).

|Property|Possible values|Description|Default|
|--------|---------------|-----------|-------|
|`druid.ingestion.http.allowedProtocols`|List of protocols|Allowed protocols for the HTTP input source.|`["http", "https"]`|
+|`druid.ingestion.http.allowedHeaders`|List of HTTP headers|A list of permitted request headers for the HTTP input source. By default, the list is empty, which means no headers are allowed in the ingestion specification.|`[]`|

### External data access security configuration
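Both properties in this hunk are plain runtime properties. A hypothetical `runtime.properties` fragment, for illustration only (the header names shown are made up and not part of this change):

```properties
# Restrict the HTTP input source to HTTPS.
druid.ingestion.http.allowedProtocols=["https"]
# Permit specific request headers in ingestion specs; the default is an empty list (no headers allowed).
druid.ingestion.http.allowedHeaders=["Authorization", "X-Custom-Header"]
```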
@@ -1795,6 +1796,7 @@ This strategy can be enabled by setting `druid.query.scheduler.prioritization.st
|`druid.query.scheduler.prioritization.periodThreshold`|ISO duration threshold for how old data can be queried before automatically adjusting query priority.|none|
|`druid.query.scheduler.prioritization.durationThreshold`|ISO duration threshold for maximum duration a queries interval can span before the priority is automatically adjusted.|none|
|`druid.query.scheduler.prioritization.segmentCountThreshold`|Number threshold for maximum number of segments that can take part in a query before its priority is automatically adjusted.|none|
+|`druid.query.scheduler.prioritization.segmentRangeThreshold`|ISO duration threshold for maximum segment range a query can span before the priority is automatically adjusted.|none|
|`druid.query.scheduler.prioritization.adjustment`|Amount to reduce the priority of queries which cross any threshold.|none|

##### Laning strategies
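For orientation, these thresholds are typically combined in `runtime.properties` along the following lines. This is an illustrative sketch assuming the threshold-based prioritization strategy referenced in the surrounding section; the values are placeholders, not configuration taken from this PR:

```properties
druid.query.scheduler.prioritization.strategy=threshold
druid.query.scheduler.prioritization.periodThreshold=P1M
druid.query.scheduler.prioritization.segmentCountThreshold=100
# Added by this patch: cap on the total segment range a query may span.
druid.query.scheduler.prioritization.segmentRangeThreshold=P1M
druid.query.scheduler.prioritization.adjustment=5
```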
@@ -51,9 +51,4 @@ java \
-c "org.apache.druid.extensions.contrib:druid-deltalake-extensions:<VERSION>"
```

-See [Loading community extensions](../../configuration/extensions.md#loading-community-extensions) for more information.
-
-## Known limitations
-
-This extension relies on the Delta Kernel API and can only read from the latest Delta table snapshot. Ability to read from
-arbitrary snapshots is tracked [here](https://github.com/delta-io/delta/issues/2581).
+See [Loading community extensions](../../configuration/extensions.md#loading-community-extensions) for more information.
@@ -31,9 +31,9 @@ This module can be used side to side with other lookup module like the global ca
To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `druid-lookups-cached-single` in the extensions load list.

:::info
-If using JDBC, you will need to add your database's client JAR files to the extension's directory.
+To use JDBC, you must add your database client JAR files to the extension's directory.
For Postgres, the connector JAR is already included.
-See the MySQL extension documentation for instructions to obtain [MySQL](./mysql.md#installing-the-mysql-connector-library) or [MariaDB](./mysql.md#alternative-installing-the-mariadb-connector-library) connector libraries.
+See the MySQL extension documentation for instructions to obtain [MySQL](./mysql.md#install-mysql-connectorj) or [MariaDB](./mysql.md#install-mariadb-connectorj) connector libraries.
Copy or symlink the downloaded file to `extensions/druid-lookups-cached-single` under the distribution root directory.
:::
@@ -1,6 +1,6 @@
---
id: mysql
-title: "MySQL Metadata Store"
+title: "MySQL metadata store"
---

<!--
@ -25,41 +25,58 @@ title: "MySQL Metadata Store"
|
|||
|
||||
To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `mysql-metadata-storage` in the extensions load list.
|
||||
|
||||
:::info
|
||||
The MySQL extension requires the MySQL Connector/J library or MariaDB Connector/J library, neither of which are included in the Druid distribution.
|
||||
Refer to the following section for instructions on how to install this library.
|
||||
:::
|
||||
With the MySQL extension, you can use MySQL as a metadata store or ingest from a MySQL database.
|
||||
|
||||
## Installing the MySQL connector library
|
||||
The extension requires a connector library that's not included with Druid.
|
||||
See the [Prerequisites](#prerequisites) for installation instructions.
|
||||
|
||||
This extension can use Oracle's MySQL JDBC driver which is not included in the Druid distribution. You must
|
||||
install it separately. There are a few ways to obtain this library:
|
||||
## Prerequisites
|
||||
|
||||
- It can be downloaded from the MySQL site at: https://dev.mysql.com/downloads/connector/j/
|
||||
- It can be fetched from Maven Central at: https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.2.0/mysql-connector-j-8.2.0.jar
|
||||
- It may be available through your package manager, e.g. as `libmysql-java` on APT for a Debian-based OS
|
||||
To use the MySQL extension, you need to install one of the following libraries:
|
||||
* [MySQL Connector/J](#install-mysql-connectorj)
|
||||
* [MariaDB Connector/J](#install-mariadb-connectorj)
|
||||
|
||||
This fetches the MySQL connector JAR file with a name like `mysql-connector-j-8.2.0.jar`.
|
||||
### Install MySQL Connector/J
|
||||
|
||||
Copy or symlink this file inside the folder `extensions/mysql-metadata-storage` under the distribution root directory.
|
||||
The MySQL extension uses Oracle's MySQL JDBC driver.
|
||||
The current version of Druid uses version 8.2.0.
|
||||
Other versions may not work with this extension.
|
||||
|
||||
## Alternative: Installing the MariaDB connector library
|
||||
You can download the library from one of the following sources:
|
||||
|
||||
This extension also supports using the MariaDB connector jar, though it is also not included in the Druid distribution, so you must install it separately.
|
||||
- [MySQL website](https://dev.mysql.com/downloads/connector/j/)
|
||||
Visit the archives page to access older product versions.
|
||||
- [Maven Central (direct download)](https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.2.0/mysql-connector-j-8.2.0.jar)
|
||||
- Your package manager. For example, `libmysql-java` on APT for a Debian-based OS.
|
||||
|
||||
- Download from the MariaDB site: https://mariadb.com/downloads/connector
|
||||
- Download from Maven Central: https://repo1.maven.org/maven2/org/mariadb/jdbc/mariadb-java-client/2.7.3/mariadb-java-client-2.7.3.jar
|
||||
The download includes the MySQL connector JAR file with a name like `mysql-connector-j-8.2.0.jar`.
|
||||
Copy or create a symbolic link to this file inside the `lib` folder in the distribution root directory.
|
||||
|
||||
This fetches the MariaDB connector JAR file with a name like `maria-java-client-2.7.3.jar`.
|
||||
### Install MariaDB Connector/J
|
||||
|
||||
Copy or symlink this file to `extensions/mysql-metadata-storage` under the distribution root directory.
|
||||
This extension also supports using the MariaDB connector jar.
|
||||
The current version of Druid uses MariaDB Connector/J version 2.7.3.
|
||||
Other versions may not work with this extension.
|
||||
|
||||
You can download the library from one of the following sources:
|
||||
|
||||
- [MariaDB website](https://mariadb.com/downloads/connectors/connectors-data-access/java8-connector)
|
||||
Click **Show All Files** to access older product versions.
|
||||
- [Maven Central (direct download)](https://repo1.maven.org/maven2/org/mariadb/jdbc/mariadb-java-client/2.7.3/mariadb-java-client-2.7.3.jar)
|
||||
|
||||
The download includes the MariaDB connector JAR file with a name like `maria-java-client-2.7.3.jar`.
|
||||
Copy or create a symbolic link to this file inside the `lib` folder in the distribution root directory.
|
||||
|
||||
To configure the `mysql-metadata-storage` extension to use the MariaDB connector library instead of MySQL, set `druid.metadata.mysql.driver.driverClassName=org.mariadb.jdbc.Driver`.
|
||||
|
||||
Depending on the MariaDB client library version, the connector supports both `jdbc:mysql:` and `jdbc:mariadb:` connection URIs. However, the parameters to configure the connection vary between implementations, so be sure to [check the documentation](https://mariadb.com/kb/en/about-mariadb-connector-j/#connection-strings) for details.
|
||||
The protocol of the connection string is `jdbc:mysql:` or `jdbc:mariadb:`,
|
||||
depending on your specific version of the MariaDB client library.
|
||||
For more information on the parameters to configure a connection,
|
||||
[see the MariaDB documentation](https://mariadb.com/kb/en/about-mariadb-connector-j/#connection-strings)
|
||||
for your connector version.
|
||||
|
||||
|
||||
## Setting up MySQL
|
||||
## Set up MySQL
|
||||
|
||||
To avoid issues with upgrades that require schema changes to a large metadata table, consider a MySQL version that supports instant ADD COLUMN semantics. For example, MySQL 8.
|
||||
|
||||
|
@ -90,7 +107,7 @@ This extension also supports using MariaDB server, https://mariadb.org/download/
|
|||
CREATE DATABASE druid DEFAULT CHARACTER SET utf8mb4;
|
||||
|
||||
-- create a druid user
|
||||
CREATE USER 'druid'@'localhost' IDENTIFIED BY 'diurd';
|
||||
CREATE USER 'druid'@'localhost' IDENTIFIED BY 'password';
|
||||
|
||||
-- grant the user all the permissions on the database we just created
|
||||
GRANT ALL PRIVILEGES ON druid.* TO 'druid'@'localhost';
|
||||
|
@ -111,10 +128,11 @@ This extension also supports using MariaDB server, https://mariadb.org/download/
|
|||
|
||||
If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverClassName=org.mariadb.jdbc.Driver`.
|
||||
|
||||
## Encrypting MySQL connections
|
||||
This extension provides support for encrypting MySQL connections. To get more information about encrypting MySQL connections using TLS/SSL in general, please refer to this [guide](https://dev.mysql.com/doc/refman/5.7/en/using-encrypted-connections.html).
|
||||
## Encrypt MySQL connections
|
||||
|
||||
## Configuration
|
||||
This extension supports encrypting MySQL connections. For general information about encrypting MySQL connections using TLS/SSL, refer to the MySQL [guide](https://dev.mysql.com/doc/refman/5.7/en/using-encrypted-connections.html).
|
||||
|
||||
## Configuration properties
|
||||
|
||||
|Property|Description|Default|Required|
|
||||
|--------|-----------|-------|--------|
|
||||
|
@ -129,7 +147,10 @@ If using the MariaDB connector library, set `druid.metadata.mysql.driver.driverC
|
|||
|`druid.metadata.mysql.ssl.enabledSSLCipherSuites`|Overrides the existing cipher suites with these cipher suites.|none|no|
|
||||
|`druid.metadata.mysql.ssl.enabledTLSProtocols`|Overrides the TLS protocols with these protocols.|none|no|
|
||||
|
||||
### MySQL InputSource
|
||||
## MySQL input source
|
||||
|
||||
The MySQL extension provides an implementation of an SQL input source to ingest data into Druid from a MySQL database.
|
||||
For more information on the input source parameters, see [SQL input source](../../ingestion/input-sources.md#sql-input-source).
|
||||
|
||||
```json
|
||||
{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
id: postgresql
|
||||
title: "PostgreSQL Metadata Store"
|
||||
title: "PostgreSQL metadata store"
|
||||
---
|
||||
|
||||
<!--
|
||||
|
@ -25,7 +25,9 @@ title: "PostgreSQL Metadata Store"
|
|||
|
||||
To use this Apache Druid extension, [include](../../configuration/extensions.md#loading-extensions) `postgresql-metadata-storage` in the extensions load list.
|
||||
|
||||
## Setting up PostgreSQL
|
||||
With the PostgreSQL extension, you can use PostgreSQL as a metadata store or ingest from a PostgreSQL database.
|
||||
|
||||
## Set up PostgreSQL
|
||||
|
||||
To avoid issues with upgrades that require schema changes to a large metadata table, consider a PostgreSQL version that supports instant ADD COLUMN semantics.
|
||||
|
||||
|
@ -69,7 +71,7 @@ To avoid issues with upgrades that require schema changes to a large metadata ta
|
|||
druid.metadata.storage.connector.password=diurd
|
||||
```
|
||||
|
||||
## Configuration
|
||||
## Configuration properties
|
||||
|
||||
In most cases, the configuration options map directly to the [postgres JDBC connection options](https://jdbc.postgresql.org/documentation/use/#connecting-to-the-database).
|
||||
|
||||
|
@ -87,9 +89,10 @@ In most cases, the configuration options map directly to the [postgres JDBC conn
|
|||
| `druid.metadata.postgres.ssl.sslPasswordCallback` | The classname of the SSL password provider. | none | no |
|
||||
| `druid.metadata.postgres.dbTableSchema` | The Druid metadata table schema. | `public` | no |
|
||||
|
||||
### PostgreSQL InputSource
|
||||
## PostgreSQL input source
|
||||
|
||||
The PostgreSQL extension provides an implementation of an [SQL input source](../../ingestion/input-sources.md) which can be used to ingest data into Druid from a PostgreSQL database.
|
||||
The PostgreSQL extension provides an implementation of an SQL input source to ingest data into Druid from a PostgreSQL database.
|
||||
For more information on the input source parameters, see [SQL input source](../../ingestion/input-sources.md#sql-input-source).
|
||||
|
||||
```json
|
||||
{
|
||||
|
|
|
@ -125,6 +125,7 @@ Configure the CSV `inputFormat` to load CSV data as follows:
|
|||
| columns | JSON array | Specifies the columns of the data. The columns should be in the same order as the columns of your data. | yes if `findColumnsFromHeader` is false or missing |
|
||||
| findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) |
|
||||
| skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) |
|
||||
| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) |
|
||||
|
||||
For example:
|
||||
|
||||
|
@ -150,6 +151,7 @@ Configure the TSV `inputFormat` to load TSV data as follows:
|
|||
| columns | JSON array | Specifies the columns of the data. The columns should be in the same order as the columns of your data. | yes if `findColumnsFromHeader` is false or missing |
|
||||
| findColumnsFromHeader | Boolean | If this is set, the task will find the column names from the header row. Note that `skipHeaderRows` will be applied before finding column names from the header. For example, if you set `skipHeaderRows` to 2 and `findColumnsFromHeader` to true, the task will skip the first two lines and then extract column information from the third line. `columns` will be ignored if this is set to true. | no (default = false if `columns` is set; otherwise null) |
|
||||
| skipHeaderRows | Integer | If this is set, the task will skip the first `skipHeaderRows` rows. | no (default = 0) |
|
||||
| tryParseNumbers| Boolean| If this is set, the task will attempt to parse numeric strings into long or double data type, in that order. This parsing also applies to values separated by `listDelimiter`. If the value cannot be parsed as a number, it is retained as a string. | no (default = false) |
|
||||
|
||||
Be sure to change the `delimiter` to the appropriate delimiter for your data. Like CSV, you must specify the columns and which subset of the columns you want indexed.
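For instance, here is a minimal sketch of a TSV `inputFormat` that skips one existing header row, declares the columns explicitly, and opts into numeric parsing; the column names are placeholders for this example:

```json
{
  "type": "tsv",
  "delimiter": "\t",
  "findColumnsFromHeader": false,
  "skipHeaderRows": 1,
  "columns": ["ts", "name", "score"],
  "tryParseNumbers": true
}
```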
|
||||
|
||||
|
|
|
@ -29,10 +29,8 @@ For general information on native batch indexing and parallel task indexing, see
|
|||
|
||||
## S3 input source
|
||||
|
||||
:::info
|
||||
|
||||
You need to include the [`druid-s3-extensions`](../development/extensions-core/s3.md) as an extension to use the S3 input source.
|
||||
|
||||
:::info Required extension
|
||||
To use the S3 input source, load the extension [`druid-s3-extensions`](../development/extensions-core/s3.md) in your `common.runtime.properties` file.
|
||||
:::
|
||||
|
||||
The S3 input source reads objects directly from S3. You can specify either:
|
||||
|
@ -41,7 +39,7 @@ The S3 input source reads objects directly from S3. You can specify either:
|
|||
* a list of S3 location prefixes; the input source attempts to list the contents and ingest
|
||||
all objects contained within the locations.
|
||||
|
||||
The S3 input source is splittable. Therefore, you can use it with the [Parallel task](./native-batch.md). Each worker task of `index_parallel` reads one or multiple objects.
|
||||
The S3 input source is splittable. Therefore, you can use it with the [parallel task](./native-batch.md). Each worker task of `index_parallel` reads one or multiple objects.
|
||||
|
||||
Sample specs:
|
||||
|
||||
|
@ -219,16 +217,14 @@ If `accessKeyId` and `secretAccessKey` are not given, the default [S3 credential
|
|||
|
||||
## Google Cloud Storage input source
|
||||
|
||||
:::info
|
||||
|
||||
You need to include the [`druid-google-extensions`](../development/extensions-core/google.md) as an extension to use the Google Cloud Storage input source.
|
||||
|
||||
:::info Required extension
|
||||
To use the Google Cloud Storage input source, load the extension [`druid-google-extensions`](../development/extensions-core/google.md) in your `common.runtime.properties` file.
|
||||
:::
|
||||
|
||||
The Google Cloud Storage input source supports reading objects directly
|
||||
from Google Cloud Storage. Objects can be specified as a list of Google
|
||||
Cloud Storage URI strings. The Google Cloud Storage input source is splittable
|
||||
and can be used by the [Parallel task](./native-batch.md), where each worker task of `index_parallel` will read
|
||||
and can be used by the [parallel task](./native-batch.md), where each worker task of `index_parallel` will read
|
||||
one or multiple objects.
|
||||
|
||||
Sample specs:
|
||||
|
@ -307,14 +303,12 @@ Google Cloud Storage object:
|
|||
|
||||
## Azure input source
|
||||
|
||||
:::info
|
||||
|
||||
You need to include the [`druid-azure-extensions`](../development/extensions-core/azure.md) as an extension to use the Azure input source.
|
||||
|
||||
:::info Required extension
|
||||
To use the Azure input source, load the extension [`druid-azure-extensions`](../development/extensions-core/azure.md) in your `common.runtime.properties` file.
|
||||
:::
|
||||
|
||||
The Azure input source (that uses the type `azureStorage`) reads objects directly from Azure Blob store or Azure Data Lake sources. You can
|
||||
specify objects as a list of file URI strings or prefixes. You can split the Azure input source for use with [Parallel task](./native-batch.md) indexing and each worker task reads one chunk of the split data.
|
||||
specify objects as a list of file URI strings or prefixes. You can split the Azure input source for use with [parallel task](./native-batch.md) indexing and each worker task reads one chunk of the split data.
|
||||
|
||||
The `azureStorage` input source is a new schema for Azure input sources that allows you to specify which storage account files should be ingested from. We recommend that you update any specs that use the old `azure` schema to use the new `azureStorage` schema. The new schema provides more functionality than the older `azure` schema.
|
||||
|
||||
|
@ -491,15 +485,13 @@ The `objects` property is:
|
|||
|
||||
## HDFS input source
|
||||
|
||||
:::info
|
||||
|
||||
You need to include the [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) as an extension to use the HDFS input source.
|
||||
|
||||
:::info Required extension
|
||||
To use the HDFS input source, load the extension [`druid-hdfs-storage`](../development/extensions-core/hdfs.md) in your `common.runtime.properties` file.
|
||||
:::
|
||||
|
||||
The HDFS input source supports reading files directly
|
||||
from HDFS storage. File paths can be specified as an HDFS URI string or a list
|
||||
of HDFS URI strings. The HDFS input source is splittable and can be used by the [Parallel task](./native-batch.md),
|
||||
of HDFS URI strings. The HDFS input source is splittable and can be used by the [parallel task](./native-batch.md),
|
||||
where each worker task of `index_parallel` will read one or multiple files.
|
||||
|
||||
Sample specs:
|
||||
|
@ -593,7 +585,7 @@ The `http` input source is not limited to the HTTP or HTTPS protocols. It uses t
|
|||
|
||||
For more information about security best practices, see [Security overview](../operations/security-overview.md#best-practices).
|
||||
|
||||
The HTTP input source is _splittable_ and can be used by the [Parallel task](./native-batch.md),
|
||||
The HTTP input source is _splittable_ and can be used by the [parallel task](./native-batch.md),
|
||||
where each worker task of `index_parallel` will read only one file. This input source does not support Split Hint Spec.
|
||||
|
||||
Sample specs:
|
||||
|
@ -701,7 +693,7 @@ Sample spec:
|
|||
|
||||
The Local input source supports reading files directly from local storage,
|
||||
and is mainly intended for proof-of-concept testing.
|
||||
The Local input source is _splittable_ and can be used by the [Parallel task](./native-batch.md),
|
||||
The Local input source is _splittable_ and can be used by the [parallel task](./native-batch.md),
|
||||
where each worker task of `index_parallel` will read one or multiple files.
|
||||
|
||||
Sample spec:
|
||||
|
@ -736,7 +728,7 @@ Sample spec:
|
|||
|
||||
The Druid input source supports reading data directly from existing Druid segments,
|
||||
potentially using a new schema and changing the name, dimensions, metrics, rollup, etc. of the segment.
|
||||
The Druid input source is _splittable_ and can be used by the [Parallel task](./native-batch.md).
|
||||
The Druid input source is _splittable_ and can be used by the [parallel task](./native-batch.md).
|
||||
This input source has a fixed input format for reading from Druid segments;
|
||||
no `inputFormat` field needs to be specified in the ingestion spec when using this input source.
|
||||
|
||||
|
@ -833,17 +825,29 @@ For more information on the `maxNumConcurrentSubTasks` field, see [Implementatio
|
|||
|
||||
## SQL input source
|
||||
|
||||
:::info Required extension
|
||||
To use the SQL input source, you must load the appropriate extension in your `common.runtime.properties` file.
|
||||
* To connect to MySQL, load the extension [`mysql-metadata-storage`](../development/extensions-core/mysql.md).
|
||||
* To connect to PostgreSQL, load the extension [`postgresql-metadata-storage`](../development/extensions-core/postgresql.md).
|
||||
|
||||
The MySQL extension requires a JDBC driver.
|
||||
For more information, see the [Installing the MySQL connector library](../development/extensions-core/mysql.md).
|
||||
:::
|
||||
|
||||
The SQL input source is used to read data directly from an RDBMS.
|
||||
The SQL input source is _splittable_ and can be used by the [Parallel task](./native-batch.md), where each worker task will read from one SQL query from the list of queries.
|
||||
You can _split_ the ingestion tasks for a SQL input source. When you use the [parallel task](./native-batch.md) type, each worker task reads from one SQL query from the list of queries.
|
||||
This input source does not support Split Hint Spec.
|
||||
Since this input source has a fixed input format for reading events, no `inputFormat` field needs to be specified in the ingestion spec when using this input source.
|
||||
Please refer to the Recommended practices section below before using this input source.
|
||||
|
||||
The SQL input source has a fixed input format for reading events.
|
||||
Don't specify `inputFormat` when using this input source.
|
||||
|
||||
Refer to the [recommended practices](#recommended-practices) before using this input source.
|
||||
|
||||
|Property|Description|Required|
|
||||
|--------|-----------|---------|
|
||||
|type|Set the value to `sql`.|Yes|
|
||||
|database|Specifies the database connection details. The database type corresponds to the extension that supplies the `connectorConfig` support. The specified extension must be loaded into Druid:<br/><br/><ul><li>[mysql-metadata-storage](../development/extensions-core/mysql.md) for `mysql`</li><li> [postgresql-metadata-storage](../development/extensions-core/postgresql.md) extension for `postgresql`.</li></ul><br/><br/>You can selectively allow JDBC properties in `connectURI`. See [JDBC connections security config](../configuration/index.md#jdbc-connections-to-external-databases) for more details.|Yes|
|
||||
|foldCase|Toggle case folding of database column names. This may be enabled in cases where the database returns case insensitive column names in query results.|No|
|
||||
|database|Specifies the database connection details. The database type corresponds to the extension that supplies the `connectorConfig` support.<br/><br/>You can selectively allow JDBC properties in `connectURI`. See [JDBC connections security config](../configuration/index.md#jdbc-connections-to-external-databases) for more details.|Yes|
|
||||
|foldCase|Boolean to toggle case folding of database column names. For example, to ingest a database column named `Entry_Date` as `entry_date`, set `foldCase` to true and include `entry_date` in the [`dimensionsSpec`](ingestion-spec.md#dimensionsspec).|No|
|
||||
|sqls|List of SQL queries; each query retrieves a portion of the data to be indexed.|Yes|
|
||||
|
||||
The following is an example of an SQL input source spec:
|
||||
|
@ -887,7 +891,7 @@ Compared to the other native batch input sources, SQL input source behaves diffe
|
|||
|
||||
The Combining input source lets you read data from multiple input sources.
|
||||
It identifies the splits from delegate input sources and uses a worker task to process each split.
|
||||
Use the Combining input source only if all the delegates are splittable and can be used by the [Parallel task](./native-batch.md).
|
||||
Each delegate input source must be splittable and compatible with the [parallel task type](./native-batch.md).
|
||||
|
||||
Similar to other input sources, the Combining input source supports a single `inputFormat`.
|
||||
Delegate input sources that require an `inputFormat` must have the same format for input data.
|
||||
|
@ -931,10 +935,8 @@ The following is an example of a Combining input source spec:
|
|||
|
||||
## Iceberg input source
|
||||
|
||||
:::info
|
||||
|
||||
To use the Iceberg input source, load the extension [`druid-iceberg-extensions`](../development/extensions-contrib/iceberg.md).
|
||||
|
||||
:::info Required extension
|
||||
To use the Iceberg input source, load the extension [`druid-iceberg-extensions`](../development/extensions-contrib/iceberg.md) in your `common.runtime.properties` file.
|
||||
:::
|
||||
|
||||
You use the Iceberg input source to read data stored in the Iceberg table format. For a given table, the input source scans up to the latest Iceberg snapshot from the configured Hive catalog. Druid ingests the underlying live data files using the existing input source formats.
|
||||
|
@ -1138,20 +1140,19 @@ This input source provides the following filters: `and`, `equals`, `interval`, a
|
|||
|
||||
## Delta Lake input source
|
||||
|
||||
:::info
|
||||
|
||||
To use the Delta Lake input source, load the extension [`druid-deltalake-extensions`](../development/extensions-contrib/delta-lake.md).
|
||||
|
||||
:::info Required extension
|
||||
To use the Delta Lake input source, load the extension [`druid-deltalake-extensions`](../development/extensions-contrib/delta-lake.md) in your `common.runtime.properties` file.
|
||||
:::
|
||||
|
||||
You can use the Delta input source to read data stored in a Delta Lake table. For a given table, the input source scans
|
||||
the latest snapshot from the configured table. Druid ingests the underlying delta files from the table.
|
||||
|
||||
| Property|Description|Required|
|
||||
|---------|-----------|--------|
|
||||
| type|Set this value to `delta`.|yes|
|
||||
| tablePath|The location of the Delta table.|yes|
|
||||
| filter|The JSON Object that filters data files within a snapshot.|no|
|
||||
| Property|Description| Default|Required |
|
||||
|---------|-----------|--------|--------|
|
||||
|type|Set this value to `delta`.| None|yes|
|
||||
|tablePath|The location of the Delta table.|None|yes|
|
||||
|filter|The JSON Object that filters data files within a snapshot.|None|no|
|
||||
|snapshotVersion|The snapshot version to read from the Delta table. An integer value must be specified.|Latest|no|
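As a minimal sketch, an input source that pins ingestion to a specific snapshot could combine these properties as follows; the table path and version number are placeholders:

```json
{
  "type": "delta",
  "tablePath": "/delta-table/foo",
  "snapshotVersion": 3
}
```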
|
||||
|
||||
### Delta filter object
|
||||
|
||||
|
@ -1224,7 +1225,7 @@ filters on partitioned columns.
|
|||
| column | The table column to apply the filter on. | yes |
|
||||
| value | The value to use in the filter. | yes |
|
||||
|
||||
The following is a sample spec to read all records from the Delta table `/delta-table/foo`:
|
||||
The following is a sample spec to read all records from the latest snapshot from Delta table `/delta-table/foo`:
|
||||
|
||||
```json
|
||||
...
|
||||
|
@ -1237,7 +1238,8 @@ The following is a sample spec to read all records from the Delta table `/delta-
|
|||
}
|
||||
```
|
||||
|
||||
The following is a sample spec to read records from the Delta table `/delta-table/foo` to select records where `name = 'Employee4' and age >= 30`:
|
||||
The following is a sample spec to read records from the Delta table `/delta-table/foo` snapshot version `3` to select records where
|
||||
`name = 'Employee4' and age >= 30`:
|
||||
|
||||
```json
|
||||
...
|
||||
|
@ -1260,7 +1262,8 @@ The following is a sample spec to read records from the Delta table `/delta-tabl
|
|||
"value": "30"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"snapshotVersion": 3
|
||||
},
|
||||
}
|
||||
```
|
||||
|
|
|
@ -124,7 +124,7 @@ For configuration properties shared across all streaming ingestion methods, refe
|
|||
|`topicPattern`|String|Multiple Kafka topics to read from, passed as a regex pattern. See [Ingest from multiple topics](#ingest-from-multiple-topics) for more information.|Yes if `topic` isn't set.||
|
||||
|`consumerProperties`|String, Object|A map of properties to pass to the Kafka consumer. See [Consumer properties](#consumer-properties) for details.|Yes. At the minimum, you must set the `bootstrap.servers` property to establish the initial connection to the Kafka cluster.||
|
||||
|`pollTimeout`|Long|The length of time to wait for the Kafka consumer to poll records, in milliseconds.|No|100|
|
||||
|`useEarliestOffset`|Boolean|If a supervisor manages a datasource for the first time, it obtains a set of starting offsets from Kafka. This flag determines whether it retrieves the earliest or latest offsets in Kafka. Under normal circumstances, subsequent tasks start from where the previous segments ended. Druid only uses `useEarliestOffset` on the first run.|No|`false`|
|
||||
|`useEarliestOffset`|Boolean|If a supervisor is managing a datasource for the first time, it obtains a set of starting offsets from Kafka. This flag determines whether the supervisor retrieves the earliest or latest offsets in Kafka. Under normal circumstances, subsequent tasks start from where the previous segments ended so this flag is only used on the first run.|No|`false`|
|
||||
|`idleConfig`|Object|Defines how and when the Kafka supervisor can become idle. See [Idle configuration](#idle-configuration) for more details.|No|null|
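As a sketch of how these properties fit together, an `ioConfig` fragment of a Kafka supervisor spec might look like the following; the broker addresses and topic pattern are placeholders, not values from this documentation:

```json
{
  "type": "kafka",
  "topicPattern": "metrics-.*",
  "consumerProperties": {
    "bootstrap.servers": "kafkabroker1:9092,kafkabroker2:9092"
  },
  "pollTimeout": 100,
  "useEarliestOffset": false
}
```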
|
||||
|
||||
#### Ingest from multiple topics
|
||||
|
|
|
@ -128,7 +128,7 @@ For configuration properties shared across all streaming ingestion methods, refe
|
|||
|--------|----|-----------|--------|-------|
|
||||
|`stream`|String|The Kinesis stream to read.|Yes||
|
||||
|`endpoint`|String|The AWS Kinesis stream endpoint for a region. You can find a list of endpoints in the [AWS service endpoints](http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region) document.|No|`kinesis.us-east-1.amazonaws.com`|
|
||||
|`useEarliestSequenceNumber`|Boolean|If a supervisor is managing a datasource for the first time, it obtains a set of starting sequence numbers from Kinesis. This flag determines whether a supervisor retrieves the earliest or latest sequence numbers in Kinesis. Under normal circumstances, subsequent tasks start from where the previous segments ended so this flag is only used on the first run.|No|`false`|
|
||||
|`useEarliestSequenceNumber`|Boolean|If a supervisor is managing a datasource for the first time, it obtains a set of starting sequence numbers from Kinesis. This flag determines whether the supervisor retrieves the earliest or latest sequence numbers in Kinesis. Under normal circumstances, subsequent tasks start from where the previous segments ended so this flag is only used on the first run.|No|`false`|
|
||||
|`fetchDelayMillis`|Integer|Time in milliseconds to wait between subsequent calls to fetch records from Kinesis. See [Determine fetch settings](#determine-fetch-settings).|No|0|
|
||||
|`awsAssumedRoleArn`|String|The AWS assumed role to use for additional permissions.|No||
|
||||
|`awsExternalId`|String|The AWS external ID to use for additional permissions.|No||
|
||||
|
@ -155,7 +155,7 @@ For configuration properties shared across all streaming ingestion methods, refe
|
|||
|
||||
|Property|Type|Description|Required|Default|
|
||||
|--------|----|-----------|--------|-------|
|
||||
|`skipSequenceNumberAvailabilityCheck`|Boolean|Whether to enable checking if the current sequence number is still available in a particular Kinesis shard. If `false`, the indexing task attempts to reset the current sequence number, depending on the value of `resetOffsetAutomatically`.|No|`false`|
|
||||
|`skipSequenceNumberAvailabilityCheck`|Boolean|Whether to enable checking if the current sequence number is still available in a particular Kinesis shard. If `false`, the indexing task attempts to reset the current sequence number, depending on the value of `resetOffsetAutomatically`. For more information on the `resetOffsetAutomatically` property, see [Supervisor tuning configuration](supervisor.md#tuning-configuration).|No|`false`|
|
||||
|`recordBufferSizeBytes`|Integer| The size of the buffer (heap memory bytes) Druid uses between the Kinesis fetch threads and the main ingestion thread.|No| See [Determine fetch settings](#determine-fetch-settings) for defaults.|
|
||||
|`recordBufferOfferTimeout`|Integer|The number of milliseconds to wait for space to become available in the buffer before timing out.|No|5000|
|
||||
|`recordBufferFullWait`|Integer|The number of milliseconds to wait for the buffer to drain before Druid attempts to fetch records from Kinesis again.|No|5000|
|
||||
|
@ -315,7 +315,7 @@ This window with early task shutdowns and possible task failures concludes when:
|
|||
- All closed shards have been fully read and the Kinesis ingestion tasks have published the data from those shards, committing the "closed" state to metadata storage.
|
||||
- Any remaining tasks that had inactive shards in the assignment have been shut down. These tasks would have been created before the closed shards were completely drained.
|
||||
|
||||
Note that when the supervisor is running and detects new partitions, tasks read new partitions from the earliest offsets, irrespective of the `useEarliestSequence` setting. This is because these new shards were immediately discovered and are therefore unlikely to experience a lag.
|
||||
Note that when the supervisor is running and detects new partitions, tasks read new partitions from the earliest sequence number, irrespective of the `useEarliestSequence` setting. This is because these new shards were immediately discovered and are therefore unlikely to experience a lag.
|
||||
|
||||
If resharding occurs when the supervisor is suspended and `useEarliestSequence` is set to `false`, resuming the supervisor causes tasks to read the new shards from the latest sequence. This is by design so that the consumer can catch up quickly with any lag accumulated while the supervisor was suspended.
|
||||
|
||||
|
@ -324,7 +324,7 @@ If resharding occurs when the supervisor is suspended and `useEarliestSequence`
|
|||
Before you deploy the `druid-kinesis-indexing-service` extension to production, consider the following known issues:
|
||||
|
||||
- Kinesis imposes a read throughput limit per shard. If you have multiple supervisors reading from the same Kinesis stream, consider adding more shards to ensure sufficient read throughput for all supervisors.
|
||||
- A Kinesis supervisor can sometimes compare the checkpoint offset to retention window of the stream to see if it has fallen behind. These checks fetch the earliest sequence number for Kinesis which can result in `IteratorAgeMilliseconds` becoming very high in AWS CloudWatch.
|
||||
- A Kinesis supervisor can sometimes compare the checkpoint sequence number to the retention window of the stream to see if it has fallen behind. These checks fetch the earliest sequence number for Kinesis which can result in `IteratorAgeMilliseconds` becoming very high in AWS CloudWatch.
|
||||
|
||||
## Learn more
|
||||
|
||||
|
|
|
@ -204,7 +204,7 @@ For configuration properties specific to Kafka and Kinesis, see [Kafka tuning co
|
|||
|`indexSpecForIntermediatePersists`|Object|Defines segment storage format options to use at indexing time for intermediate persisted temporary segments. You can use `indexSpecForIntermediatePersists` to disable dimension/metric compression on intermediate segments to reduce memory required for final merging. However, disabling compression on intermediate segments might increase page cache use while they are in use, before they are merged into the final published segment.|No||
|
||||
|`reportParseExceptions`|Boolean|DEPRECATED. If `true`, Druid throws exceptions encountered during parsing causing ingestion to halt. If `false`, Druid skips unparseable rows and fields. Setting `reportParseExceptions` to `true` overrides existing configurations for `maxParseExceptions` and `maxSavedParseExceptions`, setting `maxParseExceptions` to 0 and limiting `maxSavedParseExceptions` to not more than 1.|No|`false`|
|
||||
|`handoffConditionTimeout`|Long|Number of milliseconds to wait for segment handoff. Set to a value >= 0, where 0 means to wait indefinitely.|No|900000 (15 minutes) for Kafka. 0 for Kinesis.|
|
||||
|`resetOffsetAutomatically`|Boolean|Resets partitions when the sequence number is unavailable. If set to `true`, Druid resets partitions to the earliest or latest offset, based on the value of `useEarliestSequenceNumber` or `useEarliestOffset` (earliest if `true`, latest if `false`). If set to `false`, Druid surfaces the exception causing tasks to fail and ingestion to halt. If this occurs, manual intervention is required to correct the situation, potentially through [resetting the supervisor](../api-reference/supervisor-api.md#reset-a-supervisor).|No|`false`|
|
||||
|`resetOffsetAutomatically`|Boolean|Resets partitions when the offset is unavailable. If set to `true`, Druid resets partitions to the earliest or latest offset, based on the value of `useEarliestOffset` or `useEarliestSequenceNumber` (earliest if `true`, latest if `false`). If set to `false`, Druid surfaces the exception causing tasks to fail and ingestion to halt. If this occurs, manual intervention is required to correct the situation, potentially through [resetting the supervisor](../api-reference/supervisor-api.md#reset-a-supervisor).|No|`false`|
|
||||
|`workerThreads`|Integer|The number of threads that the supervisor uses to handle requests/responses for worker tasks, along with any other internal asynchronous operation.|No|`min(10, taskCount)`|
|
||||
|`chatRetries`|Integer|The number of times Druid retries HTTP requests to indexing tasks before considering tasks unresponsive.|No|8|
|
||||
|`httpTimeout`|ISO 8601 period|The period of time to wait for a HTTP response from an indexing task.|No|`PT10S`|
|
||||
|
@ -214,6 +214,7 @@ For configuration properties specific to Kafka and Kinesis, see [Kafka tuning co
|
|||
|`logParseExceptions`|Boolean|If `true`, Druid logs an error message when a parsing exception occurs, containing information about the row where the error occurred.|No|`false`|
|
||||
|`maxParseExceptions`|Integer|The maximum number of parse exceptions that can occur before the task halts ingestion and fails. Setting `reportParseExceptions` overrides this limit.|No|unlimited|
|
||||
|`maxSavedParseExceptions`|Integer|When a parse exception occurs, Druid keeps track of the most recent parse exceptions. `maxSavedParseExceptions` limits the number of saved exception instances. These saved exceptions are available after the task finishes in the [task completion report](../ingestion/tasks.md#task-reports). Setting `reportParseExceptions` overrides this limit.|No|0|
|
||||
|`maxColumnsToMerge`|Integer|Limit of the number of segments to merge in a single phase when merging segments for publishing. This limit affects the total number of columns present in a set of segments to merge. If the limit is exceeded, segment merging occurs in multiple phases. Druid merges at least 2 segments per phase, regardless of this setting.|No|-1|
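For example, a `tuningConfig` fragment for a Kafka supervisor that logs and tolerates a bounded number of parse errors instead of failing immediately might look like this sketch; the thresholds are arbitrary placeholders:

```json
{
  "type": "kafka",
  "logParseExceptions": true,
  "maxParseExceptions": 100,
  "maxSavedParseExceptions": 10,
  "resetOffsetAutomatically": false
}
```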
|
||||
|
||||
## Start a supervisor
|
||||
|
||||
|
@ -395,7 +396,7 @@ For information on how to terminate a supervisor by API, see [Supervisors: Termi
|
|||
|
||||
Indexing tasks run on Middle Managers and are limited by the resources available in the Middle Manager cluster. In particular, you should make sure that you have sufficient worker capacity, configured using the
|
||||
`druid.worker.capacity` property, to handle the configuration in the supervisor spec. Note that worker capacity is
|
||||
shared across all types of indexing tasks, so you should plan your worker capacity to handle your total indexing load, such as batch processing, streaming tasks, and merging tasks. If your workers run out of capacity, indexing tasks queue and wait for the next available worker. This may cause queries to return partial results but will not result in data loss, assuming the tasks run before the stream purges those sequence numbers.
|
||||
shared across all types of indexing tasks, so you should plan your worker capacity to handle your total indexing load, such as batch processing, streaming tasks, and merging tasks. If your workers run out of capacity, indexing tasks queue and wait for the next available worker. This may cause queries to return partial results but will not result in data loss, assuming the tasks run before the stream purges those offsets.
|
||||
|
||||
A running task can be in one of two states: reading or publishing. A task remains in reading state for the period defined in `taskDuration`, at which point it transitions to publishing state. A task remains in publishing state for as long as it takes to generate segments, push segments to deep storage, and have them loaded and served by a Historical service or until `completionTimeout` elapses.
|
||||
|
||||
|
|
|
@ -36,8 +36,8 @@ Task APIs are available in two main places:
|
|||
- The [Overlord](../design/overlord.md) process offers HTTP APIs to submit tasks, cancel tasks, check their status,
|
||||
review logs and reports, and more. Refer to the [Tasks API reference](../api-reference/tasks-api.md) for a
|
||||
full list.
|
||||
- Druid SQL includes a [`sys.tasks`](../querying/sql-metadata-tables.md#tasks-table) table that provides information about currently
|
||||
running tasks. This table is read-only, and has a limited (but useful!) subset of the full information available through
|
||||
- Druid SQL includes a [`sys.tasks`](../querying/sql-metadata-tables.md#tasks-table) table that provides information about active
|
||||
and recently completed tasks. This table is read-only and has a subset of the full task report available through
|
||||
the Overlord APIs.
|
||||
|
||||
<a name="reports"></a>
|
||||
|
|
|
@ -508,6 +508,8 @@ These metrics are only available if the `OshiSysMonitor` module is included.
|
|||
|`sys/tcpv4/out/rsts`|Total "out reset" packets sent to reset the connection||Generally 0|
|
||||
|`sys/tcpv4/retrans/segs`|Total segments re-transmitted||Varies|
|
||||
|
||||
To enable only some of these metric categories, specify `druid.monitoring.sys.categories`.
|
||||
Possible values are `mem`, `swap`, `fs`, `disk`, `net`, `cpu`, `sys`, and `tcp`.
|
||||
|
||||
## S3 multi-part upload
|
||||
|
||||
|
|
|
@ -377,7 +377,7 @@ The JDBC lookups will poll a database to populate its local cache. If the `tsCol
|
|||
:::info
|
||||
If using JDBC, you will need to add your database's client JAR files to the extension's directory.
|
||||
For Postgres, the connector JAR is already included.
|
||||
See the MySQL extension documentation for instructions to obtain [MySQL](../development/extensions-core/mysql.md#installing-the-mysql-connector-library) or [MariaDB](../development/extensions-core/mysql.md#alternative-installing-the-mariadb-connector-library) connector libraries.
|
||||
See the MySQL extension documentation for instructions to obtain [MySQL](../development/extensions-core/mysql.md#install-mysql-connectorj) or [MariaDB](../development/extensions-core/mysql.md#install-mariadb-connectorj) connector libraries.
|
||||
The connector JAR should reside in the classpath of Druid's main class loader.
|
||||
To add the connector JAR to the classpath, you can copy the downloaded file to `lib/` under the distribution root directory. Alternatively, create a symbolic link to the connector in the `lib` directory.
|
||||
:::
|
||||
|
|
|
@ -246,6 +246,7 @@ JSON functions provide facilities to extract, transform, and create `COMPLEX<jso
|
|||
| to_json_string(expr) | Convert `expr` into a JSON `STRING` value |
|
||||
| json_keys(expr, path) | Get array of field names from `expr` at the specified JSONPath `path`, or null if the data does not exist or have any fields |
|
||||
| json_paths(expr) | Get array of all JSONPath paths available from `expr` |
|
||||
| json_merge(expr1, expr2[, expr3 ...]) | Merges two or more JSON `STRING` or `COMPLEX<json>` values into one. Preserves the rightmost value when there are key overlaps. |
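As a worked illustration of the rightmost-wins behavior, merging the made-up inputs `'{"a":1,"b":2}'` and `'{"b":3,"c":4}'` with `json_merge` yields:

```json
{"a": 1, "b": 3, "c": 4}
```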
|
||||
|
||||
### JSONPath syntax
|
||||
|
||||
|
|
File diff suppressed because it is too large
|
@ -38,6 +38,7 @@ You can use the following JSON functions to extract, transform, and create `COMP
|
|||
| --- | --- |
|
||||
|`JSON_KEYS(expr, path)`| Returns an array of field names from `expr` at the specified `path`.|
|
||||
|`JSON_OBJECT(KEY expr1 VALUE expr2[, KEY expr3 VALUE expr4, ...])` | Constructs a new `COMPLEX<json>` object. The `KEY` expressions must evaluate to string types. The `VALUE` expressions can be composed of any input type, including other `COMPLEX<json>` values. `JSON_OBJECT` can accept colon-separated key-value pairs. The following syntax is equivalent: `JSON_OBJECT(expr1:expr2[, expr3:expr4, ...])`.|
|
||||
|`JSON_MERGE(expr1, expr2[, expr3 ...])`| Merges two or more JSON `STRING` or `COMPLEX<json>` into one. Preserves the rightmost value when there are key overlaps. Returning always a `COMPLEX<json>` type.|
|
||||
|`JSON_PATHS(expr)`| Returns an array of all paths which refer to literal values in `expr` in JSONPath format. |
|
||||
|`JSON_QUERY(expr, path)`| Extracts a `COMPLEX<json>` value from `expr`, at the specified `path`. |
|
||||
|`JSON_QUERY_ARRAY(expr, path)`| Extracts an `ARRAY<COMPLEX<json>>` value from `expr` at the specified `path`. If value is not an `ARRAY`, it gets translated into a single element `ARRAY` containing the value at `path`. The primary use of this function is to extract arrays of objects to use as inputs to other [array functions](./sql-array-functions.md).|
|
||||
|
|
|
@ -266,7 +266,7 @@ GROUP BY servers.server;
|
|||
|
||||
### TASKS table
|
||||
|
||||
The tasks table provides information about active and recently-completed indexing tasks. For more information
|
||||
The tasks table provides information about active and recently completed tasks. For more information
|
||||
check out the documentation for [ingestion tasks](../ingestion/tasks.md).
|
||||
|
||||
|Column|Type|Notes|
|
||||
|
|
|
@ -173,10 +173,10 @@ overhead.
|
|||
|`MILLIS_TO_TIMESTAMP(millis_expr)`|Converts a number of milliseconds since the epoch (1970-01-01 00:00:00 UTC) into a timestamp.|
|
||||
|`TIMESTAMP_TO_MILLIS(timestamp_expr)`|Converts a timestamp into a number of milliseconds since the epoch.|
|
||||
|`EXTRACT(unit FROM timestamp_expr)`|Extracts a time part from `expr`, returning it as a number. Unit can be EPOCH, MICROSECOND, MILLISECOND, SECOND, MINUTE, HOUR, DAY (day of month), DOW (day of week), ISODOW (ISO day of week), DOY (day of year), WEEK (week of year), MONTH, QUARTER, YEAR, ISOYEAR, DECADE, CENTURY or MILLENNIUM. Units must be provided unquoted, like `EXTRACT(HOUR FROM __time)`.|
|
||||
|`FLOOR(timestamp_expr TO unit)`|Rounds down a timestamp, returning it as a new timestamp. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|
||||
|`CEIL(timestamp_expr TO unit)`|Rounds up a timestamp, returning it as a new timestamp. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|
||||
|`TIMESTAMPADD(unit, count, timestamp)`|Equivalent to `timestamp + count * INTERVAL '1' UNIT`.|
|
||||
|`TIMESTAMPDIFF(unit, timestamp1, timestamp2)`|Returns the (signed) number of `unit` between `timestamp1` and `timestamp2`. Unit can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|
||||
|`FLOOR(timestamp_expr TO unit)`|Rounds down a timestamp, returning it as a new timestamp. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|
||||
|`CEIL(timestamp_expr TO unit)`|Rounds up a timestamp, returning it as a new timestamp. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|
||||
|`TIMESTAMPADD(unit, count, timestamp)`|Adds a `count` number of time `unit` to timestamp, equivalent to `timestamp + count * unit`. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|
||||
|`TIMESTAMPDIFF(unit, timestamp1, timestamp2)`|Returns a signed number of `unit` between `timestamp1` and `timestamp2`. The `unit` parameter must be unquoted and can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.|
|
||||
|
||||
|
||||
## Reduction functions
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
-server
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms15g
|
||||
-Xmx15g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
-server
|
||||
-Xms1g
|
||||
-Xmx1g
|
||||
-XX:+UseG1GC
|
||||
-XX:MaxDirectMemorySize=128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-Duser.timezone=UTC
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx12g
|
||||
-XX:MaxDirectMemorySize=11g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms15g
|
||||
-Xmx15g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx16g
|
||||
-XX:MaxDirectMemorySize=25g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms256m
|
||||
-Xmx256m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
-server
|
||||
-Xms1g
|
||||
-Xmx1g
|
||||
-XX:+UseG1GC
|
||||
-XX:MaxDirectMemorySize=128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-Duser.timezone=UTC
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx8g
|
||||
-XX:MaxDirectMemorySize=5g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms9g
|
||||
-Xmx9g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx8g
|
||||
-XX:MaxDirectMemorySize=13g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms256m
|
||||
-Xmx256m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
-server
|
||||
-Xms512m
|
||||
-Xmx512m
|
||||
-XX:+UseG1GC
|
||||
-XX:MaxDirectMemorySize=128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-Duser.timezone=UTC
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx512m
|
||||
-XX:MaxDirectMemorySize=768m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms256m
|
||||
-Xmx256m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx512m
|
||||
-XX:MaxDirectMemorySize=1280m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms64m
|
||||
-Xmx64m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
-server
|
||||
-Xms128m
|
||||
-Xmx128m
|
||||
-XX:+UseG1GC
|
||||
-XX:MaxDirectMemorySize=128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-Duser.timezone=UTC
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx512m
|
||||
-XX:MaxDirectMemorySize=400m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms256m
|
||||
-Xmx256m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx512m
|
||||
-XX:MaxDirectMemorySize=400m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms64m
|
||||
-Xmx64m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
-server
|
||||
-Xms128m
|
||||
-Xmx128m
|
||||
-XX:+UseG1GC
|
||||
-XX:MaxDirectMemorySize=128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-Duser.timezone=UTC
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx4g
|
||||
-XX:MaxDirectMemorySize=3g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms4500m
|
||||
-Xmx4500m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx4g
|
||||
-XX:MaxDirectMemorySize=8g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms128m
|
||||
-Xmx128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
-server
|
||||
-Xms512m
|
||||
-Xmx512m
|
||||
-XX:+UseG1GC
|
||||
-XX:MaxDirectMemorySize=128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx16g
|
||||
-XX:MaxDirectMemorySize=12g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms18g
|
||||
-Xmx18g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
-Xmx24g
|
||||
-XX:MaxDirectMemorySize=44g
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
-Xms256m
|
||||
-Xmx256m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-XX:+UseG1GC
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=var/tmp
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
-server
|
||||
-Xms1g
|
||||
-Xmx1g
|
||||
-XX:+UseG1GC
|
||||
-XX:MaxDirectMemorySize=128m
|
||||
-XX:+ExitOnOutOfMemoryError
|
||||
-Duser.timezone=UTC
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<parent>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -537,7 +537,7 @@ public class OssInputSourceTest extends InitializedNullHandlingTest
|
|||
|
||||
InputSourceReader reader = inputSource.reader(
|
||||
someSchema,
|
||||
new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
|
||||
new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null),
|
||||
temporaryFolder.newFolder()
|
||||
);
|
||||
|
||||
|
@ -584,7 +584,7 @@ public class OssInputSourceTest extends InitializedNullHandlingTest
|
|||
|
||||
InputSourceReader reader = inputSource.reader(
|
||||
someSchema,
|
||||
new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0),
|
||||
new CsvInputFormat(ImmutableList.of("time", "dim1", "dim2"), "|", false, null, 0, null),
|
||||
temporaryFolder.newFolder()
|
||||
);
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
<parent>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
<parent>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
<parent>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
<parent>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
<parent>
|
||||
<artifactId>druid</artifactId>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
<parent>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -35,8 +35,8 @@ import org.apache.druid.query.timeseries.TimeseriesResultValue;
|
|||
import org.apache.druid.segment.IncrementalIndexTimeBoundaryInspector;
|
||||
import org.apache.druid.segment.TestHelper;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexCursorFactory;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
|
||||
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
import org.joda.time.DateTime;
|
||||
|
@ -104,7 +104,7 @@ public class DistinctCountTimeseriesQueryTest extends InitializedNullHandlingTes
|
|||
final Iterable<Result<TimeseriesResultValue>> results =
|
||||
engine.process(
|
||||
query,
|
||||
new IncrementalIndexStorageAdapter(index),
|
||||
new IncrementalIndexCursorFactory(index),
|
||||
new IncrementalIndexTimeBoundaryInspector(index),
|
||||
new DefaultTimeseriesQueryMetrics()
|
||||
).toList();
|
||||
|
|
|
@ -33,13 +33,13 @@ import org.apache.druid.query.topn.TopNQuery;
|
|||
import org.apache.druid.query.topn.TopNQueryBuilder;
|
||||
import org.apache.druid.query.topn.TopNQueryEngine;
|
||||
import org.apache.druid.query.topn.TopNResultValue;
|
||||
import org.apache.druid.segment.IncrementalIndexTimeBoundaryInspector;
|
||||
import org.apache.druid.segment.IncrementalIndexSegment;
|
||||
import org.apache.druid.segment.TestHelper;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndex;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
|
||||
import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
|
||||
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
|
||||
import org.apache.druid.testing.InitializedNullHandlingTest;
|
||||
import org.apache.druid.timeline.SegmentId;
|
||||
import org.joda.time.DateTime;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
@ -133,8 +133,7 @@ public class DistinctCountTopNQueryTest extends InitializedNullHandlingTest
|
|||
final Iterable<Result<TopNResultValue>> results =
|
||||
engine.query(
|
||||
query,
|
||||
new IncrementalIndexStorageAdapter(index),
|
||||
new IncrementalIndexTimeBoundaryInspector(index),
|
||||
new IncrementalIndexSegment(index, SegmentId.dummy(QueryRunnerTestHelper.DATA_SOURCE)),
|
||||
null
|
||||
).toList();
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
<parent>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
<parent>
|
||||
<artifactId>druid</artifactId>
|
||||
<groupId>org.apache.druid</groupId>
|
||||
<version>31.0.0-SNAPSHOT</version>
|
||||
<version>32.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
|
|
@ -67,9 +67,9 @@ import java.util.stream.Collectors;
|
|||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Input source to ingest data from a Delta Lake. This input source reads the latest snapshot from a Delta table
|
||||
* specified by {@code tablePath} parameter. If {@code filter} is specified, it's used at the Kernel level
|
||||
* for data pruning. The filtering behavior is as follows:
|
||||
* Input source to ingest data from a Delta Lake. This input source reads the given {@code snapshotVersion} from a Delta
|
||||
* table specified by the {@code tablePath} parameter, or the latest snapshot if it's not specified.
|
||||
* If {@code filter} is specified, it's used at the Kernel level for data pruning. The filtering behavior is as follows:
|
||||
* <ul>
|
||||
* <li> When a filter is applied on a partitioned table using the partitioning columns, the filtering is guaranteed. </li>
|
||||
* <li> When a filter is applied on non-partitioned columns, the filtering is best-effort as the Delta
|
||||
|
@ -79,7 +79,6 @@ import java.util.stream.Stream;
|
|||
* <p>
|
||||
* We leverage the Delta Kernel APIs to interact with a Delta table. The Kernel API abstracts away the
|
||||
* complexities of the Delta protocol itself.
|
||||
* Note: currently, the Kernel table API only supports reading from the latest snapshot.
|
||||
* </p>
|
||||
*/
|
||||
public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
||||
|
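As a quick illustration of the javadoc above, here is a minimal sketch of constructing the input source with the new `snapshotVersion` parameter; the table path is hypothetical, and `null` keeps the previous read-latest behavior:

```java
// Sketch only: pin the read to snapshot version 1 of a Delta table.
// The table path is hypothetical; deltaSplit and filter remain optional (null here).
DeltaInputSource deltaInputSource = new DeltaInputSource(
    "s3a://warehouse/my-delta-table",  // tablePath (hypothetical)
    null,                              // deltaSplit: null when not reading a single split
    null,                              // filter: optional Kernel-level pruning predicate
    1L                                 // snapshotVersion: null reads the latest snapshot
);
```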
@ -97,11 +96,15 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
@Nullable
|
||||
private final DeltaFilter filter;
|
||||
|
||||
@JsonProperty
|
||||
private final Long snapshotVersion;
|
||||
|
||||
@JsonCreator
|
||||
public DeltaInputSource(
|
||||
@JsonProperty("tablePath") final String tablePath,
|
||||
@JsonProperty("deltaSplit") @Nullable final DeltaSplit deltaSplit,
|
||||
@JsonProperty("filter") @Nullable final DeltaFilter filter
|
||||
@JsonProperty("filter") @Nullable final DeltaFilter filter,
|
||||
@JsonProperty("snapshotVersion") @Nullable final Long snapshotVersion
|
||||
)
|
||||
{
|
||||
if (tablePath == null) {
|
||||
|
@ -110,6 +113,7 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
this.tablePath = tablePath;
|
||||
this.deltaSplit = deltaSplit;
|
||||
this.filter = filter;
|
||||
this.snapshotVersion = snapshotVersion;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -152,15 +156,15 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
}
|
||||
} else {
|
||||
final Table table = Table.forPath(engine, tablePath);
|
||||
final Snapshot latestSnapshot = getLatestSnapshotForTable(table, engine);
|
||||
final Snapshot snapshot = getSnapshotForTable(table, engine);
|
||||
|
||||
final StructType fullSnapshotSchema = latestSnapshot.getSchema(engine);
|
||||
final StructType fullSnapshotSchema = snapshot.getSchema(engine);
|
||||
final StructType prunedSchema = pruneSchema(
|
||||
fullSnapshotSchema,
|
||||
inputRowSchema.getColumnsFilter()
|
||||
);
|
||||
|
||||
final ScanBuilder scanBuilder = latestSnapshot.getScanBuilder(engine);
|
||||
final ScanBuilder scanBuilder = snapshot.getScanBuilder(engine);
|
||||
if (filter != null) {
|
||||
scanBuilder.withFilter(engine, filter.getFilterPredicate(fullSnapshotSchema));
|
||||
}
|
||||
|
@ -206,17 +210,17 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
}
|
||||
|
||||
final Engine engine = createDeltaEngine();
|
||||
final Snapshot latestSnapshot;
|
||||
final Snapshot snapshot;
|
||||
final Table table = Table.forPath(engine, tablePath);
|
||||
try {
|
||||
latestSnapshot = getLatestSnapshotForTable(table, engine);
|
||||
snapshot = getSnapshotForTable(table, engine);
|
||||
}
|
||||
catch (TableNotFoundException e) {
|
||||
throw InvalidInput.exception(e, "tablePath[%s] not found.", tablePath);
|
||||
}
|
||||
final StructType fullSnapshotSchema = latestSnapshot.getSchema(engine);
|
||||
final StructType fullSnapshotSchema = snapshot.getSchema(engine);
|
||||
|
||||
final ScanBuilder scanBuilder = latestSnapshot.getScanBuilder(engine);
|
||||
final ScanBuilder scanBuilder = snapshot.getScanBuilder(engine);
|
||||
if (filter != null) {
|
||||
scanBuilder.withFilter(engine, filter.getFilterPredicate(fullSnapshotSchema));
|
||||
}
|
||||
|
@ -254,7 +258,8 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
return new DeltaInputSource(
|
||||
tablePath,
|
||||
split.get(),
|
||||
filter
|
||||
filter,
|
||||
snapshotVersion
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -333,7 +338,7 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
);
|
||||
}
|
||||
|
||||
private Snapshot getLatestSnapshotForTable(final Table table, final Engine engine)
|
||||
private Snapshot getSnapshotForTable(final Table table, final Engine engine)
|
||||
{
|
||||
// Setting the LogStore class loader before calling the Delta Kernel snapshot API is required as a workaround with
|
||||
// the 3.2.0 Delta Kernel because the Kernel library cannot instantiate the LogStore class otherwise. Please see
|
||||
|
@ -341,7 +346,11 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
final ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader();
|
||||
try {
|
||||
Thread.currentThread().setContextClassLoader(LogStore.class.getClassLoader());
|
||||
return table.getLatestSnapshot(engine);
|
||||
if (snapshotVersion != null) {
|
||||
return table.getSnapshotAsOfVersion(engine, snapshotVersion);
|
||||
} else {
|
||||
return table.getLatestSnapshot(engine);
|
||||
}
|
||||
}
|
||||
finally {
|
||||
Thread.currentThread().setContextClassLoader(currCtxCl);
|
||||
|
@ -359,4 +368,10 @@ public class DeltaInputSource implements SplittableInputSource<DeltaSplit>
|
|||
{
|
||||
return filter;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
Long getSnapshotVersion()
|
||||
{
|
||||
return snapshotVersion;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,8 @@ public class DeltaInputRowTest
|
|||
Object[][] data = new Object[][]{
|
||||
{NonPartitionedDeltaTable.DELTA_TABLE_PATH, NonPartitionedDeltaTable.FULL_SCHEMA, NonPartitionedDeltaTable.DIMENSIONS, NonPartitionedDeltaTable.EXPECTED_ROWS},
|
||||
{PartitionedDeltaTable.DELTA_TABLE_PATH, PartitionedDeltaTable.FULL_SCHEMA, PartitionedDeltaTable.DIMENSIONS, PartitionedDeltaTable.EXPECTED_ROWS},
|
||||
{ComplexTypesDeltaTable.DELTA_TABLE_PATH, ComplexTypesDeltaTable.FULL_SCHEMA, ComplexTypesDeltaTable.DIMENSIONS, ComplexTypesDeltaTable.EXPECTED_ROWS}
|
||||
{ComplexTypesDeltaTable.DELTA_TABLE_PATH, ComplexTypesDeltaTable.FULL_SCHEMA, ComplexTypesDeltaTable.DIMENSIONS, ComplexTypesDeltaTable.EXPECTED_ROWS},
|
||||
{SnapshotDeltaTable.DELTA_TABLE_PATH, SnapshotDeltaTable.FULL_SCHEMA, SnapshotDeltaTable.DIMENSIONS, SnapshotDeltaTable.LATEST_SNAPSHOT_EXPECTED_ROWS}
|
||||
};
|
||||
return Arrays.asList(data);
|
||||
}
|
||||
|
@ -124,7 +125,7 @@ public class DeltaInputRowTest
|
|||
@ParameterizedTest(name = "{index}:with context {0}")
|
||||
public void testReadNonExistentTable()
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null);
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null, null);
|
||||
|
||||
MatcherAssert.assertThat(
|
||||
Assert.assertThrows(
|
||||
|
|
|
@ -139,4 +139,18 @@ public class DeltaInputSourceSerdeTest
|
|||
exception.getCause().getMessage()
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeltaInputSourceWithSnapshotVersion() throws JsonProcessingException
|
||||
{
|
||||
final String payload = "{\n"
|
||||
+ " \"type\": \"delta\",\n"
|
||||
+ " \"tablePath\": \"foo/bar\",\n"
|
||||
+ " \"snapshotVersion\": 56\n"
|
||||
+ " }";
|
||||
|
||||
final DeltaInputSource deltaInputSource = OBJECT_MAPPER.readValue(payload, DeltaInputSource.class);
|
||||
Assert.assertEquals("foo/bar", deltaInputSource.getTablePath());
|
||||
Assert.assertEquals((Long) 56L, deltaInputSource.getSnapshotVersion());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.apache.druid.delta.input;
|
||||
|
||||
import io.delta.kernel.exceptions.KernelException;
|
||||
import org.apache.druid.data.input.InputRow;
|
||||
import org.apache.druid.data.input.InputRowListPlusRawValues;
|
||||
import org.apache.druid.data.input.InputRowSchema;
|
||||
|
@ -68,27 +69,62 @@ public class DeltaInputSourceTest
|
|||
{
|
||||
NonPartitionedDeltaTable.DELTA_TABLE_PATH,
|
||||
NonPartitionedDeltaTable.FULL_SCHEMA,
|
||||
null,
|
||||
NonPartitionedDeltaTable.EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
NonPartitionedDeltaTable.DELTA_TABLE_PATH,
|
||||
NonPartitionedDeltaTable.SCHEMA_1,
|
||||
null,
|
||||
NonPartitionedDeltaTable.EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
NonPartitionedDeltaTable.DELTA_TABLE_PATH,
|
||||
NonPartitionedDeltaTable.SCHEMA_2,
|
||||
null,
|
||||
NonPartitionedDeltaTable.EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
PartitionedDeltaTable.DELTA_TABLE_PATH,
|
||||
PartitionedDeltaTable.FULL_SCHEMA,
|
||||
null,
|
||||
PartitionedDeltaTable.EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
ComplexTypesDeltaTable.DELTA_TABLE_PATH,
|
||||
ComplexTypesDeltaTable.FULL_SCHEMA,
|
||||
null,
|
||||
ComplexTypesDeltaTable.EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
SnapshotDeltaTable.DELTA_TABLE_PATH,
|
||||
SnapshotDeltaTable.FULL_SCHEMA,
|
||||
0L,
|
||||
SnapshotDeltaTable.V0_SNAPSHOT_EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
SnapshotDeltaTable.DELTA_TABLE_PATH,
|
||||
SnapshotDeltaTable.FULL_SCHEMA,
|
||||
1L,
|
||||
SnapshotDeltaTable.V1_SNAPSHOT_EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
SnapshotDeltaTable.DELTA_TABLE_PATH,
|
||||
SnapshotDeltaTable.FULL_SCHEMA,
|
||||
2L,
|
||||
SnapshotDeltaTable.V2_SNAPSHOT_EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
SnapshotDeltaTable.DELTA_TABLE_PATH,
|
||||
SnapshotDeltaTable.FULL_SCHEMA,
|
||||
3L,
|
||||
SnapshotDeltaTable.LATEST_SNAPSHOT_EXPECTED_ROWS
|
||||
},
|
||||
{
|
||||
SnapshotDeltaTable.DELTA_TABLE_PATH,
|
||||
SnapshotDeltaTable.FULL_SCHEMA,
|
||||
null,
|
||||
SnapshotDeltaTable.LATEST_SNAPSHOT_EXPECTED_ROWS
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -98,12 +134,14 @@ public class DeltaInputSourceTest
|
|||
@Parameterized.Parameter(1)
|
||||
public InputRowSchema schema;
|
||||
@Parameterized.Parameter(2)
|
||||
public Long snapshotVersion;
|
||||
@Parameterized.Parameter(3)
|
||||
public List<Map<String, Object>> expectedRows;
|
||||
|
||||
@Test
|
||||
public void testSampleDeltaTable() throws IOException
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null);
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null, snapshotVersion);
|
||||
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
|
||||
|
||||
List<InputRowListPlusRawValues> actualSampledRows = sampleAllRows(inputSourceReader);
|
||||
|
@ -137,7 +175,7 @@ public class DeltaInputSourceTest
|
|||
@Test
|
||||
public void testReadDeltaTable() throws IOException
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null);
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, null, snapshotVersion);
|
||||
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
|
||||
final List<InputRow> actualReadRows = readAllRows(inputSourceReader);
|
||||
validateRows(expectedRows, actualReadRows, schema);
|
||||
|
@ -269,7 +307,7 @@ public class DeltaInputSourceTest
|
|||
@Test
|
||||
public void testSampleDeltaTable() throws IOException
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter);
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter, null);
|
||||
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
|
||||
|
||||
List<InputRowListPlusRawValues> actualSampledRows = sampleAllRows(inputSourceReader);
|
||||
|
@ -311,7 +349,7 @@ public class DeltaInputSourceTest
|
|||
@Test
|
||||
public void testReadDeltaTable() throws IOException
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter);
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(deltaTablePath, null, filter, null);
|
||||
final InputSourceReader inputSourceReader = deltaInputSource.reader(schema, null, null);
|
||||
final List<InputRow> actualReadRows = readAllRows(inputSourceReader);
|
||||
validateRows(expectedRows, actualReadRows, schema);
|
||||
|
@ -326,7 +364,7 @@ public class DeltaInputSourceTest
|
|||
MatcherAssert.assertThat(
|
||||
Assert.assertThrows(
|
||||
DruidException.class,
|
||||
() -> new DeltaInputSource(null, null, null)
|
||||
() -> new DeltaInputSource(null, null, null, null)
|
||||
),
|
||||
DruidExceptionMatcher.invalidInput().expectMessageIs(
|
||||
"tablePath cannot be null."
|
||||
|
@ -337,7 +375,7 @@ public class DeltaInputSourceTest
|
|||
@Test
|
||||
public void testSplitNonExistentTable()
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null);
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null, null);
|
||||
|
||||
MatcherAssert.assertThat(
|
||||
Assert.assertThrows(
|
||||
|
@ -353,7 +391,7 @@ public class DeltaInputSourceTest
|
|||
@Test
|
||||
public void testReadNonExistentTable()
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null);
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource("non-existent-table", null, null, null);
|
||||
|
||||
MatcherAssert.assertThat(
|
||||
Assert.assertThrows(
|
||||
|
@ -365,6 +403,22 @@ public class DeltaInputSourceTest
|
|||
)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadNonExistentSnapshot()
|
||||
{
|
||||
final DeltaInputSource deltaInputSource = new DeltaInputSource(
|
||||
SnapshotDeltaTable.DELTA_TABLE_PATH,
|
||||
null,
|
||||
null,
|
||||
100L
|
||||
);
|
||||
|
||||
Assert.assertThrows(
|
||||
KernelException.class,
|
||||
() -> deltaInputSource.reader(null, null, null)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private static List<InputRowListPlusRawValues> sampleAllRows(InputSourceReader reader) throws IOException
|
||||
|
|
|
@ -37,7 +37,9 @@ public class RowSerdeTest
|
|||
{
|
||||
Object[][] data = new Object[][]{
|
||||
{NonPartitionedDeltaTable.DELTA_TABLE_PATH},
|
||||
{PartitionedDeltaTable.DELTA_TABLE_PATH}
|
||||
{PartitionedDeltaTable.DELTA_TABLE_PATH},
|
||||
{ComplexTypesDeltaTable.DELTA_TABLE_PATH},
|
||||
{SnapshotDeltaTable.DELTA_TABLE_PATH}
|
||||
};
|
||||
return Arrays.asList(data);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.delta.input;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.apache.druid.data.input.ColumnsFilter;
|
||||
import org.apache.druid.data.input.InputRowSchema;
|
||||
import org.apache.druid.data.input.impl.DimensionsSpec;
|
||||
import org.apache.druid.data.input.impl.TimestampSpec;
|
||||
import org.apache.druid.java.util.common.DateTimes;
|
||||
import org.apache.druid.segment.AutoTypeColumnSchema;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Refer to extensions-contrib/druid-deltalake-extensions/src/test/resources/README.md to generate the
|
||||
* sample snapshot Delta Lake table used in the unit tests.
|
||||
*
|
||||
*/
|
||||
public class SnapshotDeltaTable
|
||||
{
|
||||
/**
|
||||
* The Delta table path used by unit tests.
|
||||
*/
|
||||
public static final String DELTA_TABLE_PATH = "src/test/resources/snapshot-table";
|
||||
|
||||
/**
|
||||
* The list of dimensions in the Delta table {@link #DELTA_TABLE_PATH}.
|
||||
*/
|
||||
public static final List<String> DIMENSIONS = ImmutableList.of("id", "map_info");
|
||||
|
||||
public static final List<Map<String, Object>> V0_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
|
||||
ImmutableList.of(
|
||||
ImmutableMap.of(
|
||||
"id", 0L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 0)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
"id", 1L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 0)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
"id", 2L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 0)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
public static final List<Map<String, Object>> V1_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
|
||||
ImmutableList.of(
|
||||
ImmutableMap.of(
|
||||
"id", 0L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 0)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
"id", 2L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 0)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
public static final List<Map<String, Object>> V2_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
|
||||
ImmutableList.of(
|
||||
ImmutableMap.of(
|
||||
"id", 2L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 2)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
"id", 0L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 0)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
public static final List<Map<String, Object>> LATEST_SNAPSHOT_EXPECTED_ROWS = new ArrayList<>(
|
||||
ImmutableList.of(
|
||||
ImmutableMap.of(
|
||||
"id", 1L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 3)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
"id", 4L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 3)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
"id", 2L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 2)
|
||||
),
|
||||
ImmutableMap.of(
|
||||
"id", 0L,
|
||||
"map_info", ImmutableMap.of("snapshotVersion", 0)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
/**
|
||||
* The Druid schema used for ingestion of {@link #DELTA_TABLE_PATH}.
|
||||
*/
|
||||
public static final InputRowSchema FULL_SCHEMA = new InputRowSchema(
|
||||
new TimestampSpec("na", "posix", DateTimes.of("2024-01-01")),
|
||||
new DimensionsSpec(
|
||||
ImmutableList.of(
|
||||
new AutoTypeColumnSchema("id", null),
|
||||
new AutoTypeColumnSchema("map_info", null)
|
||||
)
|
||||
),
|
||||
ColumnsFilter.all()
|
||||
);
|
||||
}
|
|
@ -44,18 +44,20 @@ Delta table to `resources/employee-delta-table`. You can override the defaults b
|
|||
```shell
|
||||
python3 create_delta_table.py -h
|
||||
|
||||
usage: create_delta_table.py [-h] --save_path SAVE_PATH [--save_mode {append,overwrite}] [--partitioned_by {date,name}] [--num_records NUM_RECORDS]
|
||||
usage: create_delta_table.py [-h] [--delta_table_type {TableType.SIMPLE,TableType.COMPLEX,TableType.SNAPSHOTS}] --save_path SAVE_PATH [--save_mode {append,overwrite}] [--partitioned_by {date,name,id}] [--num_records NUM_RECORDS]
|
||||
|
||||
Script to write a Delta Lake table.
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--delta_table_type {TableType.SIMPLE,TableType.COMPLEX,TableType.SNAPSHOTS}
|
||||
Choose a Delta table type to generate. (default: TableType.SIMPLE)
|
||||
--save_path SAVE_PATH
|
||||
Save path for Delta table (default: None)
|
||||
--save_mode {append,overwrite}
|
||||
Specify write mode (append/overwrite) (default: append)
|
||||
--partitioned_by {date,name}
|
||||
Partitioned by columns (default: None)
|
||||
--partitioned_by {date,name,id}
|
||||
Column to partition the Delta table (default: None)
|
||||
--num_records NUM_RECORDS
|
||||
Specify number of Delta records to write (default: 5)
|
||||
```
|
||||
|
@ -88,10 +90,21 @@ The resulting Delta table is checked in to the repo. The expectated rows to be u
|
|||
### Complex types table `complex-types-table`:
|
||||
|
||||
The test data in `resources/complex-types-table` contains 5 Delta records generated with 1 snapshot.
|
||||
The table was generated by running the following commands:
|
||||
The table was generated by running the following command:
|
||||
```shell
|
||||
python3 create_delta_table.py --save_path=complex-types-table --num_records=5 --gen_complex_types=True
|
||||
python3 create_delta_table.py --save_path=complex-types-table --delta_table_type=complex
|
||||
```
|
||||
|
||||
The resulting Delta table is checked in to the repo. The expected rows to be used in tests are updated in
|
||||
`ComplexTypesDeltaTable.java` accordingly.
|
||||
|
||||
### Snapshots table `snapshot-table`:
|
||||
|
||||
The test data in `resources/snapshot-table` contains 4 Delta snapshots with deletions, updates, and insertions of records across
|
||||
snapshots. The table was generated by running the following command:
|
||||
```shell
|
||||
python3 create_delta_table.py --save_path=snapshot-table --partitioned_by=id --delta_table_type=snapshots --num_records=3
|
||||
```
|
||||
|
||||
The resulting Delta table is checked in to the repo. The expected rows to be used in tests are updated in
|
||||
`SnapshotDeltaTable.java` accordingly.
|
||||
|
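As a rough sketch (assumed helper names, not part of the checked-in files), the parameterized tests in `DeltaInputSourceTest` pair a pinned snapshot version of this table with the expected rows declared in `SnapshotDeltaTable`:

```java
// Read snapshot version 1 of the checked-in snapshot-table fixture.
DeltaInputSource source =
    new DeltaInputSource(SnapshotDeltaTable.DELTA_TABLE_PATH, null, null, 1L);
InputSourceReader reader = source.reader(SnapshotDeltaTable.FULL_SCHEMA, null, null);

// readAllRows(...) stands in for the private test helper that drains the reader.
List<InputRow> rows = readAllRows(reader);

// At version 1 the delete of id=1 has been applied, so only ids 0 and 2 remain,
// matching SnapshotDeltaTable.V1_SNAPSHOT_EXPECTED_ROWS.
```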
|
|
@ -16,11 +16,20 @@
|
|||
# limitations under the License.
|
||||
|
||||
import argparse
|
||||
from enum import Enum
|
||||
|
||||
from delta import *
|
||||
import pyspark
|
||||
from pyspark.sql.types import MapType, StructType, StructField, ShortType, StringType, TimestampType, LongType, IntegerType, DoubleType, FloatType, DateType, BooleanType, ArrayType
|
||||
from pyspark.sql.functions import expr
|
||||
from datetime import datetime, timedelta
|
||||
import random
|
||||
from delta.tables import DeltaTable
|
||||
|
||||
class TableType(Enum):
|
||||
SIMPLE = "simple"
|
||||
COMPLEX = "complex"
|
||||
SNAPSHOTS = "snapshots"
|
||||
|
||||
|
||||
def config_spark_with_delta_lake():
|
||||
|
@ -40,15 +49,12 @@ def config_spark_with_delta_lake():
|
|||
def create_dataset_with_complex_types(num_records):
|
||||
"""
|
||||
Create a mock dataset with records containing complex types like arrays, structs and maps.
|
||||
|
||||
Parameters:
|
||||
- num_records (int): Number of records to generate.
|
||||
|
||||
Returns:
|
||||
- Tuple: A tuple containing a list of records and the corresponding schema.
|
||||
- List of Records: Each record is a tuple representing a row of data.
|
||||
- StructType: The schema defining the structure of the records.
|
||||
|
||||
Example:
|
||||
```python
|
||||
data, schema = create_dataset_with_complex_types(10)
|
||||
|
@ -86,6 +92,59 @@ def create_dataset_with_complex_types(num_records):
|
|||
return data, schema
|
||||
|
||||
|
||||
def create_snapshots_table(num_records):
|
||||
"""
|
||||
Create a mock dataset for snapshots.
|
||||
Parameters:
|
||||
- num_records (int): Number of records to generate.
|
||||
Returns:
|
||||
- Tuple: A tuple containing a list of records and the corresponding schema pertaining to a single snapshot.
|
||||
Example:
|
||||
```python
|
||||
data, schema = create_snapshots_table(5)
|
||||
```
|
||||
"""
|
||||
schema = StructType([
|
||||
StructField("id", LongType(), False),
|
||||
StructField("map_info", MapType(StringType(), IntegerType()))
|
||||
])
|
||||
|
||||
data = []
|
||||
|
||||
for idx in range(num_records):
|
||||
record = (
|
||||
idx,
|
||||
{"snapshotVersion": 0}
|
||||
)
|
||||
data.append(record)
|
||||
return data, schema
|
||||
|
||||
|
||||
def update_table(spark, schema, delta_table_path):
|
||||
"""
|
||||
Update the table at the specified Delta path with a deletion, a partial update, and an insertion.
|
||||
Each update generates a distinct snapshot for the Delta table.
|
||||
"""
|
||||
delta_table = DeltaTable.forPath(spark, delta_table_path)
|
||||
|
||||
# Snapshot 1: remove the record with id = 1; result: (id=0, id=2)
|
||||
delta_table.delete(condition="id=1")
|
||||
|
||||
# Snapshot 2: do a partial update of the map_info map for id = 2; result: (id=2, id=0)
|
||||
delta_table.update(
|
||||
condition="id=2",
|
||||
set={"map_info": expr("map('snapshotVersion', 2)")}
|
||||
)
|
||||
|
||||
# Snapshot 3: New records to be appended; result : (id=1, id=4, id=2, id=0)
|
||||
append_data = [
|
||||
(1, {"snapshotVersion": 3}),
|
||||
(4, {"snapshotVersion": 3})
|
||||
]
|
||||
append_df = spark.createDataFrame(append_data, schema)
|
||||
append_df.write.format("delta").mode("append").save(delta_table_path)
|
||||
|
||||
|
||||
def create_dataset(num_records):
|
||||
"""
|
||||
Generate a mock employee dataset with different datatypes for testing purposes.
|
||||
|
@ -141,19 +200,18 @@ def main():
|
|||
parser = argparse.ArgumentParser(description="Script to write a Delta Lake table.",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
|
||||
parser.add_argument("--gen_complex_types", type=bool, default=False, help="Generate a Delta table with records"
|
||||
" containing complex types like structs,"
|
||||
" maps and arrays.")
|
||||
parser.add_argument('--delta_table_type', type=lambda t: TableType[t.upper()], choices=TableType,
|
||||
default=TableType.SIMPLE, help='Choose a Delta table type to generate.')
|
||||
parser.add_argument('--save_path', default=None, required=True, help="Save path for Delta table")
|
||||
parser.add_argument('--save_mode', choices=('append', 'overwrite'), default="append",
|
||||
help="Specify write mode (append/overwrite)")
|
||||
parser.add_argument('--partitioned_by', choices=("date", "name"), default=None,
|
||||
parser.add_argument('--partitioned_by', choices=("date", "name", "id"), default=None,
|
||||
help="Column to partition the Delta table")
|
||||
parser.add_argument('--num_records', type=int, default=5, help="Specify number of Delta records to write")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
is_gen_complex_types = args.gen_complex_types
|
||||
delta_table_type = args.delta_table_type
|
||||
save_mode = args.save_mode
|
||||
save_path = args.save_path
|
||||
num_records = args.num_records
|
||||
|
@ -161,21 +219,29 @@ def main():
|
|||
|
||||
spark = config_spark_with_delta_lake()
|
||||
|
||||
if is_gen_complex_types:
|
||||
data, schema = create_dataset_with_complex_types(num_records=num_records)
|
||||
else:
|
||||
if delta_table_type == TableType.SIMPLE:
|
||||
data, schema = create_dataset(num_records=num_records)
|
||||
elif delta_table_type == TableType.COMPLEX:
|
||||
data, schema = create_dataset_with_complex_types(num_records=num_records)
|
||||
elif delta_table_type == TableType.SNAPSHOTS:
|
||||
data, schema = create_snapshots_table(num_records)
|
||||
else:
|
||||
parser.print_help()
|
||||
raise Exception("Unknown value specified for --delta_table_type")
|
||||
|
||||
df = spark.createDataFrame(data, schema=schema)
|
||||
if not partitioned_by:
|
||||
df.write.format("delta").mode(save_mode).save(save_path)
|
||||
else:
|
||||
df.write.format("delta").partitionBy("name").mode(save_mode).save(save_path)
|
||||
df.write.format("delta").partitionBy(partitioned_by).mode(save_mode).save(save_path)
|
||||
|
||||
df.show()
|
||||
|
||||
print(f"Generated Delta table records partitioned by {partitioned_by} in {save_path} in {save_mode} mode"
|
||||
f" with {num_records} records.")
|
||||
f" with {num_records} records with {delta_table_type}.")
|
||||
|
||||
if delta_table_type == TableType.SNAPSHOTS:
|
||||
update_table(spark, schema, save_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
{"commitInfo":{"timestamp":1725465348581,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"id\"]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"2607"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.1.0","txnId":"d52bcd81-2310-417a-acb2-e206a4882383"}}
|
||||
{"metaData":{"id":"5a4682fa-c3d8-4f49-8825-b8540e20ce93","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"map_info\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"integer\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["id"],"configuration":{},"createdTime":1725465346226}}
|
||||
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
|
||||
{"add":{"path":"id=0/part-00003-8610110f-f5a0-4856-a5a8-516e5b35ef44.c000.snappy.parquet","partitionValues":{"id":"0"},"size":869,"modificationTime":1725465348507,"dataChange":true,"stats":"{\"numRecords\":1,\"nullCount\":{\"map_info\":0}}"}}
|
||||
{"add":{"path":"id=1/part-00006-120df0a3-1c7a-4a2e-81aa-7bc8140b0f09.c000.snappy.parquet","partitionValues":{"id":"1"},"size":869,"modificationTime":1725465348507,"dataChange":true,"stats":"{\"numRecords\":1,\"nullCount\":{\"map_info\":0}}"}}
|
||||
{"add":{"path":"id=2/part-00009-246861b8-01b0-446c-b4f1-ab0c2e762044.c000.snappy.parquet","partitionValues":{"id":"2"},"size":869,"modificationTime":1725465348506,"dataChange":true,"stats":"{\"numRecords\":1,\"nullCount\":{\"map_info\":0}}"}}
|
|
@ -0,0 +1,2 @@
|
|||
{"commitInfo":{"timestamp":1725465352088,"operation":"DELETE","operationParameters":{"predicate":"[\"(id#852L = 1)\"]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"1","numRemovedBytes":"869","numCopiedRows":"0","numDeletionVectorsAdded":"0","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"426","numDeletionVectorsUpdated":"0","numDeletedRows":"1","scanTimeMs":"421","numAddedFiles":"0","numAddedBytes":"0","rewriteTimeMs":"0"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.1.0","txnId":"5af91bc8-feb5-40e2-b7d0-76acd1038ba7"}}
|
||||
{"remove":{"path":"id=1/part-00006-120df0a3-1c7a-4a2e-81aa-7bc8140b0f09.c000.snappy.parquet","deletionTimestamp":1725465351650,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{"id":"1"},"size":869,"stats":"{\"numRecords\":1}"}}
|
Some files were not shown because too many files have changed in this diff.