use the latest datasketches-java-4.0.0 (#14334)

* use the latest datasketches-java-4.0.0

* updated versions of datasketches

* adjusted expectation

* fixed the expectations

---------

Co-authored-by: AlexanderSaydakov <AlexanderSaydakov@users.noreply.github.com>
This commit is contained in:
Alexander Saydakov 2023-05-27 22:19:18 -07:00 committed by GitHub
parent 8d256e35b4
commit 4131c0df13
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
45 changed files with 149 additions and 148 deletions

View File

@ -78,7 +78,7 @@ public class KllDoublesSketchToHistogramPostAggregator implements PostAggregator
return histogram;
}
final double[] histogram = sketch.getPMF(splitPoints != null ? splitPoints :
equallySpacedPoints(numBins, sketch.getMinValue(), sketch.getMaxValue()));
equallySpacedPoints(numBins, sketch.getMinItem(), sketch.getMaxItem()));
for (int i = 0; i < histogram.length; i++) {
histogram[i] *= sketch.getN(); // scale fractions to counts
}

View File

@ -83,7 +83,7 @@ public class KllDoublesSketchToRankPostAggregator implements PostAggregator
public Object compute(final Map<String, Object> combinedAggregators)
{
final KllDoublesSketch sketch = (KllDoublesSketch) field.compute(combinedAggregators);
return sketch.getRank(value);
return sketch.isEmpty() ? Double.NaN : sketch.getRank(value);
}
@Override

View File

@ -78,7 +78,7 @@ public class KllFloatsSketchToHistogramPostAggregator implements PostAggregator
return histogram;
}
final double[] histogram = sketch.getPMF(splitPoints != null ? splitPoints :
equallySpacedPoints(numBins, sketch.getMinValue(), sketch.getMaxValue()));
equallySpacedPoints(numBins, sketch.getMinItem(), sketch.getMaxItem()));
for (int i = 0; i < histogram.length; i++) {
histogram[i] *= sketch.getN(); // scale fractions to counts
}

View File

@ -83,7 +83,7 @@ public class KllFloatsSketchToRankPostAggregator implements PostAggregator
public Object compute(final Map<String, Object> combinedAggregators)
{
final KllFloatsSketch sketch = (KllFloatsSketch) field.compute(combinedAggregators);
return sketch.getRank(value);
return sketch.isEmpty() ? Double.NaN : sketch.getRank(value);
}
@Override

View File

@ -23,7 +23,7 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import org.apache.datasketches.Util;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.DoublesUnion;
import org.apache.druid.jackson.DefaultTrueJsonIncludeFilter;
@ -123,7 +123,7 @@ public class DoublesSketchAggregatorFactory extends AggregatorFactory
}
this.fieldName = fieldName;
this.k = k == null ? DEFAULT_K : k;
Util.checkIfPowerOf2(this.k, "k");
Util.checkIfIntPowerOf2(this.k, "k");
this.maxStreamLength = maxStreamLength == null ? DEFAULT_MAX_STREAM_LENGTH : maxStreamLength;
this.shouldFinalize = shouldFinalize == null ? DEFAULT_SHOULD_FINALIZE : shouldFinalize;
this.cacheTypeId = cacheTypeId;
@ -240,8 +240,8 @@ public class DoublesSketchAggregatorFactory extends AggregatorFactory
public Object combine(final Object lhs, final Object rhs)
{
final DoublesUnion union = DoublesUnion.builder().setMaxK(k).build();
union.update((DoublesSketch) lhs);
union.update((DoublesSketch) rhs);
union.union((DoublesSketch) lhs);
union.union((DoublesSketch) rhs);
return union.getResultAndReset();
}
@ -263,7 +263,7 @@ public class DoublesSketchAggregatorFactory extends AggregatorFactory
public void fold(final ColumnValueSelector selector)
{
final DoublesSketch sketch = (DoublesSketch) selector.getObject();
union.update(sketch);
union.union(sketch);
}
@Nullable

View File

@ -77,7 +77,7 @@ public class DoublesSketchMergeAggregator implements Aggregator
return;
}
if (object instanceof DoublesSketch) {
union.update((DoublesSketch) object);
union.union((DoublesSketch) object);
} else {
union.update(selector.getDouble());
}

View File

@ -58,7 +58,7 @@ public class DoublesSketchMergeVectorAggregator implements VectorAggregator
for (int i = startRow; i < endRow; i++) {
final DoublesSketch sketch = (DoublesSketch) vector[i];
if (sketch != null) {
union.update(sketch);
union.union(sketch);
}
}
}
@ -80,7 +80,7 @@ public class DoublesSketchMergeVectorAggregator implements VectorAggregator
if (sketch != null) {
final int position = positions[i] + positionOffset;
final DoublesUnion union = helper.getSketchAtPosition(buf, position);
union.update(sketch);
union.union(sketch);
}
}
}

View File

@ -78,7 +78,7 @@ public class DoublesSketchToHistogramPostAggregator implements PostAggregator
return histogram;
}
final double[] histogram = sketch.getPMF(splitPoints != null ? splitPoints :
equallySpacedPoints(numBins, sketch.getMinValue(), sketch.getMaxValue()));
equallySpacedPoints(numBins, sketch.getMinItem(), sketch.getMaxItem()));
for (int i = 0; i < histogram.length; i++) {
histogram[i] *= sketch.getN(); // scale fractions to counts
}

View File

@ -83,7 +83,7 @@ public class DoublesSketchToQuantilePostAggregator implements PostAggregator
public Object compute(final Map<String, Object> combinedAggregators)
{
final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
return sketch.getQuantile(fraction);
return sketch.isEmpty() ? Double.NaN : sketch.getQuantile(fraction);
}
@Override

View File

@ -83,7 +83,7 @@ public class DoublesSketchToRankPostAggregator implements PostAggregator
public Object compute(final Map<String, Object> combinedAggregators)
{
final DoublesSketch sketch = (DoublesSketch) field.compute(combinedAggregators);
return sketch.getRank(value);
return sketch.isEmpty() ? Double.NaN : sketch.getRank(value);
}
@Override

View File

@ -19,7 +19,7 @@
package org.apache.druid.query.aggregation.datasketches.theta;
import org.apache.datasketches.Family;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
import org.apache.druid.common.config.NullHandling;

View File

@ -22,10 +22,11 @@ package org.apache.druid.query.aggregation.datasketches.theta;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import org.apache.datasketches.Family;
import org.apache.datasketches.Util;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.aggregation.AggregateCombiner;
import org.apache.druid.query.aggregation.Aggregator;
@ -52,7 +53,7 @@ public abstract class SketchAggregatorFactory extends AggregatorFactory
// Smallest number of entries in an Aggregator. Each entry is a long. Based on the constructor of
// HeapQuickSelectSketch and used by guessAggregatorHeapFootprint.
private static final int MIN_ENTRIES_PER_AGGREGATOR = 1 << Util.MIN_LG_ARR_LONGS;
private static final int MIN_ENTRIES_PER_AGGREGATOR = 1 << ThetaUtil.MIN_LG_ARR_LONGS;
// Largest preamble size for the sketch stored in an Aggregator, in bytes. Based on Util.getMaxUnionBytes.
private static final int LONGEST_POSSIBLE_PREAMBLE_BYTES = Family.UNION.getMaxPreLongs() << 3;
@ -68,7 +69,7 @@ public abstract class SketchAggregatorFactory extends AggregatorFactory
this.fieldName = Preconditions.checkNotNull(fieldName, "Must have a valid, non-null fieldName");
this.size = size == null ? DEFAULT_MAX_SKETCH_SIZE : size;
Util.checkIfPowerOf2(this.size, "size");
Util.checkIfIntPowerOf2(this.size, "size");
this.cacheId = cacheId;
}
@ -197,7 +198,7 @@ public abstract class SketchAggregatorFactory extends AggregatorFactory
expectedEntries = maxEntries;
} else {
// rows is within int range since it's <= maxEntries, so casting is OK.
expectedEntries = Math.max(MIN_ENTRIES_PER_AGGREGATOR, Util.ceilingPowerOf2(Ints.checkedCast(rows)));
expectedEntries = Math.max(MIN_ENTRIES_PER_AGGREGATOR, Util.ceilingIntPowerOf2(Ints.checkedCast(rows)));
}
// 8 bytes per entry + largest possible preamble.

View File

@ -21,7 +21,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import org.apache.datasketches.Family;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;

View File

@ -23,7 +23,7 @@ import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Doubles;
import com.google.common.primitives.Longs;
import org.apache.datasketches.Family;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.AnotB;
import org.apache.datasketches.theta.Intersection;

View File

@ -21,7 +21,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.datasketches.Util;
import org.apache.datasketches.common.Util;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
@ -55,7 +55,7 @@ public class SketchSetPostAggregator implements PostAggregator
this.fields = fields;
this.func = SketchHolder.Func.valueOf(func);
this.maxSketchSize = maxSize == null ? SketchAggregatorFactory.DEFAULT_MAX_SKETCH_SIZE : maxSize;
Util.checkIfPowerOf2(this.maxSketchSize, "size");
Util.checkIfIntPowerOf2(this.maxSketchSize, "size");
if (fields.size() <= 1) {
throw new IAE("Illegal number of fields[%s], must be > 1", fields.size());

View File

@ -22,7 +22,8 @@ package org.apache.druid.query.aggregation.datasketches.tuple;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.datasketches.Util;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSetOperationBuilder;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesUnion;
@ -74,8 +75,8 @@ public class ArrayOfDoublesSketchAggregatorFactory extends AggregatorFactory
{
this.name = Preconditions.checkNotNull(name, "Must have a valid, non-null aggregator name");
this.fieldName = Preconditions.checkNotNull(fieldName, "Must have a valid, non-null fieldName");
this.nominalEntries = nominalEntries == null ? Util.DEFAULT_NOMINAL_ENTRIES : nominalEntries;
Util.checkIfPowerOf2(this.nominalEntries, "nominalEntries");
this.nominalEntries = nominalEntries == null ? ThetaUtil.DEFAULT_NOMINAL_ENTRIES : nominalEntries;
Util.checkIfIntPowerOf2(this.nominalEntries, "nominalEntries");
this.metricColumns = metricColumns;
this.numberOfValues = numberOfValues == null ? (metricColumns == null ? 1 : metricColumns.size()) : numberOfValues;
if (metricColumns != null && metricColumns.size() != this.numberOfValues) {

View File

@ -21,7 +21,8 @@ package org.apache.druid.query.aggregation.datasketches.tuple;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.datasketches.Util;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.datasketches.tuple.arrayofdoubles.ArrayOfDoublesSketch;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.AggregatorUtil;
@ -58,9 +59,9 @@ public class ArrayOfDoublesSketchSetOpPostAggregator extends ArrayOfDoublesSketc
{
super(name, fields);
this.operation = ArrayOfDoublesSketchOperations.Operation.valueOf(operation);
this.nominalEntries = nominalEntries == null ? Util.DEFAULT_NOMINAL_ENTRIES : nominalEntries;
this.nominalEntries = nominalEntries == null ? ThetaUtil.DEFAULT_NOMINAL_ENTRIES : nominalEntries;
this.numberOfValues = numberOfValues == null ? 1 : numberOfValues;
Util.checkIfPowerOf2(this.nominalEntries, "size");
Util.checkIfIntPowerOf2(this.nominalEntries, "size");
if (fields.size() <= 1) {
throw new IAE("Illegal number of fields[%d], must be > 1", fields.size());

View File

@ -28,7 +28,7 @@ import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.datasketches.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketchSetOpPostAggregator;
@ -92,7 +92,7 @@ public abstract class ArrayOfDoublesSketchSetBaseOperatorConversion implements S
nominalEntries = ((Number) RexLiteral.value(potentialNominalEntriesArg)).intValue();
metricExpressionEndIndex = lastArgIndex - 1;
} else {
nominalEntries = Util.DEFAULT_NOMINAL_ENTRIES;
nominalEntries = ThetaUtil.DEFAULT_NOMINAL_ENTRIES;
metricExpressionEndIndex = lastArgIndex;
}

View File

@ -31,7 +31,7 @@ import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.InferTypes;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.util.Optionality;
import org.apache.datasketches.Util;
import org.apache.datasketches.thetacommon.ThetaUtil;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.datasketches.tuple.ArrayOfDoublesSketchAggregatorFactory;
@ -99,7 +99,7 @@ public class ArrayOfDoublesSketchSqlAggregator implements SqlAggregator
nominalEntries = ((Number) RexLiteral.value(potentialNominalEntriesArg)).intValue();
metricExpressionEndIndex = lastArgIndex - 1;
} else {
nominalEntries = Util.DEFAULT_NOMINAL_ENTRIES;
nominalEntries = ThetaUtil.DEFAULT_NOMINAL_ENTRIES;
metricExpressionEndIndex = lastArgIndex;
}

View File

@ -19,7 +19,7 @@
package org.apache.druid.query.aggregation.datasketches.hll;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.hll.HllSketch;
import org.apache.druid.java.util.common.StringUtils;
import org.junit.Assert;
@ -53,7 +53,7 @@ public class HllSketchObjectStrategyTest
final ByteBuffer buf2 = ByteBuffer.wrap(garbage2).order(ByteOrder.LITTLE_ENDIAN);
Assert.assertThrows(
IndexOutOfBoundsException.class,
Exception.class, // can throw either SketchesArgumentException or IndexOutOfBoundsException
() -> objectStrategy.fromByteBufferSafe(buf2, garbage2.length).getSketch().copy()
);
}
@ -62,7 +62,7 @@ public class HllSketchObjectStrategyTest
final byte[] garbage = new byte[]{0x01, 0x02};
final ByteBuffer buf3 = ByteBuffer.wrap(garbage).order(ByteOrder.LITTLE_ENDIAN);
Assert.assertThrows(
IndexOutOfBoundsException.class,
SketchesArgumentException.class,
() -> objectStrategy.fromByteBufferSafe(buf3, garbage.length).getSketch().copy()
);

View File

@ -55,7 +55,7 @@ public class KllDoublesSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(777d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -68,7 +68,7 @@ public class KllDoublesSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(-133d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -81,7 +81,7 @@ public class KllDoublesSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(3.1d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -94,7 +94,7 @@ public class KllDoublesSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(0.1d, sketch.getMaxItem(), 0.01d);
}
@Test

View File

@ -142,7 +142,7 @@ public class KllDoublesSketchToCDFPostAggregatorTest
final PostAggregator postAgg = new KllDoublesSketchToCDFPostAggregator(
"cdf",
new FieldAccessPostAggregator("field", "sketch"),
new double[] {4} // half of the distribution is below 4
new double[] {3} // half of the distribution is less or equals 3
);
final double[] cdf = (double[]) postAgg.compute(fields);

View File

@ -120,7 +120,7 @@ public class KllDoublesSketchToRankPostAggregatorTest
final PostAggregator postAgg = new KllDoublesSketchToRankPostAggregator(
"rank",
new FieldAccessPostAggregator("field", "sketch"),
4
3
);
final double rank = (double) postAgg.compute(fields);

View File

@ -55,7 +55,7 @@ public class KllFloatsSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(777d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -68,7 +68,7 @@ public class KllFloatsSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(-133d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -81,7 +81,7 @@ public class KllFloatsSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(3.1d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -94,7 +94,7 @@ public class KllFloatsSketchComplexMetricSerdeTest
"foo"
);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(0.1d, sketch.getMaxItem(), 0.01d);
}
@Test

View File

@ -142,7 +142,7 @@ public class KllFloatsSketchToCDFPostAggregatorTest
final PostAggregator postAgg = new KllFloatsSketchToCDFPostAggregator(
"cdf",
new FieldAccessPostAggregator("field", "sketch"),
new float[] {4} // half of the distribution is below 4
new float[] {3} // half of the distribution is less or equals 3
);
final double[] cdf = (double[]) postAgg.compute(fields);

View File

@ -120,7 +120,7 @@ public class KllFloatsSketchToRankPostAggregatorTest
final PostAggregator postAgg = new KllFloatsSketchToRankPostAggregator(
"rank",
new FieldAccessPostAggregator("field", "sketch"),
4
3
);
final double rank = (double) postAgg.compute(fields);

View File

@ -43,7 +43,7 @@ public class DoublesSketchComplexMetricSerdeTest
new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "")),
"foo"
);
Assert.assertEquals(0, sketch.getRetainedItems());
Assert.assertEquals(0, sketch.getNumRetained());
}
@Test
@ -55,8 +55,8 @@ public class DoublesSketchComplexMetricSerdeTest
new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "777")),
"foo"
);
Assert.assertEquals(1, sketch.getRetainedItems());
Assert.assertEquals(777d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(777d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -68,8 +68,8 @@ public class DoublesSketchComplexMetricSerdeTest
new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "-133")),
"foo"
);
Assert.assertEquals(1, sketch.getRetainedItems());
Assert.assertEquals(-133d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(-133d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -81,8 +81,8 @@ public class DoublesSketchComplexMetricSerdeTest
new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", "3.1")),
"foo"
);
Assert.assertEquals(1, sketch.getRetainedItems());
Assert.assertEquals(3.1d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(3.1d, sketch.getMaxItem(), 0.01d);
}
@Test
@ -94,8 +94,8 @@ public class DoublesSketchComplexMetricSerdeTest
new MapBasedInputRow(0L, ImmutableList.of(), ImmutableMap.of("foo", ".1")),
"foo"
);
Assert.assertEquals(1, sketch.getRetainedItems());
Assert.assertEquals(0.1d, sketch.getMaxValue(), 0.01d);
Assert.assertEquals(1, sketch.getNumRetained());
Assert.assertEquals(0.1d, sketch.getMaxItem(), 0.01d);
}
@Test

View File

@ -142,7 +142,7 @@ public class DoublesSketchToCDFPostAggregatorTest
final PostAggregator postAgg = new DoublesSketchToCDFPostAggregator(
"cdf",
new FieldAccessPostAggregator("field", "sketch"),
new double[] {4} // half of the distribution is below 4
new double[] {3} // half of the distribution is less or equals 3
);
final double[] cdf = (double[]) postAgg.compute(fields);

View File

@ -120,7 +120,7 @@ public class DoublesSketchToRankPostAggregatorTest
final PostAggregator postAgg = new DoublesSketchToRankPostAggregator(
"rank",
new FieldAccessPostAggregator("field", "sketch"),
4
3
);
final double rank = (double) postAgg.compute(fields);

View File

@ -186,7 +186,7 @@ public class DoublesSketchSqlAggregatorTest extends BaseCalciteQueryTest
ImmutableList.of(
new Object[]{
1.0,
4.0,
3.0,
6.0,
6.0,
12.0,
@ -245,7 +245,7 @@ public class DoublesSketchSqlAggregatorTest extends BaseCalciteQueryTest
ImmutableList.of(
new Object[]{
1.0,
4.0,
3.0,
6.0,
6.0,
6.0,
@ -264,7 +264,7 @@ public class DoublesSketchSqlAggregatorTest extends BaseCalciteQueryTest
expectedResults = ImmutableList.of(
new Object[]{
0.0,
1.0,
0.0,
10.1,
10.1,
20.2,
@ -644,7 +644,7 @@ public class DoublesSketchSqlAggregatorTest extends BaseCalciteQueryTest
"\n"
+ "### Quantiles HeapUpdateDoublesSketch SUMMARY: \n"
+ " Empty : false\n"
+ " Direct, Capacity bytes : false, \n"
+ " Memory, Capacity bytes : false, \n"
+ " Estimation Mode : false\n"
+ " K : 128\n"
+ " N : 6\n"
@ -657,8 +657,8 @@ public class DoublesSketchSqlAggregatorTest extends BaseCalciteQueryTest
+ " Updatable Storage Bytes : 96\n"
+ " Normalized Rank Error : 1.406%\n"
+ " Normalized Rank Error (PMF) : 1.711%\n"
+ " Min Value : 1.000000e+00\n"
+ " Max Value : 1.000000e+00\n"
+ " Min Item : 1.000000e+00\n"
+ " Max Item : 1.000000e+00\n"
+ "### END SKETCH SUMMARY\n"
}
)
@ -706,7 +706,7 @@ public class DoublesSketchSqlAggregatorTest extends BaseCalciteQueryTest
),
ImmutableList.of(
new Object[]{
4.0d,
3.0d,
6.0d
}
)

View File

@ -24,7 +24,7 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import org.apache.datasketches.Family;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.Sketches;

View File

@ -19,8 +19,8 @@
package org.apache.druid.query.aggregation.datasketches.theta;
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
import org.apache.druid.java.util.common.StringUtils;

View File

@ -19,7 +19,7 @@
package org.apache.druid.query.aggregation.datasketches.theta;
import org.apache.datasketches.Family;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
import org.apache.druid.java.util.common.StringUtils;

View File

@ -23,7 +23,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.datasketches.Family;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
import org.apache.druid.jackson.DefaultObjectMapper;

View File

@ -22,7 +22,7 @@ package org.apache.druid.query.aggregation.datasketches.theta;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.collect.ImmutableMap;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.datasketches.Family;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.theta.SetOperation;
import org.apache.datasketches.theta.Union;
import org.apache.druid.jackson.DefaultObjectMapper;

View File

@ -168,8 +168,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(1.0, ds.getMinValue(), 0);
Assert.assertEquals(1.0, ds.getMaxValue(), 0);
Assert.assertEquals(1.0, ds.getMinItem(), 0);
Assert.assertEquals(1.0, ds.getMaxItem(), 0);
final String expectedSummary = "### HeapArrayOfDoublesCompactSketch SUMMARY: \n"
+ " Estimate : 40.0\n"
@ -283,8 +283,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(quantilesObj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) quantilesObj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(1.0, ds.getMinValue(), 0);
Assert.assertEquals(1.0, ds.getMaxValue(), 0);
Assert.assertEquals(1.0, ds.getMinItem(), 0);
Assert.assertEquals(1.0, ds.getMaxItem(), 0);
}
@Test
@ -369,8 +369,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(1.0, ds.getMinValue(), 0);
Assert.assertEquals(1.0, ds.getMaxValue(), 0);
Assert.assertEquals(1.0, ds.getMinItem(), 0);
Assert.assertEquals(1.0, ds.getMaxItem(), 0);
}
@Test
@ -471,8 +471,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(2.0, ds.getMinValue(), 0);
Assert.assertEquals(2.0, ds.getMaxValue(), 0);
Assert.assertEquals(2.0, ds.getMinItem(), 0);
Assert.assertEquals(2.0, ds.getMaxItem(), 0);
}
@Test
@ -574,8 +574,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(2.0, ds.getMinValue(), 0);
Assert.assertEquals(2.0, ds.getMaxValue(), 0);
Assert.assertEquals(2.0, ds.getMinItem(), 0);
Assert.assertEquals(2.0, ds.getMaxItem(), 0);
}
@Test
@ -681,15 +681,15 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds.getN());
Assert.assertEquals(2.0, ds.getMinValue(), 0);
Assert.assertEquals(2.0, ds.getMaxValue(), 0);
Assert.assertEquals(2.0, ds.getMinItem(), 0);
Assert.assertEquals(2.0, ds.getMaxItem(), 0);
Object objSketch2 = row.get(7); // quantiles-sketch-with-nulls
Assert.assertTrue(objSketch2 instanceof DoublesSketch);
DoublesSketch ds2 = (DoublesSketch) objSketch2;
Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds2.getN());
Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 3.0, ds2.getMinValue(), 0);
Assert.assertEquals(3.0, ds2.getMaxValue(), 0);
Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 3.0, ds2.getMinItem(), 0);
Assert.assertEquals(3.0, ds2.getMaxItem(), 0);
}
@Test
@ -776,8 +776,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(1.0, ds.getMinValue(), 0);
Assert.assertEquals(1.0, ds.getMaxValue(), 0);
Assert.assertEquals(1.0, ds.getMinItem(), 0);
Assert.assertEquals(1.0, ds.getMaxItem(), 0);
}
@Test
@ -864,8 +864,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(1.0, ds.getMinValue(), 0);
Assert.assertEquals(1.0, ds.getMaxValue(), 0);
Assert.assertEquals(1.0, ds.getMinItem(), 0);
Assert.assertEquals(1.0, ds.getMaxItem(), 0);
}
@Test
@ -951,8 +951,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(1.0, ds.getMinValue(), 0);
Assert.assertEquals(1.0, ds.getMaxValue(), 0);
Assert.assertEquals(1.0, ds.getMinItem(), 0);
Assert.assertEquals(1.0, ds.getMaxItem(), 0);
}
@Test
@ -1038,8 +1038,8 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(1.0, ds.getMinValue(), 0);
Assert.assertEquals(1.0, ds.getMaxValue(), 0);
Assert.assertEquals(1.0, ds.getMinItem(), 0);
Assert.assertEquals(1.0, ds.getMaxItem(), 0);
}
// Two buckets with statistically significant difference.
@ -1227,22 +1227,22 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds.getN());
Assert.assertEquals(2.0, ds.getMinValue(), 0);
Assert.assertEquals(2.0, ds.getMaxValue(), 0);
Assert.assertEquals(2.0, ds.getMinItem(), 0);
Assert.assertEquals(2.0, ds.getMaxItem(), 0);
Object objSketch2 = row.get(9); // quantiles-sketch-with-nulls
Assert.assertTrue(objSketch2 instanceof DoublesSketch);
DoublesSketch ds2 = (DoublesSketch) objSketch2;
Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds2.getN());
Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 3.0, ds2.getMinValue(), 0);
Assert.assertEquals(3.0, ds2.getMaxValue(), 0);
Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 3.0, ds2.getMinItem(), 0);
Assert.assertEquals(3.0, ds2.getMaxItem(), 0);
Object objSketch3 = row.get(10); // quantiles-sketch-no-nulls
Assert.assertTrue(objSketch3 instanceof DoublesSketch);
DoublesSketch ds3 = (DoublesSketch) objSketch3;
Assert.assertEquals(40, ds3.getN());
Assert.assertEquals(0.0, ds3.getMinValue(), 0);
Assert.assertEquals(3.0, ds3.getMaxValue(), 0);
Assert.assertEquals(0.0, ds3.getMinItem(), 0);
Assert.assertEquals(3.0, ds3.getMaxItem(), 0);
}

View File

@ -76,6 +76,7 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
public void addAll(QuantilesSketchKeyCollector other)
{
final ItemsUnion<byte[]> union = ItemsUnion.getInstance(
byte[].class,
Math.max(sketch.getK(), other.sketch.getK()),
comparator
);
@ -84,8 +85,8 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
double otherBytesCount = other.averageKeyLength * other.getSketch().getN();
averageKeyLength = ((sketchBytesCount + otherBytesCount) / (sketch.getN() + other.sketch.getN()));
union.update(sketch);
union.update(other.sketch);
union.union(sketch);
union.union(other.sketch);
sketch = union.getResultAndReset();
}
@ -110,7 +111,7 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
@Override
public int estimatedRetainedKeys()
{
return sketch.getRetainedItems();
return sketch.getNumRetained();
}
@Override
@ -129,13 +130,10 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
@Override
public RowKey minKey()
{
final byte[] minValue = sketch.getMinValue();
if (minValue != null) {
return RowKey.wrap(minValue);
} else {
if (sketch.isEmpty()) {
throw new NoSuchElementException();
}
return RowKey.wrap(sketch.getMinItem());
}
@Override
@ -151,8 +149,7 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
final int numPartitions = Ints.checkedCast(LongMath.divide(sketch.getN(), targetWeight, RoundingMode.CEILING));
// numPartitions + 1, because the final quantile is the max, and we won't build a partition based on that.
final byte[][] quantiles = sketch.getQuantiles(numPartitions + 1);
final byte[][] quantiles = (sketch.getPartitionBoundaries(numPartitions)).boundaries;
final List<ClusterByPartition> partitions = new ArrayList<>();
for (int i = 0; i < numPartitions; i++) {

View File

@ -23,7 +23,7 @@ import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JsonDeserializer;
import com.google.common.annotations.VisibleForTesting;
import org.apache.datasketches.ArrayOfItemsSerDe;
import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.quantiles.ItemsSketch;
@ -56,7 +56,7 @@ public class QuantilesSketchKeyCollectorFactory
@Override
public QuantilesSketchKeyCollector newKeyCollector()
{
return new QuantilesSketchKeyCollector(comparator, ItemsSketch.getInstance(SKETCH_INITIAL_K, comparator), 0);
return new QuantilesSketchKeyCollector(comparator, ItemsSketch.getInstance(byte[].class, SKETCH_INITIAL_K, comparator), 0);
}
@Override
@ -87,7 +87,7 @@ public class QuantilesSketchKeyCollectorFactory
final String encodedSketch = snapshot.getEncodedSketch();
final byte[] bytes = StringUtils.decodeBase64String(encodedSketch);
final ItemsSketch<byte[]> sketch =
ItemsSketch.getInstance(Memory.wrap(bytes), comparator, ByteRowKeySerde.INSTANCE);
ItemsSketch.getInstance(byte[].class, Memory.wrap(bytes), comparator, ByteRowKeySerde.INSTANCE);
return new QuantilesSketchKeyCollector(comparator, sketch, snapshot.getAverageKeyLength());
}

View File

@ -19,8 +19,8 @@
package org.apache.druid.indexing.common.task.batch.parallel.distribution;
import org.apache.datasketches.ArrayOfItemsSerDe;
import org.apache.datasketches.ArrayOfStringsSerDe;
import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.memory.internal.UnsafeUtil;

View File

@ -19,9 +19,9 @@
package org.apache.druid.indexing.common.task.batch.parallel.distribution;
import org.apache.datasketches.ArrayOfItemsSerDe;
import org.apache.datasketches.ArrayOfStringsSerDe;
import org.apache.datasketches.Util;
import org.apache.datasketches.common.ArrayOfItemsSerDe;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.common.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
import org.apache.druid.data.input.StringTuple;

View File

@ -56,7 +56,7 @@ public class StringSketch implements StringDistribution
public StringSketch()
{
this(ItemsSketch.getInstance(SKETCH_K, STRING_TUPLE_COMPARATOR));
this(ItemsSketch.getInstance(StringTuple.class, SKETCH_K, STRING_TUPLE_COMPARATOR));
}
StringSketch(ItemsSketch<StringTuple> sketch)
@ -73,8 +73,7 @@ public class StringSketch implements StringDistribution
@Override
public void putIfNewMin(StringTuple value)
{
StringTuple min = delegate.getMinValue();
if (min == null || min.compareTo(value) > 0) {
if (delegate.isEmpty() || delegate.getMinItem().compareTo(value) > 0) {
delegate.update(value);
}
}
@ -82,8 +81,7 @@ public class StringSketch implements StringDistribution
@Override
public void putIfNewMax(StringTuple value)
{
StringTuple max = delegate.getMaxValue();
if (max == null || max.compareTo(value) < 0) {
if (delegate.isEmpty() || delegate.getMaxItem().compareTo(value) < 0) {
delegate.update(value);
}
}
@ -111,13 +109,13 @@ public class StringSketch implements StringDistribution
@VisibleForTesting
public StringTuple getMin()
{
return delegate.getMinValue();
return delegate.getMinItem();
}
@VisibleForTesting
public StringTuple getMax()
{
return delegate.getMaxValue();
return delegate.getMaxItem();
}
private PartitionBoundaries getEvenPartitionsByCount(int evenPartitionCount)
@ -127,8 +125,10 @@ public class StringSketch implements StringDistribution
"evenPartitionCount must be positive but is %s",
evenPartitionCount
);
StringTuple[] partitions = delegate.getQuantiles(evenPartitionCount + 1); // add 1 since this returns endpoints
return new PartitionBoundaries((partitions == null) ? new StringTuple[0] : partitions);
if (delegate.isEmpty()) {
return new PartitionBoundaries(new StringTuple[0]);
}
return new PartitionBoundaries((delegate.getPartitionBoundaries(evenPartitionCount)).boundaries);
}
@Override
@ -157,8 +157,8 @@ public class StringSketch implements StringDistribution
// check is best-effort as it is possible for it to return true for sketches that contain different values.
return delegate.getK() == that.delegate.getK() &&
delegate.getN() == that.delegate.getN() &&
Objects.equals(delegate.getMaxValue(), that.delegate.getMaxValue()) &&
Objects.equals(delegate.getMinValue(), that.delegate.getMinValue());
Objects.equals(delegate.getMaxItem(), that.delegate.getMaxItem()) &&
Objects.equals(delegate.getMinItem(), that.delegate.getMinItem());
}
@Override
@ -168,8 +168,8 @@ public class StringSketch implements StringDistribution
return Objects.hash(
delegate.getK(),
delegate.getN(),
delegate.getMaxValue(),
delegate.getMinValue()
delegate.getMaxItem(),
delegate.getMinItem()
);
}
@ -232,6 +232,7 @@ public class StringSketch implements StringDistribution
JsonNode jsonNode = jsonParser.getCodec().readTree(jsonParser);
byte[] sketchBytes = jsonNode.get(FIELD_SKETCH).binaryValue();
ItemsSketch<StringTuple> sketch = ItemsSketch.getInstance(
StringTuple.class,
Memory.wrap(sketchBytes),
STRING_TUPLE_COMPARATOR,
ARRAY_OF_STRINGS_SERDE

View File

@ -31,7 +31,7 @@ public class StringSketchMerger implements StringDistributionMerger
public StringSketchMerger()
{
delegate = ItemsUnion.getInstance(StringSketch.SKETCH_K, StringSketch.STRING_TUPLE_COMPARATOR);
delegate = ItemsUnion.getInstance(StringTuple.class, StringSketch.SKETCH_K, StringSketch.STRING_TUPLE_COMPARATOR);
}
@Override
@ -42,7 +42,7 @@ public class StringSketchMerger implements StringDistributionMerger
}
StringSketch stringSketch = (StringSketch) stringDistribution;
delegate.update(stringSketch.getDelegate());
delegate.union(stringSketch.getDelegate());
}
@Override

View File

@ -105,13 +105,13 @@ public class StringSketchTest
target.putIfNewMin(value);
Assert.assertEquals(1, getCount());
Assert.assertEquals(value, target.getDelegate().getMinValue());
Assert.assertEquals(value, target.getDelegate().getMaxValue());
Assert.assertEquals(value, target.getDelegate().getMinItem());
Assert.assertEquals(value, target.getDelegate().getMaxItem());
target.putIfNewMin(MIN_STRING);
Assert.assertEquals(2, getCount());
Assert.assertEquals(MIN_STRING, target.getDelegate().getMinValue());
Assert.assertEquals(MAX_STRING, target.getDelegate().getMaxValue());
Assert.assertEquals(MIN_STRING, target.getDelegate().getMinItem());
Assert.assertEquals(MAX_STRING, target.getDelegate().getMaxItem());
}
@Test
@ -125,13 +125,13 @@ public class StringSketchTest
target.putIfNewMax(value);
Assert.assertEquals(1, getCount());
Assert.assertEquals(value, target.getDelegate().getMinValue());
Assert.assertEquals(value, target.getDelegate().getMaxValue());
Assert.assertEquals(value, target.getDelegate().getMinItem());
Assert.assertEquals(value, target.getDelegate().getMaxItem());
target.putIfNewMax(MAX_STRING);
Assert.assertEquals(2, getCount());
Assert.assertEquals(MIN_STRING, target.getDelegate().getMinValue());
Assert.assertEquals(MAX_STRING, target.getDelegate().getMaxValue());
Assert.assertEquals(MIN_STRING, target.getDelegate().getMinItem());
Assert.assertEquals(MAX_STRING, target.getDelegate().getMaxItem());
}
private long getCount()

View File

@ -3784,7 +3784,7 @@ name: DataSketches
license_category: binary
module: java-core
license_name: Apache License version 2.0
version: 3.1.0
version: 4.0.0
libraries:
- org.apache.datasketches: datasketches-java
@ -3794,7 +3794,7 @@ name: DataSketches
license_category: binary
module: java-core
license_name: Apache License version 2.0
version: 2.0.0
version: 2.2.0
libraries:
- org.apache.datasketches: datasketches-memory

View File

@ -87,8 +87,8 @@
Also, CalcitePlanner is a clone of Calcite's PlannerImpl and may require updates when
Calcite is upgrade. -->
<calcite.version>1.21.0</calcite.version>
<datasketches.version>3.2.0</datasketches.version>
<datasketches.memory.version>2.0.0</datasketches.memory.version>
<datasketches.version>4.0.0</datasketches.version>
<datasketches.memory.version>2.2.0</datasketches.memory.version>
<derby.version>10.14.2.0</derby.version>
<dropwizard.metrics.version>4.0.0</dropwizard.metrics.version>
<errorprone.version>2.11.0</errorprone.version>