From b153cb2342cd5f33c79c063db3ae49ebaf498bb1 Mon Sep 17 00:00:00 2001 From: imply-cheddar <86940447+imply-cheddar@users.noreply.github.com> Date: Wed, 12 Jan 2022 06:04:11 +0900 Subject: [PATCH] Add a small LRU cache and use utf8 bytes in ArrayOfDoubles (#12130) * Add a small LRU cache and use utf8 bytes in ArrayOfDoubles * Add tests for extra branches * Even more tests for branch coverage * Fix Style --- .../ArrayOfDoublesSketchBuildAggregator.java | 51 ++- ...yOfDoublesSketchBuildBufferAggregator.java | 51 ++- .../ArrayOfDoublesSketchAggregationTest.java | 375 +++++++++++++++++- .../tuple/array_of_doubles_build_data.tsv | 80 ++-- ...uild_data_two_values_and_key_as_number.tsv | 40 ++ 5 files changed, 546 insertions(+), 51 deletions(-) create mode 100644 extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data_two_values_and_key_as_number.tsv diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java index 3ee6e25d852..7ca1061889d 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildAggregator.java @@ -28,7 +28,10 @@ import org.apache.druid.segment.data.IndexedInts; import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; /** * This aggregator builds sketches from raw data. @@ -45,6 +48,17 @@ public class ArrayOfDoublesSketchBuildAggregator implements Aggregator @Nullable private ArrayOfDoublesUpdatableSketch sketch; + private final boolean canLookupUtf8; + private final boolean canCacheById; + private final LinkedHashMap stringCache = new LinkedHashMap() + { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) + { + return size() >= 10; + } + }; + public ArrayOfDoublesSketchBuildAggregator( final DimensionSelector keySelector, final List valueSelectors, @@ -55,7 +69,10 @@ public class ArrayOfDoublesSketchBuildAggregator implements Aggregator this.valueSelectors = valueSelectors.toArray(new BaseDoubleColumnValueSelector[0]); values = new double[valueSelectors.size()]; sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) - .setNumberOfValues(valueSelectors.size()).build(); + .setNumberOfValues(valueSelectors.size()).build(); + + this.canCacheById = this.keySelector.nameLookupPossibleInAdvance(); + this.canLookupUtf8 = this.keySelector.supportsLookupNameUtf8(); } /** @@ -75,9 +92,35 @@ public class ArrayOfDoublesSketchBuildAggregator implements Aggregator } } synchronized (this) { - for (int i = 0, keysSize = keys.size(); i < keysSize; i++) { - final String key = keySelector.lookupName(keys.get(i)); - sketch.update(key, values); + if (canLookupUtf8) { + for (int i = 0, keysSize = keys.size(); i < keysSize; i++) { + final ByteBuffer key; + if (canCacheById) { + key = (ByteBuffer) stringCache.computeIfAbsent(keys.get(i), keySelector::lookupNameUtf8); + } else { + key = keySelector.lookupNameUtf8(keys.get(i)); + } + + if (key != null) { + byte[] bytes = new byte[key.remaining()]; + key.mark(); + key.get(bytes); + key.reset(); + + sketch.update(bytes, values); + } + } + } else { + for (int i = 0, keysSize = keys.size(); i < keysSize; i++) { + final String key; + if (canCacheById) { + key = (String) stringCache.computeIfAbsent(keys.get(i), keySelector::lookupName); + } else { + key = keySelector.lookupName(keys.get(i)); + } + + sketch.update(key, values); + } } } } diff --git a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java index 3e8122eeb75..18906d12936 100644 --- a/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java +++ b/extensions-core/datasketches/src/main/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchBuildBufferAggregator.java @@ -32,7 +32,9 @@ import org.apache.druid.segment.data.IndexedInts; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; /** * This aggregator builds sketches from raw data. @@ -48,6 +50,18 @@ public class ArrayOfDoublesSketchBuildBufferAggregator implements BufferAggregat @Nullable private double[] values; // not part of the state, but to reuse in aggregate() method + + private final boolean canLookupUtf8; + private final boolean canCacheById; + private final LinkedHashMap stringCache = new LinkedHashMap() + { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) + { + return size() >= 10; + } + }; + public ArrayOfDoublesSketchBuildBufferAggregator( final DimensionSelector keySelector, final List valueSelectors, @@ -60,6 +74,9 @@ public class ArrayOfDoublesSketchBuildBufferAggregator implements BufferAggregat this.nominalEntries = nominalEntries; this.maxIntermediateSize = maxIntermediateSize; values = new double[valueSelectors.size()]; + + this.canCacheById = this.keySelector.nameLookupPossibleInAdvance(); + this.canLookupUtf8 = this.keySelector.supportsLookupNameUtf8(); } @Override @@ -82,16 +99,42 @@ public class ArrayOfDoublesSketchBuildBufferAggregator implements BufferAggregat values[i] = valueSelectors[i].getDouble(); } } - final IndexedInts keys = keySelector.getRow(); // Wrapping memory and ArrayOfDoublesSketch is inexpensive compared to sketch operations. // Maintaining a cache of wrapped objects per buffer position like in Theta sketch aggregator // might might be considered, but it would increase complexity including relocate() support. final WritableMemory mem = WritableMemory.writableWrap(buf, ByteOrder.LITTLE_ENDIAN); final WritableMemory region = mem.writableRegion(position, maxIntermediateSize); final ArrayOfDoublesUpdatableSketch sketch = ArrayOfDoublesSketches.wrapUpdatableSketch(region); - for (int i = 0, keysSize = keys.size(); i < keysSize; i++) { - final String key = keySelector.lookupName(keys.get(i)); - sketch.update(key, values); + final IndexedInts keys = keySelector.getRow(); + if (canLookupUtf8) { + for (int i = 0, keysSize = keys.size(); i < keysSize; i++) { + final ByteBuffer key; + if (canCacheById) { + key = (ByteBuffer) stringCache.computeIfAbsent(keys.get(i), keySelector::lookupNameUtf8); + } else { + key = keySelector.lookupNameUtf8(keys.get(i)); + } + + if (key != null) { + byte[] bytes = new byte[key.remaining()]; + key.mark(); + key.get(bytes); + key.reset(); + + sketch.update(bytes, values); + } + } + } else { + for (int i = 0, keysSize = keys.size(); i < keysSize; i++) { + final String key; + if (canCacheById) { + key = (String) stringCache.computeIfAbsent(keys.get(i), keySelector::lookupName); + } else { + key = keySelector.lookupName(keys.get(i)); + } + + sketch.update(key, values); + } } } diff --git a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java index b1e3e59f680..7c37097860c 100644 --- a/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java +++ b/extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/ArrayOfDoublesSketchAggregationTest.java @@ -24,10 +24,12 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.initialization.DruidModule; import org.apache.druid.java.util.common.granularity.Granularities; import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Result; import org.apache.druid.query.aggregation.AggregationTestHelper; import org.apache.druid.query.groupby.GroupByQueryConfig; import org.apache.druid.query.groupby.GroupByQueryRunnerTest; import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.timeseries.TimeseriesResultValue; import org.apache.druid.testing.InitializedNullHandlingTest; import org.junit.After; import org.junit.Assert; @@ -49,6 +51,7 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); private final AggregationTestHelper helper; + private final AggregationTestHelper tsHelper; public ArrayOfDoublesSketchAggregationTest(final GroupByQueryConfig config) { @@ -56,6 +59,7 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling module.configure(null); helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( module.getJacksonModules(), config, tempFolder); + tsHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper(module.getJacksonModules(), tempFolder); } @Parameterized.Parameters(name = "{0}") @@ -298,7 +302,7 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling " \"dimensionExclusions\": [],", " \"spatialDimensions\": []", " },", - " \"columns\": [\"timestamp\", \"product\", \"key\", \"value\"]", + " \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]", " }", "}" ), @@ -469,6 +473,109 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling Assert.assertEquals(2.0, ds.getMaxValue(), 0); } + @Test + public void buildingSketchesAtIngestionTimeTwoValuesAndNumericalKey() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File( + this.getClass().getClassLoader().getResource( + "tuple/array_of_doubles_build_data_two_values_and_key_as_number.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\", {\"type\": \"long\", \"name\": \"key_num\"}],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value1\", \"value2\"]", + " }", + "}" + ), + String.join( + "\n", + "[", + " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [ \"value1\", \"value2\" ], \"nominalEntries\": 1024}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"union\",", + " \"operation\": \"UNION\",", + " \"nominalEntries\": 1024,", + " \"numberOfValues\": 2,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"intersection\",", + " \"operation\": \"INTERSECT\",", + " \"nominalEntries\": 1024,", + " \"numberOfValues\": 2,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"anotb\",", + " \"operation\": \"NOT\",", + " \"nominalEntries\": 1024,", + " \"numberOfValues\": 2,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {", + " \"type\": \"arrayOfDoublesSketchToMeans\",", + " \"name\": \"means\",", + " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", + " }", + " ],", + " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + ResultRow row = results.get(0); + Assert.assertEquals("sketch", 40.0, (double) row.get(0), 0); + Assert.assertEquals("estimate", 40.0, (double) row.get(1), 0); + Assert.assertEquals("union", 40.0, (double) row.get(3), 0); + Assert.assertEquals("intersection", 40.0, (double) row.get(4), 0); + Assert.assertEquals("anotb", 0, (double) row.get(5), 0); + + Object meansObj = row.get(6); // means + Assert.assertTrue(meansObj instanceof double[]); + double[] means = (double[]) meansObj; + Assert.assertEquals(2, means.length); + Assert.assertEquals(1.0, means[0], 0); + Assert.assertEquals(2.0, means[1], 0); + + Object obj = row.get(2); // quantiles-sketch + Assert.assertTrue(obj instanceof DoublesSketch); + DoublesSketch ds = (DoublesSketch) obj; + Assert.assertEquals(40, ds.getN()); + Assert.assertEquals(2.0, ds.getMinValue(), 0); + Assert.assertEquals(2.0, ds.getMaxValue(), 0); + } + @Test public void buildingSketchesAtIngestionTimeThreeValuesAndNulls() throws Exception { @@ -596,11 +703,11 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling " \"format\": \"tsv\",", " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", " \"dimensionsSpec\": {", - " \"dimensions\": [\"product\", \"key\"],", + " \"dimensions\": [\"product\", \"key\", {\"type\": \"long\", \"name\": \"key_num\"}],", " \"dimensionExclusions\": [],", " \"spatialDimensions\": []", " },", - " \"columns\": [\"timestamp\", \"product\", \"key\", \"value\"]", + " \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]", " }", "}" ), @@ -671,6 +778,268 @@ public class ArrayOfDoublesSketchAggregationTest extends InitializedNullHandling Assert.assertEquals(1.0, ds.getMaxValue(), 0); } + @Test + public void buildingSketchesAtQueryTimeUseNumerical() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\", \"key\", {\"type\": \"long\", \"name\": \"key_num\"}],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]", + " }", + "}" + ), + String.join( + "\n", + "[", + " {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 40, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", + " {\"type\": \"count\", \"name\":\"cnt\"}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"union\",", + " \"operation\": \"UNION\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"intersection\",", + " \"operation\": \"INTERSECT\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"anotb\",", + " \"operation\": \"NOT\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }}", + " ],", + " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + ResultRow row = results.get(0); + Assert.assertEquals("cnt", 40.0, new Double(row.get(1).toString()), 0); + Assert.assertEquals("sketch", 40.0, (double) row.get(0), 0); + Assert.assertEquals("estimate", 40.0, new Double(row.get(2).toString()), 0); + Assert.assertEquals("union", 40.0, new Double(row.get(4).toString()), 0); + Assert.assertEquals("intersection", 40.0, new Double(row.get(5).toString()), 0); + Assert.assertEquals("anotb", 0, new Double(row.get(6).toString()), 0); + + Object obj = row.get(3); // quantiles-sketch + Assert.assertTrue(obj instanceof DoublesSketch); + DoublesSketch ds = (DoublesSketch) obj; + Assert.assertEquals(40, ds.getN()); + Assert.assertEquals(1.0, ds.getMinValue(), 0); + Assert.assertEquals(1.0, ds.getMaxValue(), 0); + } + + @Test + public void buildingSketchesAtQueryTimeTimeseries() throws Exception + { + Sequence> seq = tsHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\", \"key\", {\"type\": \"long\", \"name\": \"key_num\"}],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]", + " }", + "}" + ), + String.join( + "\n", + "[", + " {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 40, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", + " {\"type\": \"count\", \"name\":\"cnt\"}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"union\",", + " \"operation\": \"UNION\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"intersection\",", + " \"operation\": \"INTERSECT\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"anotb\",", + " \"operation\": \"NOT\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }}", + " ],", + " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List> results = seq.toList(); + Assert.assertEquals(1, results.size()); + TimeseriesResultValue row = results.get(0).getValue(); + Assert.assertEquals("cnt", 40.0, row.getDoubleMetric("cnt"), 0); + Assert.assertEquals("sketch", 40.0, row.getDoubleMetric("sketch"), 0); + Assert.assertEquals("estimate", 40.0, row.getDoubleMetric("estimate"), 0); + Assert.assertEquals("union", 40.0, row.getDoubleMetric("union"), 0); + Assert.assertEquals("intersection", 40.0, row.getDoubleMetric("intersection"), 0); + Assert.assertEquals("anotb", 0, row.getDoubleMetric("anotb"), 0); + + Object obj = row.getMetric("quantiles-sketch"); // quantiles-sketch + Assert.assertTrue(obj instanceof DoublesSketch); + DoublesSketch ds = (DoublesSketch) obj; + Assert.assertEquals(40, ds.getN()); + Assert.assertEquals(1.0, ds.getMinValue(), 0); + Assert.assertEquals(1.0, ds.getMaxValue(), 0); + } + + @Test + public void buildingSketchesAtQueryTimeUsingNumericalTimeseries() throws Exception + { + Sequence> seq = tsHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\", \"key\", {\"type\": \"long\", \"name\": \"key_num\"}],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value\"]", + " }", + "}" + ), + String.join( + "\n", + "[", + " {\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 40, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [\"value\"], \"nominalEntries\": 1024},", + " {\"type\": \"count\", \"name\":\"cnt\"}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"union\",", + " \"operation\": \"UNION\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"intersection\",", + " \"operation\": \"INTERSECT\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }},", + " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", + " \"type\": \"arrayOfDoublesSketchSetOp\",", + " \"name\": \"anotb\",", + " \"operation\": \"NOT\",", + " \"nominalEntries\": 1024,", + " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", + " }}", + " ],", + " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List> results = seq.toList(); + Assert.assertEquals(1, results.size()); + TimeseriesResultValue row = results.get(0).getValue(); + Assert.assertEquals("cnt", 40.0, row.getDoubleMetric("cnt"), 0); + Assert.assertEquals("sketch", 40.0, row.getDoubleMetric("sketch"), 0); + Assert.assertEquals("estimate", 40.0, row.getDoubleMetric("estimate"), 0); + Assert.assertEquals("union", 40.0, row.getDoubleMetric("union"), 0); + Assert.assertEquals("intersection", 40.0, row.getDoubleMetric("intersection"), 0); + Assert.assertEquals("anotb", 0, row.getDoubleMetric("anotb"), 0); + + Object obj = row.getMetric("quantiles-sketch"); // quantiles-sketch + Assert.assertTrue(obj instanceof DoublesSketch); + DoublesSketch ds = (DoublesSketch) obj; + Assert.assertEquals(40, ds.getN()); + Assert.assertEquals(1.0, ds.getMinValue(), 0); + Assert.assertEquals(1.0, ds.getMaxValue(), 0); + } + // Two buckets with statistically significant difference. // See GenerateTestData class for details. @Test diff --git a/extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data.tsv b/extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data.tsv index 3ea6df0697a..e34280de5ae 100644 --- a/extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data.tsv +++ b/extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data.tsv @@ -1,40 +1,40 @@ -2015010101 product_2 key1 1.0 -2015010101 product_3 key2 1.0 -2015010101 product_8 key3 1.0 -2015010101 product_1 key4 1.0 -2015010101 product_1 key5 1.0 -2015010101 product_7 key6 1.0 -2015010101 product_5 key7 1.0 -2015010101 product_4 key8 1.0 -2015010101 product_3 key9 1.0 -2015010101 product_6 key10 1.0 -2015010101 product_5 key11 1.0 -2015010101 product_6 key12 1.0 -2015010101 product_6 key13 1.0 -2015010101 product_6 key14 1.0 -2015010101 product_6 key15 1.0 -2015010101 product_6 key16 1.0 -2015010101 product_3 key17 1.0 -2015010101 product_1 key18 1.0 -2015010101 product_2 key19 1.0 -2015010101 product_10 key20 1.0 -2015010101 product_2 key21 1.0 -2015010101 product_3 key22 1.0 -2015010101 product_8 key23 1.0 -2015010101 product_1 key24 1.0 -2015010101 product_1 key25 1.0 -2015010101 product_7 key26 1.0 -2015010101 product_5 key27 1.0 -2015010101 product_4 key28 1.0 -2015010101 product_3 key29 1.0 -2015010101 product_6 key30 1.0 -2015010101 product_5 key31 1.0 -2015010101 product_6 key32 1.0 -2015010101 product_6 key33 1.0 -2015010101 product_6 key34 1.0 -2015010101 product_6 key35 1.0 -2015010101 product_6 key36 1.0 -2015010101 product_3 key37 1.0 -2015010101 product_1 key38 1.0 -2015010101 product_2 key39 1.0 -2015010101 product_10 key40 1.0 +2015010101 product_2 key1 1 1.0 +2015010101 product_3 key2 2 1.0 +2015010101 product_8 key3 3 1.0 +2015010101 product_1 key4 4 1.0 +2015010101 product_1 key5 5 1.0 +2015010101 product_7 key6 6 1.0 +2015010101 product_5 key7 7 1.0 +2015010101 product_4 key8 8 1.0 +2015010101 product_3 key9 9 1.0 +2015010101 product_6 key10 10 1.0 +2015010101 product_5 key11 11 1.0 +2015010101 product_6 key12 12 1.0 +2015010101 product_6 key13 13 1.0 +2015010101 product_6 key14 14 1.0 +2015010101 product_6 key15 15 1.0 +2015010101 product_6 key16 16 1.0 +2015010101 product_3 key17 17 1.0 +2015010101 product_1 key18 18 1.0 +2015010101 product_2 key19 19 1.0 +2015010101 product_10 key20 20 1.0 +2015010101 product_2 key21 21 1.0 +2015010101 product_3 key22 22 1.0 +2015010101 product_8 key23 23 1.0 +2015010101 product_1 key24 24 1.0 +2015010101 product_1 key25 25 1.0 +2015010101 product_7 key26 26 1.0 +2015010101 product_5 key27 27 1.0 +2015010101 product_4 key28 28 1.0 +2015010101 product_3 key29 29 1.0 +2015010101 product_6 key30 30 1.0 +2015010101 product_5 key31 31 1.0 +2015010101 product_6 key32 32 1.0 +2015010101 product_6 key33 33 1.0 +2015010101 product_6 key34 34 1.0 +2015010101 product_6 key35 35 1.0 +2015010101 product_6 key36 36 1.0 +2015010101 product_3 key37 37 1.0 +2015010101 product_1 key38 38 1.0 +2015010101 product_2 key39 39 1.0 +2015010101 product_10 key40 40 1.0 diff --git a/extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data_two_values_and_key_as_number.tsv b/extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data_two_values_and_key_as_number.tsv new file mode 100644 index 00000000000..eaa9bc99a4e --- /dev/null +++ b/extensions-core/datasketches/src/test/resources/tuple/array_of_doubles_build_data_two_values_and_key_as_number.tsv @@ -0,0 +1,40 @@ +2015010101 product_2 key1 1 1.0 2.0 +2015010101 product_3 key2 2 1.0 2.0 +2015010101 product_8 key3 3 1.0 2.0 +2015010101 product_1 key4 4 1.0 2.0 +2015010101 product_1 key5 5 1.0 2.0 +2015010101 product_7 key6 6 1.0 2.0 +2015010101 product_5 key7 7 1.0 2.0 +2015010101 product_4 key8 8 1.0 2.0 +2015010101 product_3 key9 9 1.0 2.0 +2015010101 product_6 key10 10 1.0 2.0 +2015010101 product_5 key11 11 1.0 2.0 +2015010101 product_6 key12 12 1.0 2.0 +2015010101 product_6 key13 13 1.0 2.0 +2015010101 product_6 key14 14 1.0 2.0 +2015010101 product_6 key15 15 1.0 2.0 +2015010101 product_6 key16 16 1.0 2.0 +2015010101 product_3 key17 17 1.0 2.0 +2015010101 product_1 key18 18 1.0 2.0 +2015010101 product_2 key19 19 1.0 2.0 +2015010101 product_10 key20 20 1.0 2.0 +2015010101 product_2 key21 21 1.0 2.0 +2015010101 product_3 key22 22 1.0 2.0 +2015010101 product_8 key23 23 1.0 2.0 +2015010101 product_1 key24 24 1.0 2.0 +2015010101 product_1 key25 25 1.0 2.0 +2015010101 product_7 key26 26 1.0 2.0 +2015010101 product_5 key27 27 1.0 2.0 +2015010101 product_4 key28 28 1.0 2.0 +2015010101 product_3 key29 29 1.0 2.0 +2015010101 product_6 key30 30 1.0 2.0 +2015010101 product_5 key31 31 1.0 2.0 +2015010101 product_6 key32 32 1.0 2.0 +2015010101 product_6 key33 33 1.0 2.0 +2015010101 product_6 key34 34 1.0 2.0 +2015010101 product_6 key35 35 1.0 2.0 +2015010101 product_6 key36 36 1.0 2.0 +2015010101 product_3 key37 37 1.0 2.0 +2015010101 product_1 key38 38 1.0 2.0 +2015010101 product_2 key39 39 1.0 2.0 +2015010101 product_10 key40 40 1.0 2.0