From 3097562adf82dae195d258214467949308e0c3fe Mon Sep 17 00:00:00 2001 From: Furkan KAMACI Date: Thu, 7 Feb 2019 22:51:07 +0300 Subject: [PATCH 01/25] Improper getter value is fixed. (#6930) * Improper getter value is fixed. * Test class is added. --- .../histogram/BucketsPostAggregator.java | 2 +- .../histogram/BucketsPostAggregatorTest.java | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregatorTest.java diff --git a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregator.java b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregator.java index 94400f781bb..c47044cd330 100644 --- a/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregator.java +++ b/extensions-core/histogram/src/main/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregator.java @@ -82,7 +82,7 @@ public class BucketsPostAggregator extends ApproximateHistogramPostAggregator @JsonProperty public float getOffset() { - return bucketSize; + return offset; } @Override diff --git a/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregatorTest.java b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregatorTest.java new file mode 100644 index 00000000000..ebcd2823dc8 --- /dev/null +++ b/extensions-core/histogram/src/test/java/org/apache/druid/query/aggregation/histogram/BucketsPostAggregatorTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.histogram; + +import org.apache.druid.jackson.DefaultObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +public class BucketsPostAggregatorTest +{ + @Test + public void testSerde() throws Exception + { + BucketsPostAggregator aggregator1 = + new BucketsPostAggregator("buckets_post_aggregator", "test_field", 2f, 4f); + + DefaultObjectMapper mapper = new DefaultObjectMapper(); + BucketsPostAggregator aggregator2 = mapper.readValue( + mapper.writeValueAsString(aggregator1), + BucketsPostAggregator.class + ); + + Assert.assertEquals(aggregator1.getBucketSize(), aggregator2.getBucketSize(), 0.0001); + Assert.assertEquals(aggregator1.getOffset(), aggregator2.getOffset(), 0.0001); + Assert.assertArrayEquals(aggregator1.getCacheKey(), aggregator2.getCacheKey()); + Assert.assertEquals(aggregator1.getDependentFields(), aggregator2.getDependentFields()); + } +} From fafbc4a80e0bba0c1fbb5066b9f0ce37ffcab929 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Thu, 7 Feb 2019 14:02:52 -0800 Subject: [PATCH 02/25] Set version to 0.15.0-incubating-SNAPSHOT (#7014) --- aws-common/pom.xml | 2 +- benchmarks/pom.xml | 2 +- core/pom.xml | 2 +- distribution/pom.xml | 2 +- examples/pom.xml | 2 +- extendedset/pom.xml | 2 +- extensions-contrib/ambari-metrics-emitter/pom.xml | 2 +- extensions-contrib/azure-extensions/pom.xml | 2 +- extensions-contrib/cassandra-storage/pom.xml | 2 +- extensions-contrib/cloudfiles-extensions/pom.xml | 2 +- extensions-contrib/distinctcount/pom.xml | 2 +- extensions-contrib/druid-rocketmq/pom.xml | 2 +- extensions-contrib/google-extensions/pom.xml | 2 +- extensions-contrib/graphite-emitter/pom.xml | 2 +- extensions-contrib/influx-extensions/pom.xml | 2 +- extensions-contrib/kafka-eight-simpleConsumer/pom.xml | 2 +- extensions-contrib/kafka-emitter/pom.xml | 2 +- extensions-contrib/materialized-view-maintenance/pom.xml | 2 +- extensions-contrib/materialized-view-selection/pom.xml | 2 +- extensions-contrib/opentsdb-emitter/pom.xml | 2 +- extensions-contrib/orc-extensions/pom.xml | 2 +- extensions-contrib/rabbitmq/pom.xml | 2 +- extensions-contrib/redis-cache/pom.xml | 2 +- extensions-contrib/sqlserver-metadata-storage/pom.xml | 2 +- extensions-contrib/statsd-emitter/pom.xml | 2 +- extensions-contrib/thrift-extensions/pom.xml | 2 +- extensions-contrib/time-min-max/pom.xml | 2 +- extensions-contrib/virtual-columns/pom.xml | 2 +- extensions-core/avro-extensions/pom.xml | 2 +- extensions-core/datasketches/pom.xml | 2 +- extensions-core/druid-basic-security/pom.xml | 2 +- extensions-core/druid-bloom-filter/pom.xml | 2 +- extensions-core/druid-kerberos/pom.xml | 2 +- extensions-core/hdfs-storage/pom.xml | 2 +- extensions-core/histogram/pom.xml | 2 +- extensions-core/kafka-eight/pom.xml | 2 +- extensions-core/kafka-extraction-namespace/pom.xml | 2 +- extensions-core/kafka-indexing-service/pom.xml | 2 +- extensions-core/kinesis-indexing-service/pom.xml | 2 +- extensions-core/lookups-cached-global/pom.xml | 2 +- extensions-core/lookups-cached-single/pom.xml | 2 +- extensions-core/mysql-metadata-storage/pom.xml | 2 +- extensions-core/parquet-extensions/pom.xml | 2 +- extensions-core/postgresql-metadata-storage/pom.xml | 2 +- extensions-core/protobuf-extensions/pom.xml | 2 +- extensions-core/s3-extensions/pom.xml | 2 +- extensions-core/simple-client-sslcontext/pom.xml | 2 +- extensions-core/stats/pom.xml | 2 +- hll/pom.xml | 2 +- indexing-hadoop/pom.xml | 2 +- indexing-service/pom.xml | 2 +- integration-tests/pom.xml | 2 +- pom.xml | 2 
+- processing/pom.xml | 2 +- server/pom.xml | 2 +- services/pom.xml | 2 +- sql/pom.xml | 2 +- web-console/pom.xml | 2 +- 58 files changed, 58 insertions(+), 58 deletions(-) diff --git a/aws-common/pom.xml b/aws-common/pom.xml index 2a65b70c42e..b4e5e0d1a44 100644 --- a/aws-common/pom.xml +++ b/aws-common/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/benchmarks/pom.xml b/benchmarks/pom.xml index 685ffe05131..80f2461904f 100644 --- a/benchmarks/pom.xml +++ b/benchmarks/pom.xml @@ -27,7 +27,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/core/pom.xml b/core/pom.xml index 9dbe3de96ca..deee4602905 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -31,7 +31,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/distribution/pom.xml b/distribution/pom.xml index bf069aa38f9..89df39f69b3 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -31,7 +31,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/examples/pom.xml b/examples/pom.xml index c9ba9106024..995d73c8a0d 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/extendedset/pom.xml b/extendedset/pom.xml index 783528a4e75..cdfa493dd6b 100755 --- a/extendedset/pom.xml +++ b/extendedset/pom.xml @@ -32,7 +32,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/extensions-contrib/ambari-metrics-emitter/pom.xml b/extensions-contrib/ambari-metrics-emitter/pom.xml index 886a7630a26..dc83ba5326b 100644 --- a/extensions-contrib/ambari-metrics-emitter/pom.xml +++ b/extensions-contrib/ambari-metrics-emitter/pom.xml @@ -25,7 +25,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/azure-extensions/pom.xml b/extensions-contrib/azure-extensions/pom.xml index 2980d5e6c1c..473baa69857 100644 --- a/extensions-contrib/azure-extensions/pom.xml +++ b/extensions-contrib/azure-extensions/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/cassandra-storage/pom.xml b/extensions-contrib/cassandra-storage/pom.xml index 67c215f43c5..8e9c74b211c 100644 --- a/extensions-contrib/cassandra-storage/pom.xml +++ b/extensions-contrib/cassandra-storage/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/cloudfiles-extensions/pom.xml b/extensions-contrib/cloudfiles-extensions/pom.xml index f80ca1f9893..237be09a26c 100644 --- a/extensions-contrib/cloudfiles-extensions/pom.xml +++ b/extensions-contrib/cloudfiles-extensions/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/distinctcount/pom.xml b/extensions-contrib/distinctcount/pom.xml index c4aeacea7b7..263fc9d37b2 100644 --- a/extensions-contrib/distinctcount/pom.xml +++ b/extensions-contrib/distinctcount/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/druid-rocketmq/pom.xml b/extensions-contrib/druid-rocketmq/pom.xml index 0394abcb775..88267554142 100644 --- 
a/extensions-contrib/druid-rocketmq/pom.xml +++ b/extensions-contrib/druid-rocketmq/pom.xml @@ -24,7 +24,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/google-extensions/pom.xml b/extensions-contrib/google-extensions/pom.xml index f252f58fe1d..bbd20b2837e 100644 --- a/extensions-contrib/google-extensions/pom.xml +++ b/extensions-contrib/google-extensions/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/graphite-emitter/pom.xml b/extensions-contrib/graphite-emitter/pom.xml index b48d6909aa6..92ec543f9b8 100644 --- a/extensions-contrib/graphite-emitter/pom.xml +++ b/extensions-contrib/graphite-emitter/pom.xml @@ -25,7 +25,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/influx-extensions/pom.xml b/extensions-contrib/influx-extensions/pom.xml index 916780dcc12..9e4163a04d6 100644 --- a/extensions-contrib/influx-extensions/pom.xml +++ b/extensions-contrib/influx-extensions/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/kafka-eight-simpleConsumer/pom.xml b/extensions-contrib/kafka-eight-simpleConsumer/pom.xml index 4e4144632d6..a0c005b3e2c 100644 --- a/extensions-contrib/kafka-eight-simpleConsumer/pom.xml +++ b/extensions-contrib/kafka-eight-simpleConsumer/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/kafka-emitter/pom.xml b/extensions-contrib/kafka-emitter/pom.xml index ef714b64097..9a7666f1438 100644 --- a/extensions-contrib/kafka-emitter/pom.xml +++ b/extensions-contrib/kafka-emitter/pom.xml @@ -25,7 +25,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/materialized-view-maintenance/pom.xml b/extensions-contrib/materialized-view-maintenance/pom.xml index 643d5005582..6c4513ad4e5 100644 --- a/extensions-contrib/materialized-view-maintenance/pom.xml +++ b/extensions-contrib/materialized-view-maintenance/pom.xml @@ -24,7 +24,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/extensions-contrib/materialized-view-selection/pom.xml b/extensions-contrib/materialized-view-selection/pom.xml index 341ce9221fa..5a3566e5f10 100644 --- a/extensions-contrib/materialized-view-selection/pom.xml +++ b/extensions-contrib/materialized-view-selection/pom.xml @@ -24,7 +24,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/extensions-contrib/opentsdb-emitter/pom.xml b/extensions-contrib/opentsdb-emitter/pom.xml index c89c4ceb14f..5f50e0092c1 100644 --- a/extensions-contrib/opentsdb-emitter/pom.xml +++ b/extensions-contrib/opentsdb-emitter/pom.xml @@ -31,7 +31,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/orc-extensions/pom.xml b/extensions-contrib/orc-extensions/pom.xml index 18ddf7827b8..944f061de7f 100644 --- a/extensions-contrib/orc-extensions/pom.xml +++ b/extensions-contrib/orc-extensions/pom.xml @@ -28,7 +28,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git 
a/extensions-contrib/rabbitmq/pom.xml b/extensions-contrib/rabbitmq/pom.xml index 7b9d375fd4a..4ef4dc98c57 100644 --- a/extensions-contrib/rabbitmq/pom.xml +++ b/extensions-contrib/rabbitmq/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/redis-cache/pom.xml b/extensions-contrib/redis-cache/pom.xml index cd06e05ccaf..4d3ea344035 100644 --- a/extensions-contrib/redis-cache/pom.xml +++ b/extensions-contrib/redis-cache/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/sqlserver-metadata-storage/pom.xml b/extensions-contrib/sqlserver-metadata-storage/pom.xml index b886f264e70..ee04cd039e7 100644 --- a/extensions-contrib/sqlserver-metadata-storage/pom.xml +++ b/extensions-contrib/sqlserver-metadata-storage/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-contrib/statsd-emitter/pom.xml b/extensions-contrib/statsd-emitter/pom.xml index b55b287bec7..70817d4f286 100644 --- a/extensions-contrib/statsd-emitter/pom.xml +++ b/extensions-contrib/statsd-emitter/pom.xml @@ -23,7 +23,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/extensions-contrib/thrift-extensions/pom.xml b/extensions-contrib/thrift-extensions/pom.xml index 95137f9a5ac..55412c46f73 100644 --- a/extensions-contrib/thrift-extensions/pom.xml +++ b/extensions-contrib/thrift-extensions/pom.xml @@ -30,7 +30,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/extensions-contrib/time-min-max/pom.xml b/extensions-contrib/time-min-max/pom.xml index d34ba373f49..1081733dc2a 100644 --- a/extensions-contrib/time-min-max/pom.xml +++ b/extensions-contrib/time-min-max/pom.xml @@ -23,7 +23,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/extensions-contrib/virtual-columns/pom.xml b/extensions-contrib/virtual-columns/pom.xml index 0ec600db7c4..336055a9280 100644 --- a/extensions-contrib/virtual-columns/pom.xml +++ b/extensions-contrib/virtual-columns/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/avro-extensions/pom.xml b/extensions-core/avro-extensions/pom.xml index c675dad3aae..f884b4abcbb 100644 --- a/extensions-core/avro-extensions/pom.xml +++ b/extensions-core/avro-extensions/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/datasketches/pom.xml b/extensions-core/datasketches/pom.xml index 489226224b0..24096a9a7d1 100644 --- a/extensions-core/datasketches/pom.xml +++ b/extensions-core/datasketches/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/druid-basic-security/pom.xml b/extensions-core/druid-basic-security/pom.xml index ea10f325db2..36bacf8c097 100644 --- a/extensions-core/druid-basic-security/pom.xml +++ b/extensions-core/druid-basic-security/pom.xml @@ -32,7 +32,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/druid-bloom-filter/pom.xml 
b/extensions-core/druid-bloom-filter/pom.xml index f9ac9f90c07..89bd53d544b 100644 --- a/extensions-core/druid-bloom-filter/pom.xml +++ b/extensions-core/druid-bloom-filter/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/druid-kerberos/pom.xml b/extensions-core/druid-kerberos/pom.xml index b6fad3510f5..5b941012521 100644 --- a/extensions-core/druid-kerberos/pom.xml +++ b/extensions-core/druid-kerberos/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/hdfs-storage/pom.xml b/extensions-core/hdfs-storage/pom.xml index c60787717db..ec4f014db60 100644 --- a/extensions-core/hdfs-storage/pom.xml +++ b/extensions-core/hdfs-storage/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/histogram/pom.xml b/extensions-core/histogram/pom.xml index dcd706ad5b7..ea360e0ceee 100644 --- a/extensions-core/histogram/pom.xml +++ b/extensions-core/histogram/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/kafka-eight/pom.xml b/extensions-core/kafka-eight/pom.xml index 0c11346668c..94c25a6bf39 100644 --- a/extensions-core/kafka-eight/pom.xml +++ b/extensions-core/kafka-eight/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/kafka-extraction-namespace/pom.xml b/extensions-core/kafka-extraction-namespace/pom.xml index 60fa27fd46f..0acc2c6477b 100644 --- a/extensions-core/kafka-extraction-namespace/pom.xml +++ b/extensions-core/kafka-extraction-namespace/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/kafka-indexing-service/pom.xml b/extensions-core/kafka-indexing-service/pom.xml index 5004bd5bad4..c48091f1669 100644 --- a/extensions-core/kafka-indexing-service/pom.xml +++ b/extensions-core/kafka-indexing-service/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/kinesis-indexing-service/pom.xml b/extensions-core/kinesis-indexing-service/pom.xml index a7e576ec435..9c5b708f70e 100644 --- a/extensions-core/kinesis-indexing-service/pom.xml +++ b/extensions-core/kinesis-indexing-service/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/lookups-cached-global/pom.xml b/extensions-core/lookups-cached-global/pom.xml index 8866e3b062c..e021975f9e8 100644 --- a/extensions-core/lookups-cached-global/pom.xml +++ b/extensions-core/lookups-cached-global/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/lookups-cached-single/pom.xml b/extensions-core/lookups-cached-single/pom.xml index a3a1294c4be..1701e2b98d3 100644 --- a/extensions-core/lookups-cached-single/pom.xml +++ b/extensions-core/lookups-cached-single/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/mysql-metadata-storage/pom.xml b/extensions-core/mysql-metadata-storage/pom.xml index 
37b0617e01d..6c2241b9989 100644 --- a/extensions-core/mysql-metadata-storage/pom.xml +++ b/extensions-core/mysql-metadata-storage/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/parquet-extensions/pom.xml b/extensions-core/parquet-extensions/pom.xml index 586b66d5dd4..189bb8db615 100644 --- a/extensions-core/parquet-extensions/pom.xml +++ b/extensions-core/parquet-extensions/pom.xml @@ -29,7 +29,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/extensions-core/postgresql-metadata-storage/pom.xml b/extensions-core/postgresql-metadata-storage/pom.xml index a6bed8c96a4..14bda9e84a6 100644 --- a/extensions-core/postgresql-metadata-storage/pom.xml +++ b/extensions-core/postgresql-metadata-storage/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/protobuf-extensions/pom.xml b/extensions-core/protobuf-extensions/pom.xml index c004982b715..4e50f926c57 100644 --- a/extensions-core/protobuf-extensions/pom.xml +++ b/extensions-core/protobuf-extensions/pom.xml @@ -31,7 +31,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/s3-extensions/pom.xml b/extensions-core/s3-extensions/pom.xml index 8d05b309dd3..ff856c43f7d 100644 --- a/extensions-core/s3-extensions/pom.xml +++ b/extensions-core/s3-extensions/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/extensions-core/simple-client-sslcontext/pom.xml b/extensions-core/simple-client-sslcontext/pom.xml index b6286c237c5..401163bd8d3 100644 --- a/extensions-core/simple-client-sslcontext/pom.xml +++ b/extensions-core/simple-client-sslcontext/pom.xml @@ -24,7 +24,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 diff --git a/extensions-core/stats/pom.xml b/extensions-core/stats/pom.xml index 704f2543462..f934efd8547 100644 --- a/extensions-core/stats/pom.xml +++ b/extensions-core/stats/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml diff --git a/hll/pom.xml b/hll/pom.xml index ebdfe8de677..84ae4021c70 100644 --- a/hll/pom.xml +++ b/hll/pom.xml @@ -24,7 +24,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT druid-hll diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index 1fd923c99bf..2baf5226059 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index f664dd47123..496bc3340d6 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index 9c89d38b50a..24c910c1880 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/pom.xml b/pom.xml index 4e2f85e173d..6c6b1945f6f 100644 --- a/pom.xml +++ b/pom.xml @@ -30,7 +30,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 
0.15.0-incubating-SNAPSHOT pom Druid diff --git a/processing/pom.xml b/processing/pom.xml index ccaf9c53429..9f781b0588b 100644 --- a/processing/pom.xml +++ b/processing/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/server/pom.xml b/server/pom.xml index 9bdafbb7580..67d62e6e8e7 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/services/pom.xml b/services/pom.xml index 1220492fdd1..e6bf012ba8f 100644 --- a/services/pom.xml +++ b/services/pom.xml @@ -27,7 +27,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/sql/pom.xml b/sql/pom.xml index 57c0d5f0487..ea301eb8157 100644 --- a/sql/pom.xml +++ b/sql/pom.xml @@ -29,7 +29,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT diff --git a/web-console/pom.xml b/web-console/pom.xml index 936830d98be..fccd92f7dc3 100644 --- a/web-console/pom.xml +++ b/web-console/pom.xml @@ -28,7 +28,7 @@ org.apache.druid druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT From b3dcbe70ad206b703713398fa39bd18e7e2f01c2 Mon Sep 17 00:00:00 2001 From: Don Bowman Date: Fri, 8 Feb 2019 07:12:28 -0500 Subject: [PATCH 03/25] Add docker container for druid (#6896) * Add docker container for druid This container is an 'omnibus' (since there is such a high overlap with the various services). It includes all contrib extension as well as the msql connector. It is intended to be run as `docker run NAME SERVICE` (e.g. docker run druid:latest broker) * Add Apache license header * Resolve issues from Pull Request review * Add comments at top of script per PR comments * Revert BUILDKIT. Not available everywhere. * Don't set hostname, allow default (IP) Some environments (e.g. Kubernetes Deployments) don't resolve hostname to IP. * Switch to amd64 glibc-based busybox from 32-bit uclibc * Override service-specific configuration * Replace MAINTAINER w/ LABEL * Add mysql connector to application classpath This works around issue #3770 https://github.com/apache/incubator-druid/issues/3770 * Add docker-compose and sample environment Signed-off-by: Don Bowman --- .dockerignore | 37 +++++ distribution/docker/Dockerfile | 56 +++++++ distribution/docker/README.md | 26 ++++ distribution/docker/docker-compose.yml | 124 ++++++++++++++++ distribution/docker/druid.sh | 137 ++++++++++++++++++ distribution/docker/environment | 51 +++++++ distribution/docker/sha256sums.txt | 19 +++ distribution/src/assembly/source-assembly.xml | 1 + 8 files changed, 451 insertions(+) create mode 100644 .dockerignore create mode 100644 distribution/docker/Dockerfile create mode 100644 distribution/docker/README.md create mode 100644 distribution/docker/docker-compose.yml create mode 100755 distribution/docker/druid.sh create mode 100644 distribution/docker/environment create mode 100644 distribution/docker/sha256sums.txt diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000000..d7ae5a9cf59 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +.git +**/*.jar +**/*.class +dist +target +*.iml +*.ipr +*.iws +*.tar.gz +*.swp +*.swo +.classpath +.idea +.project +.settings/ +*.log +*.DS_Store +_site +dependency-reduced-pom.xml diff --git a/distribution/docker/Dockerfile b/distribution/docker/Dockerfile new file mode 100644 index 00000000000..13c53e76077 --- /dev/null +++ b/distribution/docker/Dockerfile @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +FROM maven:3-jdk-8 as builder + +COPY . /src +WORKDIR /src +RUN mvn install -ff -DskipTests -Dforbiddenapis.skip=true -Pdist -Pbundle-contrib-exts + +RUN \ + VER=$(mvn -B org.apache.maven.plugins:maven-help-plugin:3.1.1:evaluate -Dexpression=project.version -q -DforceStdout=true -f pom.xml 2>/dev/null) \ + && tar -zxf ./distribution/target/apache-druid-${VER}-bin.tar.gz -C /opt \ + && ln -s /opt/apache-druid-${VER} /opt/druid + +RUN wget -O /opt/druid/extensions/mysql-metadata-storage/mysql-connector-java-5.1.38.jar http://central.maven.org/maven2/mysql/mysql-connector-java/5.1.38/mysql-connector-java-5.1.38.jar \ + && sha256sum --ignore-missing -c /src/distribution/docker/sha256sums.txt \ + && ln -s /opt/druid/extensions/mysql-metadata-storage/mysql-connector-java-5.1.38.jar /opt/druid/lib + +RUN addgroup --gid 1000 druid \ + && adduser --home /opt/druid --shell /bin/sh --no-create-home --uid 1000 --gecos '' --gid 1000 --disabled-password druid \ + && mkdir -p /opt/druid/var \ + && chown -R druid:druid /opt/druid \ + && chmod 775 /opt/druid/var + +FROM amd64/busybox:1.30.0-glibc as busybox +FROM gcr.io/distroless/java +LABEL maintainer="Don Bowman " + +COPY --from=busybox /bin/busybox /busybox/busybox +RUN ["/busybox/busybox", "--install", "/bin"] +COPY --from=builder /etc/passwd /etc/passwd +COPY --from=builder /etc/group /etc/group +COPY --from=builder --chown=druid /opt /opt +COPY distribution/docker/druid.sh /druid.sh +RUN chown -R druid:druid /opt/druid +USER druid +VOLUME /opt/druid/var +WORKDIR /opt/druid + +ENTRYPOINT ["/druid.sh"] diff --git a/distribution/docker/README.md b/distribution/docker/README.md new file mode 100644 index 00000000000..fdc580c79e7 --- /dev/null +++ b/distribution/docker/README.md @@ -0,0 +1,26 @@ + + +## Build + +From the root of the repo, run `docker build -t druid:tag -f distribution/docker/Dockerfile .` + +## Run + +Edit `environment` to suite. 
Run 'docker-compose -f distribution/docker/docker-compose.yml up` diff --git a/distribution/docker/docker-compose.yml b/distribution/docker/docker-compose.yml new file mode 100644 index 00000000000..0b962414345 --- /dev/null +++ b/distribution/docker/docker-compose.yml @@ -0,0 +1,124 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +version: "2.2" + +volumes: + metadata_data: {} + middle_var: {} + historical_var: {} + broker_var: {} + coordinator_var: {} + overlord_var: {} + +services: + postgres: + container_name: postgres + image: postgres:latest + volumes: + - metadata_data:/var/lib/postgresql/data + environment: + - POSTGRES_PASSWORD=FoolishPassword + - POSTGRES_USER=druid + - POSTGRES_DB=druid + + # Need 3.5 or later for container nodes + zookeeper: + container_name: zookeeper + image: zookeeper:3.5 + environment: + - ZOO_MY_ID=1 + + coordinator: + image: druid + container_name: coordinator + volumes: + - coordinator_var:/opt/druid/var + depends_on: + - zookeeper + - postgres + ports: + - "3001:8081" + command: + - coordinator + env_file: + - environment + + broker: + image: druid + container_name: broker + volumes: + - broker_var:/opt/druid/var + depends_on: + - zookeeper + - postgres + - coordinator + ports: + - "3002:8082" + command: + - broker + env_file: + - environment + + historical: + image: druid + container_name: historical + volumes: + - historical_var:/opt/druid/var + depends_on: + - zookeeper + - postgres + - coordinator + ports: + - "3003:8083" + command: + - historical + env_file: + - environment + + overlord: + image: druid + container_name: overlord + volumes: + - overlord_var:/opt/druid/var + depends_on: + - zookeeper + - postgres + ports: + - "4000:8090" + command: + - overlord + env_file: + - environment + + middlemanager: + image: druid + container_name: middlemanager + volumes: + - middle_var:/opt/druid/var + depends_on: + - zookeeper + - postgres + - coordinator + ports: + - "4001:8091" + command: + - middleManager + env_file: + - environment + diff --git a/distribution/docker/druid.sh b/distribution/docker/druid.sh new file mode 100755 index 00000000000..b142ec6962d --- /dev/null +++ b/distribution/docker/druid.sh @@ -0,0 +1,137 @@ +#!/bin/sh + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# NOTE: this is a 'run' script for the stock tarball +# It takes 1 required argument (the name of the service, +# e.g. 'broker', 'historical' etc). Any additional arguments +# are passed to that service. +# +# It accepts 'JAVA_OPTS' as an environment variable +# +# Additional env vars: +# - DRUID_LOG4J -- set the entire log4j.xml verbatim +# - DRUID_LOG_LEVEL -- override the default log level in default log4j +# - DRUID_XMX -- set Java Xmx +# - DRUID_XMS -- set Java Xms +# - DRUID_MAXNEWSIZE -- set Java max new size +# - DRUID_NEWSIZE -- set Java new size +# - DRUID_MAXDIRECTMEMORYSIZE -- set Java max direct memory size +# +# - DRUID_CONFIG -- full path to a file for druid 'common' properties +# - DRUID_CONFIG_${service} -- full path to a file for druid 'service' properties + +set -e +SERVICE="$1" + +echo "$(date -Is) startup service $SERVICE" + +# We put all the config in /tmp/conf to allow for a +# read-only root filesystem +cp -r /opt/druid/conf /tmp/conf + +# Delete the old key (if existing) and append new key=value +setKey() { + service="$1" + key="$2" + value="$3" + case "$service" in + _common) + fname=common.runtime.properties ;; + *) + fname=runtime.properties ;; + esac + # Delete from all + sed -ri "/$key=/d" /tmp/conf/druid/_common/common.runtime.properties + [ -f /tmp/conf/druid/$service/$fname ] && sed -ri "/$key=/d" /tmp/conf/druid/$service/$fname + [ -f /tmp/conf/druid/$service/$fname ] && echo "$key=$value" >> /tmp/conf/druid/$service/$fname + [ -f /tmp/conf/druid/$service/$fname ] || echo "$key=$value" >> /tmp/conf/druid/_common/$fname +} + +setJavaKey() { + service="$1" + key=$2 + value=$3 + file=/tmp/conf/druid/$service/jvm.config + sed -ri "/$key/d" $file + echo $value >> $file +} + +## Setup host names +if [ -n "${ZOOKEEPER}" ] +then + setKey _common druid.zk.service.host "${ZOOKEEPER}" +fi + +setKey $SERVICE druid.host $(ip r get 1 | awk '{print $7;exit}') + + +env |grep ^druid_ | while read evar +do + # Can't use IFS='=' to parse since var might have = in it (e.g. password) + val=$(echo "$evar" | sed -e 's?[^=]*=??') + var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?g') + setKey $SERVICE "$var" "$val" +done + +env |grep ^s3service | while read evar +do + val=$(echo "$evar" | sed -e 's?[^=]*=??') + var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?' -e 's?_?-?g') + echo "$var=$val" >> /tmp/conf/druid/_common/jets3t.properties +done + +# This is to allow configuration via a Kubernetes configMap without +# e.g. 
using subPath (you can also mount the configMap on /tmp/conf/druid) +if [ -n "$DRUID_CONFIG_COMMON" ] +then + cp -f "$DRUID_CONFIG_COMMON" /tmp/conf/druid/_common/common.runtime.properties +fi + +SCONFIG=$(printf "%s_%s" DRUID_CONFIG ${SERVICE}) +SCONFIG=$(eval echo \$$(echo $SCONFIG)) + +if [ -n "${SCONFIG}" ] +then + cp -f "${SCONFIG}" /tmp/conf/druid/${SERVICE}/runtime.properties +fi + +# Now do the java options + +if [ -n "$DRUID_XMX" ]; then setJavaKey ${SERVICE} -Xmx -Xmx${DRUID_XMX}; fi +if [ -n "$DRUID_XMS" ]; then setJavaKey ${SERVICE} -Xms -Xms${DRUID_XMS}; fi +if [ -n "$DRUID_MAXNEWSIZE" ]; then setJavaKey ${SERVICE} -XX:MaxNewSize -XX:MaxNewSize=${DRUID_MAXNEWSIZE}; fi +if [ -n "$DRUID_NEWSIZE" ]; then setJavaKey ${SERVICE} -XX:NewSize -XX:MaxNewSize=${DRUID_NEWSIZE}; fi +if [ -n "$DRUID_MAXDIRECTMEMORYSIZE" ]; then setJavaKey ${SERVICE} -XX:MaxDirectMemorySize -XX:MaxDirectMemorySize=${DRUID_MAXDIRECTMEMORYSIZE}; fi + +JAVA_OPTS="$JAVA_OPTS $(cat /tmp/conf/druid/${SERVICE}/jvm.config | xargs)" + +if [ -n "$DRUID_LOG_LEVEL" ] +then + sed -ri 's/"info"/"'$DRUID_LOG_LEVEL'"/g' /tmp/conf/druid/_common/log4j2.xml +fi + +if [ -n "$DRUID_LOG4J" ] +then + echo "$DRUID_LOG4J" > /tmp/conf/druid/_common/log4j2.xml +fi + +mkdir -p var/tmp var/druid/segments var/druid/indexing-logs var/druid/task var/druid/hadoop-tmp var/druid/segment-cache +exec java ${JAVA_OPTS} -cp /tmp/conf/druid/_common:/tmp/conf/druid/${SERVICE}:lib/*: org.apache.druid.cli.Main server $@ diff --git a/distribution/docker/environment b/distribution/docker/environment new file mode 100644 index 00000000000..e61a9f307dc --- /dev/null +++ b/distribution/docker/environment @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +# Java tuning +DRUID_XMX=1g +DRUID_XMS=1g +DRUID_MAXNEWSIZE=250m +DRUID_NEWSIZE=250m +DRUID_MAXDIRECTMEMORYSIZE=6172m + +druid_emitter_logging_logLevel=debug + +druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "druid-azure-extensions", "postgresql-metadata-storage"] + +druid_zk_service_host=zookeeper + +druid_metadata_storage_host= +druid_metadata_storage_type=postgresql +druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid +druid_metadata_storage_connector_user=druid +druid_metadata_storage_connector_password=FoolishPassword + +druid_coordinator_balancer_strategy=cachingCost + +druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"] +druid_indexer_fork_property_druid_processing_buffer_sizeBytes=268435456 + +druid_storage_type=azure +druid_azure_account=YOURACCOUNT +druid_azure_key=YOURKEY +druid_azure_container=druid +druid_azure_protocol=https +druid_azure_maxTries=3 + +DRUID_LOG4J= diff --git a/distribution/docker/sha256sums.txt b/distribution/docker/sha256sums.txt new file mode 100644 index 00000000000..6f858cd512e --- /dev/null +++ b/distribution/docker/sha256sums.txt @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +b95bf9fe25cb5428f378a62fc842e177ca004b4ae1f9054968b2a396dcc1ec22 /opt/druid/extensions/mysql-metadata-storage/mysql-connector-java-5.1.38.jar diff --git a/distribution/src/assembly/source-assembly.xml b/distribution/src/assembly/source-assembly.xml index 59903d4fe98..0d7e0199331 100644 --- a/distribution/src/assembly/source-assembly.xml +++ b/distribution/src/assembly/source-assembly.xml @@ -45,6 +45,7 @@ %regex[(?!((?!${project.build.directory}/)[^/]+/)*src/)(.*/)?release\.properties] .gitignore + .dockerignore .travis.yml publications/** upload.sh From c9f21bc78246c72df946c7333f616591959e6767 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 8 Feb 2019 10:03:02 -0800 Subject: [PATCH 04/25] Fix filterSegments for TimeBoundary and DataSourceMetadata queries (#7023) * Fix filterSegments for TimeBoundary and DataSourceMetadata queries * add javadoc * fix build --- .../apache/druid/timeline/LogicalSegment.java | 18 ++ .../druid/timeline/TimelineObjectHolder.java | 17 ++ .../timeline/VersionedIntervalTimeline.java | 12 +- .../DataSourceQueryQueryToolChest.java | 20 +- .../TimeBoundaryQueryQueryToolChest.java | 21 +- .../DataSourceMetadataQueryTest.java | 179 ++++++++++++++++++ ...egmentMetadataQueryQueryToolChestTest.java | 16 +- .../metadata/SegmentMetadataQueryTest.java | 90 +++++++++ .../TimeBoundaryQueryQueryToolChestTest.java | 97 ++++++++++ 9 files changed, 433 insertions(+), 37 deletions(-) diff --git a/core/src/main/java/org/apache/druid/timeline/LogicalSegment.java b/core/src/main/java/org/apache/druid/timeline/LogicalSegment.java index 673abb01b8e..714eb671568 100644 --- a/core/src/main/java/org/apache/druid/timeline/LogicalSegment.java +++ b/core/src/main/java/org/apache/druid/timeline/LogicalSegment.java @@ -22,8 +22,26 @@ package org.apache.druid.timeline; import org.apache.druid.guice.annotations.PublicApi; import org.joda.time.Interval; +/** + * A logical segment can represent an entire segment or a part of a segment. As a result, it can have a different + * interval from its actual base segment. {@link #getInterval()} and {@link #getTrueInterval()} return the interval of + * this logical segment and the interval of the base segment, respectively. + * + * For example, suppose we have 2 segments as below: + * + * - Segment A has an interval of 2017/2018. + * - Segment B has an interval of 2017-08-01/2017-08-02. 
+ * + * For these segments, {@link VersionedIntervalTimeline#lookup} returns 3 segments as below: + * + * - interval of 2017/2017-08-01 (trueInterval: 2017/2018) + * - interval of 2017-08-01/2017-08-02 (trueInterval: 2017-08-01/2017-08-02) + * - interval of 2017-08-02/2018 (trueInterval: 2017/2018) + */ @PublicApi public interface LogicalSegment { Interval getInterval(); + + Interval getTrueInterval(); } diff --git a/core/src/main/java/org/apache/druid/timeline/TimelineObjectHolder.java b/core/src/main/java/org/apache/druid/timeline/TimelineObjectHolder.java index 8e95fc623f7..3feca88495c 100644 --- a/core/src/main/java/org/apache/druid/timeline/TimelineObjectHolder.java +++ b/core/src/main/java/org/apache/druid/timeline/TimelineObjectHolder.java @@ -19,6 +19,7 @@ package org.apache.druid.timeline; +import com.google.common.annotations.VisibleForTesting; import org.apache.druid.timeline.partition.PartitionHolder; import org.joda.time.Interval; @@ -27,16 +28,25 @@ import org.joda.time.Interval; public class TimelineObjectHolder implements LogicalSegment { private final Interval interval; + private final Interval trueInterval; private final VersionType version; private final PartitionHolder object; + @VisibleForTesting + public TimelineObjectHolder(Interval interval, VersionType version, PartitionHolder object) + { + this(interval, interval, version, object); + } + public TimelineObjectHolder( Interval interval, + Interval trueInterval, VersionType version, PartitionHolder object ) { this.interval = interval; + this.trueInterval = trueInterval; this.version = version; this.object = object; } @@ -47,6 +57,12 @@ public class TimelineObjectHolder implements LogicalSeg return interval; } + @Override + public Interval getTrueInterval() + { + return trueInterval; + } + public VersionType getVersion() { return version; @@ -62,6 +78,7 @@ public class TimelineObjectHolder implements LogicalSeg { return "TimelineObjectHolder{" + "interval=" + interval + + ", trueInterval=" + trueInterval + ", version=" + version + ", object=" + object + '}'; diff --git a/core/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java b/core/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java index 85855ad8ee0..36c177dfb0f 100644 --- a/core/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java +++ b/core/src/main/java/org/apache/druid/timeline/VersionedIntervalTimeline.java @@ -300,6 +300,7 @@ public class VersionedIntervalTimeline implements Timel private TimelineObjectHolder timelineEntryToObjectHolder(TimelineEntry entry) { return new TimelineObjectHolder<>( + entry.getTrueInterval(), entry.getTrueInterval(), entry.getVersion(), new PartitionHolder<>(entry.getPartitionHolder()) @@ -586,10 +587,11 @@ public class VersionedIntervalTimeline implements Timel if (timelineInterval.overlaps(interval)) { retVal.add( - new TimelineObjectHolder( + new TimelineObjectHolder<>( timelineInterval, + val.getTrueInterval(), val.getVersion(), - new PartitionHolder(val.getPartitionHolder()) + new PartitionHolder<>(val.getPartitionHolder()) ) ); } @@ -604,8 +606,9 @@ public class VersionedIntervalTimeline implements Timel .isAfter(firstEntry.getInterval().getStart())) { retVal.set( 0, - new TimelineObjectHolder( + new TimelineObjectHolder<>( new Interval(interval.getStart(), firstEntry.getInterval().getEnd()), + firstEntry.getTrueInterval(), firstEntry.getVersion(), firstEntry.getObject() ) @@ -616,8 +619,9 @@ public class VersionedIntervalTimeline implements Timel if 
(interval.overlaps(lastEntry.getInterval()) && interval.getEnd().isBefore(lastEntry.getInterval().getEnd())) { retVal.set( retVal.size() - 1, - new TimelineObjectHolder( + new TimelineObjectHolder<>( new Interval(lastEntry.getInterval().getStart(), interval.getEnd()), + lastEntry.getTrueInterval(), lastEntry.getVersion(), lastEntry.getObject() ) diff --git a/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java index e1befab71ad..7b951c83647 100644 --- a/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/datasourcemetadata/DataSourceQueryQueryToolChest.java @@ -22,9 +22,6 @@ package org.apache.druid.query.datasourcemetadata; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.base.Function; import com.google.common.base.Functions; -import com.google.common.base.Predicate; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.inject.Inject; import org.apache.druid.java.util.common.guava.Sequence; import org.apache.druid.java.util.common.guava.Sequences; @@ -42,6 +39,7 @@ import org.apache.druid.timeline.LogicalSegment; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** */ @@ -68,19 +66,9 @@ public class DataSourceQueryQueryToolChest final T max = segments.get(segments.size() - 1); - return Lists.newArrayList( - Iterables.filter( - segments, - new Predicate() - { - @Override - public boolean apply(T input) - { - return max != null && input.getInterval().overlaps(max.getInterval()); - } - } - ) - ); + return segments.stream() + .filter(input -> max != null && input.getInterval().overlaps(max.getTrueInterval())) + .collect(Collectors.toList()); } @Override diff --git a/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java b/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java index 13d5521b2c2..351903799f8 100644 --- a/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java +++ b/processing/src/main/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChest.java @@ -23,8 +23,6 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Function; import com.google.common.base.Functions; -import com.google.common.base.Predicate; -import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.inject.Inject; import org.apache.druid.java.util.common.DateTimes; @@ -46,6 +44,7 @@ import org.apache.druid.timeline.LogicalSegment; import java.nio.ByteBuffer; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** */ @@ -85,20 +84,10 @@ public class TimeBoundaryQueryQueryToolChest final T min = query.isMaxTime() ? null : segments.get(0); final T max = query.isMinTime() ? 
null : segments.get(segments.size() - 1); - return Lists.newArrayList( - Iterables.filter( - segments, - new Predicate() - { - @Override - public boolean apply(T input) - { - return (min != null && input.getInterval().overlaps(min.getInterval())) || - (max != null && input.getInterval().overlaps(max.getInterval())); - } - } - ) - ); + return segments.stream() + .filter(input -> (min != null && input.getInterval().overlaps(min.getTrueInterval())) || + (max != null && input.getInterval().overlaps(max.getTrueInterval()))) + .collect(Collectors.toList()); } @Override diff --git a/processing/src/test/java/org/apache/druid/query/datasourcemetadata/DataSourceMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/datasourcemetadata/DataSourceMetadataQueryTest.java index 2a23b547f66..3f20c25f87c 100644 --- a/processing/src/test/java/org/apache/druid/query/datasourcemetadata/DataSourceMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/datasourcemetadata/DataSourceMetadataQueryTest.java @@ -164,6 +164,12 @@ public class DataSourceMetadataQueryTest { return Intervals.of("2012-01-01/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -172,6 +178,12 @@ public class DataSourceMetadataQueryTest { return Intervals.of("2012-01-01T01/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -180,6 +192,12 @@ public class DataSourceMetadataQueryTest { return Intervals.of("2013-01-01/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -188,6 +206,12 @@ public class DataSourceMetadataQueryTest { return Intervals.of("2013-01-01T01/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -196,6 +220,12 @@ public class DataSourceMetadataQueryTest { return Intervals.of("2013-01-01T02/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } } ) ); @@ -210,6 +240,12 @@ public class DataSourceMetadataQueryTest { return Intervals.of("2013-01-01/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -218,6 +254,12 @@ public class DataSourceMetadataQueryTest { return Intervals.of("2013-01-01T02/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } } ); @@ -226,6 +268,143 @@ public class DataSourceMetadataQueryTest } } + @Test + public void testFilterOverlappingSegments() + { + final GenericQueryMetricsFactory queryMetricsFactory = DefaultGenericQueryMetricsFactory.instance(); + final DataSourceQueryQueryToolChest toolChest = new DataSourceQueryQueryToolChest(queryMetricsFactory); + final List segments = toolChest + .filterSegments( + null, + ImmutableList.of( + new LogicalSegment() + { + @Override + public Interval getInterval() + { + return Intervals.of("2015/2016-08-01"); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of("2015/2016-08-01"); + } + }, + new LogicalSegment() + { + @Override + public Interval getInterval() + { + return Intervals.of("2016-08-01/2017"); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of("2016-08-01/2017"); + } + }, + new LogicalSegment() + { + @Override + public Interval getInterval() + { + return Intervals.of("2017/2017-08-01"); + } + + @Override + public Interval getTrueInterval() + { + return 
Intervals.of("2017/2018"); + } + }, + new LogicalSegment() + { + + @Override + public Interval getInterval() + { + return Intervals.of("2017-08-01/2017-08-02"); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of("2017-08-01/2017-08-02"); + } + }, + new LogicalSegment() + { + @Override + public Interval getInterval() + { + return Intervals.of("2017-08-02/2018"); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of("2017/2018"); + } + } + ) + ); + + final List expected = ImmutableList.of( + new LogicalSegment() + { + @Override + public Interval getInterval() + { + return Intervals.of("2017/2017-08-01"); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of("2017/2018"); + } + }, + new LogicalSegment() + { + + @Override + public Interval getInterval() + { + return Intervals.of("2017-08-01/2017-08-02"); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of("2017-08-01/2017-08-02"); + } + }, + new LogicalSegment() + { + @Override + public Interval getInterval() + { + return Intervals.of("2017-08-02/2018"); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of("2017/2018"); + } + } + ); + + Assert.assertEquals(expected.size(), segments.size()); + + for (int i = 0; i < expected.size(); i++) { + Assert.assertEquals(expected.get(i).getInterval(), segments.get(i).getInterval()); + Assert.assertEquals(expected.get(i).getTrueInterval(), segments.get(i).getTrueInterval()); + } + } + @Test public void testResultSerialization() { diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java index c841af498d0..f823937ef03 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryQueryToolChestTest.java @@ -39,6 +39,7 @@ import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; import org.apache.druid.query.spec.LegacySegmentSpec; import org.apache.druid.segment.column.ValueType; import org.apache.druid.timeline.LogicalSegment; +import org.joda.time.Interval; import org.joda.time.Period; import org.junit.Assert; import org.junit.Test; @@ -292,7 +293,20 @@ public class SegmentMetadataQueryQueryToolChestTest "2000-01-09/P1D" ) .stream() - .map(interval -> (LogicalSegment) () -> Intervals.of(interval)) + .map(interval -> new LogicalSegment() + { + @Override + public Interval getInterval() + { + return Intervals.of(interval); + } + + @Override + public Interval getTrueInterval() + { + return Intervals.of(interval); + } + }) .collect(Collectors.toList()) ); diff --git a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java index c5afad436d6..31e70d37664 100644 --- a/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/metadata/SegmentMetadataQueryTest.java @@ -923,6 +923,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2012-01-01/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -931,6 +937,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2012-01-01T01/PT1H"); } + + 
@Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -939,6 +951,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2013-01-05/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -947,6 +965,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2013-05-20/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -955,6 +979,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2014-01-05/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -963,6 +993,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2014-02-05/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -971,6 +1007,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2015-01-19T01/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -979,6 +1021,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2015-01-20T02/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } } ); @@ -998,6 +1046,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2015-01-19T01/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -1006,6 +1060,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2015-01-20T02/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } } ); @@ -1031,6 +1091,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2013-05-20/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -1039,6 +1105,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2014-01-05/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -1047,6 +1119,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2014-02-05/P1D"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -1055,6 +1133,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2015-01-19T01/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } }, new LogicalSegment() { @@ -1063,6 +1147,12 @@ public class SegmentMetadataQueryTest { return Intervals.of("2015-01-20T02/PT1H"); } + + @Override + public Interval getTrueInterval() + { + return getInterval(); + } } ); diff --git a/processing/src/test/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java b/processing/src/test/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java index b62fb283c7f..6ab886fe084 100644 --- a/processing/src/test/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java +++ b/processing/src/test/java/org/apache/druid/query/timeboundary/TimeBoundaryQueryQueryToolChestTest.java @@ -73,6 +73,11 @@ public class TimeBoundaryQueryQueryToolChestTest .build(); private static LogicalSegment createLogicalSegment(final Interval interval) + { + return createLogicalSegment(interval, interval); + } + + private static LogicalSegment 
createLogicalSegment(final Interval interval, final Interval trueInterval) { return new LogicalSegment() { @@ -81,6 +86,12 @@ public class TimeBoundaryQueryQueryToolChestTest { return interval; } + + @Override + public Interval getTrueInterval() + { + return trueInterval; + } }; } @@ -116,6 +127,35 @@ public class TimeBoundaryQueryQueryToolChestTest } } + @Test + public void testFilterOverlapingSegments() + { + final List actual = new TimeBoundaryQueryQueryToolChest().filterSegments( + TIME_BOUNDARY_QUERY, + Arrays.asList( + createLogicalSegment(Intervals.of("2015/2016-08-01")), + createLogicalSegment(Intervals.of("2016-08-01/2017")), + createLogicalSegment(Intervals.of("2017/2017-08-01"), Intervals.of("2017/2018")), + createLogicalSegment(Intervals.of("2017-08-01/2017-08-02")), + createLogicalSegment(Intervals.of("2017-08-02/2018"), Intervals.of("2017/2018")) + ) + ); + + final List expected = Arrays.asList( + createLogicalSegment(Intervals.of("2015/2016-08-01")), + createLogicalSegment(Intervals.of("2017/2017-08-01"), Intervals.of("2017/2018")), + createLogicalSegment(Intervals.of("2017-08-01/2017-08-02")), + createLogicalSegment(Intervals.of("2017-08-02/2018"), Intervals.of("2017/2018")) + ); + + Assert.assertEquals(expected.size(), actual.size()); + + for (int i = 0; i < actual.size(); i++) { + Assert.assertEquals(expected.get(i).getInterval(), actual.get(i).getInterval()); + Assert.assertEquals(expected.get(i).getTrueInterval(), actual.get(i).getTrueInterval()); + } + } + @Test public void testMaxTimeFilterSegments() { @@ -145,6 +185,62 @@ public class TimeBoundaryQueryQueryToolChestTest } } + @Test + public void testMaxTimeFilterOverlapingSegments() + { + final List actual = new TimeBoundaryQueryQueryToolChest().filterSegments( + MAXTIME_BOUNDARY_QUERY, + Arrays.asList( + createLogicalSegment(Intervals.of("2015/2016-08-01")), + createLogicalSegment(Intervals.of("2016-08-01/2017")), + createLogicalSegment(Intervals.of("2017/2017-08-01"), Intervals.of("2017/2018")), + createLogicalSegment(Intervals.of("2017-08-01/2017-08-02")), + createLogicalSegment(Intervals.of("2017-08-02/2018"), Intervals.of("2017/2018")) + ) + ); + + final List expected = Arrays.asList( + createLogicalSegment(Intervals.of("2017/2017-08-01"), Intervals.of("2017/2018")), + createLogicalSegment(Intervals.of("2017-08-01/2017-08-02")), + createLogicalSegment(Intervals.of("2017-08-02/2018"), Intervals.of("2017/2018")) + ); + + Assert.assertEquals(expected.size(), actual.size()); + + for (int i = 0; i < actual.size(); i++) { + Assert.assertEquals(expected.get(i).getInterval(), actual.get(i).getInterval()); + Assert.assertEquals(expected.get(i).getTrueInterval(), actual.get(i).getTrueInterval()); + } + } + + @Test + public void testMinTimeFilterOverlapingSegments() + { + final List actual = new TimeBoundaryQueryQueryToolChest().filterSegments( + MINTIME_BOUNDARY_QUERY, + Arrays.asList( + createLogicalSegment(Intervals.of("2017/2017-08-01"), Intervals.of("2017/2018")), + createLogicalSegment(Intervals.of("2017-08-01/2017-08-02")), + createLogicalSegment(Intervals.of("2017-08-02/2018"), Intervals.of("2017/2018")), + createLogicalSegment(Intervals.of("2018/2018-08-01")), + createLogicalSegment(Intervals.of("2018-08-01/2019")) + ) + ); + + final List expected = Arrays.asList( + createLogicalSegment(Intervals.of("2017/2017-08-01"), Intervals.of("2017/2018")), + createLogicalSegment(Intervals.of("2017-08-01/2017-08-02")), + createLogicalSegment(Intervals.of("2017-08-02/2018"), Intervals.of("2017/2018")) + ); + + 
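    // Only the first three segments are expected back from the minTime-only query: their true
    // intervals overlap the earliest true interval ("2017/2018"), while the trailing 2018
    // segments do not and are filtered out.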
Assert.assertEquals(expected.size(), actual.size()); + + for (int i = 0; i < actual.size(); i++) { + Assert.assertEquals(expected.get(i).getInterval(), actual.get(i).getInterval()); + Assert.assertEquals(expected.get(i).getTrueInterval(), actual.get(i).getTrueInterval()); + } + } + @Test public void testMinTimeFilterSegments() { @@ -192,6 +288,7 @@ public class TimeBoundaryQueryQueryToolChestTest Assert.assertEquals(7, segments.size()); } + @Test public void testCacheStrategy() throws Exception { From d42de574d66c6188ec10a3e0acd6a3d8694b9463 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 8 Feb 2019 11:05:59 -0800 Subject: [PATCH 05/25] Add an api to get all lookup specs (#7025) * Add an api to get all lookup specs * add doc --- docs/content/querying/lookups.md | 5 +- .../http/LookupCoordinatorResource.java | 50 +++++++++++--- .../http/LookupCoordinatorResourceTest.java | 65 +++++++++++++++++++ 3 files changed, 109 insertions(+), 11 deletions(-) diff --git a/docs/content/querying/lookups.md b/docs/content/querying/lookups.md index 7c7ad81df2f..14d9fe2813c 100644 --- a/docs/content/querying/lookups.md +++ b/docs/content/querying/lookups.md @@ -260,8 +260,11 @@ For example, a post to `/druid/coordinator/v1/lookups/config/realtime_customer1/ This will replace the `site_id_customer1` lookup in the `realtime_customer1` with the definition above. +## Get All Lookups +A `GET` to `/druid/coordinator/v1/lookups/config/all` will return all known lookup specs for all tiers. + ## Get Lookup -A `GET` to a particular lookup extractor factory is accomplished via `/druid/coordinator/v1/lookups/{tier}/{id}` +A `GET` to a particular lookup extractor factory is accomplished via `/druid/coordinator/v1/lookups/config/{tier}/{id}` Using the prior example, a `GET` to `/druid/coordinator/v1/lookups/config/realtime_customer2/site_id_customer2` should return diff --git a/server/src/main/java/org/apache/druid/server/http/LookupCoordinatorResource.java b/server/src/main/java/org/apache/druid/server/http/LookupCoordinatorResource.java index 90e56941ce7..b79acea7161 100644 --- a/server/src/main/java/org/apache/druid/server/http/LookupCoordinatorResource.java +++ b/server/src/main/java/org/apache/druid/server/http/LookupCoordinatorResource.java @@ -100,7 +100,8 @@ public class LookupCoordinatorResource if (discover) { return Response.ok().entity(lookupCoordinatorManager.discoverTiers()).build(); } - final Map> knownLookups = lookupCoordinatorManager.getKnownLookups(); + final Map> knownLookups = lookupCoordinatorManager + .getKnownLookups(); if (knownLookups == null) { return Response.status(Response.Status.NOT_FOUND).build(); } else { @@ -113,6 +114,26 @@ public class LookupCoordinatorResource } } + @GET + @Produces({MediaType.APPLICATION_JSON}) + @Path("/config/all") + public Response getAllLookupSpecs() + { + try { + final Map> knownLookups = lookupCoordinatorManager + .getKnownLookups(); + if (knownLookups == null) { + return Response.status(Response.Status.NOT_FOUND).build(); + } else { + return Response.ok().entity(knownLookups).build(); + } + } + catch (Exception ex) { + LOG.error(ex, "Error getting lookups status"); + return Response.serverError().entity(ServletResourceUtils.sanitizeException(ex)).build(); + } + } + @POST @Path("/config") @Produces({MediaType.APPLICATION_JSON, SmileMediaTypes.APPLICATION_JACKSON_SMILE}) @@ -314,14 +335,16 @@ public class LookupCoordinatorResource ) { try { - Map> configuredLookups = lookupCoordinatorManager.getKnownLookups(); + Map> configuredLookups = 
lookupCoordinatorManager + .getKnownLookups(); if (configuredLookups == null) { return Response.status(Response.Status.NOT_FOUND) .entity(ServletResourceUtils.jsonize("No lookups found")) .build(); } - Map> lookupsStateOnNodes = lookupCoordinatorManager.getLastKnownLookupsStateOnNodes(); + Map> lookupsStateOnNodes = lookupCoordinatorManager + .getLastKnownLookupsStateOnNodes(); Map> result = new HashMap<>(); @@ -362,7 +385,8 @@ public class LookupCoordinatorResource ) { try { - Map> configuredLookups = lookupCoordinatorManager.getKnownLookups(); + Map> configuredLookups = lookupCoordinatorManager + .getKnownLookups(); if (configuredLookups == null) { return Response.status(Response.Status.NOT_FOUND) .entity(ServletResourceUtils.jsonize("No lookups found")) @@ -380,7 +404,8 @@ public class LookupCoordinatorResource Map lookupStatusMap = new HashMap<>(); Collection hosts = lookupCoordinatorManager.discoverNodesInTier(tier); - Map> lookupsStateOnNodes = lookupCoordinatorManager.getLastKnownLookupsStateOnNodes(); + Map> lookupsStateOnNodes = lookupCoordinatorManager + .getLastKnownLookupsStateOnNodes(); for (Map.Entry lookupsEntry : tierLookups.entrySet()) { lookupStatusMap.put( @@ -407,7 +432,8 @@ public class LookupCoordinatorResource ) { try { - Map> configuredLookups = lookupCoordinatorManager.getKnownLookups(); + Map> configuredLookups = lookupCoordinatorManager + .getKnownLookups(); if (configuredLookups == null) { return Response.status(Response.Status.NOT_FOUND) .entity(ServletResourceUtils.jsonize("No lookups found")) @@ -486,7 +512,8 @@ public class LookupCoordinatorResource if (discover) { tiers = lookupCoordinatorManager.discoverTiers(); } else { - Map> configuredLookups = lookupCoordinatorManager.getKnownLookups(); + Map> configuredLookups = lookupCoordinatorManager + .getKnownLookups(); if (configuredLookups == null) { return Response.status(Response.Status.NOT_FOUND) .entity(ServletResourceUtils.jsonize("No lookups configured.")) @@ -495,7 +522,8 @@ public class LookupCoordinatorResource tiers = configuredLookups.keySet(); } - Map> lookupsStateOnHosts = lookupCoordinatorManager.getLastKnownLookupsStateOnNodes(); + Map> lookupsStateOnHosts = lookupCoordinatorManager + .getLastKnownLookupsStateOnNodes(); Map>> result = new HashMap<>(); @@ -531,7 +559,8 @@ public class LookupCoordinatorResource ) { try { - Map> lookupsStateOnHosts = lookupCoordinatorManager.getLastKnownLookupsStateOnNodes(); + Map> lookupsStateOnHosts = lookupCoordinatorManager + .getLastKnownLookupsStateOnNodes(); Map> tierNodesStatus = new HashMap<>(); @@ -563,7 +592,8 @@ public class LookupCoordinatorResource ) { try { - Map> lookupsStateOnHosts = lookupCoordinatorManager.getLastKnownLookupsStateOnNodes(); + Map> lookupsStateOnHosts = lookupCoordinatorManager + .getLastKnownLookupsStateOnNodes(); LookupsState lookupsState = lookupsStateOnHosts.get(hostAndPort); if (lookupsState == null) { diff --git a/server/src/test/java/org/apache/druid/server/http/LookupCoordinatorResourceTest.java b/server/src/test/java/org/apache/druid/server/http/LookupCoordinatorResourceTest.java index 621479b8a2e..17f3d46e11e 100644 --- a/server/src/test/java/org/apache/druid/server/http/LookupCoordinatorResourceTest.java +++ b/server/src/test/java/org/apache/druid/server/http/LookupCoordinatorResourceTest.java @@ -39,6 +39,7 @@ import org.junit.Test; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; +import javax.ws.rs.core.Response.Status; import 
java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -1078,4 +1079,68 @@ public class LookupCoordinatorResourceTest EasyMock.verify(lookupCoordinatorManager); } + + @Test + public void testGetAllLookupSpecs() + { + final Map> lookups = ImmutableMap.of( + "tier1", + ImmutableMap.of( + "lookup1", + new LookupExtractorFactoryMapContainer( + "v0", + ImmutableMap.of("k1", "v2") + ), + "lookup2", + new LookupExtractorFactoryMapContainer( + "v1", + ImmutableMap.of("k", "v") + ) + ), + "tier2", + ImmutableMap.of( + "lookup1", + new LookupExtractorFactoryMapContainer( + "v0", + ImmutableMap.of("k1", "v2") + ) + ) + ); + final LookupCoordinatorManager lookupCoordinatorManager = EasyMock.createStrictMock( + LookupCoordinatorManager.class + ); + EasyMock.expect(lookupCoordinatorManager.getKnownLookups()) + .andReturn(lookups) + .once(); + EasyMock.replay(lookupCoordinatorManager); + final LookupCoordinatorResource lookupCoordinatorResource = new LookupCoordinatorResource( + lookupCoordinatorManager, + mapper, + mapper + ); + final Response response = lookupCoordinatorResource.getAllLookupSpecs(); + Assert.assertEquals(Status.OK.getStatusCode(), response.getStatus()); + Assert.assertEquals(lookups, response.getEntity()); + EasyMock.verify(lookupCoordinatorManager); + } + + @Test + public void testGetEmptyAllLookupSpecs() + { + final LookupCoordinatorManager lookupCoordinatorManager = EasyMock.createStrictMock( + LookupCoordinatorManager.class + ); + EasyMock.expect(lookupCoordinatorManager.getKnownLookups()) + .andReturn(null) + .once(); + EasyMock.replay(lookupCoordinatorManager); + final LookupCoordinatorResource lookupCoordinatorResource = new LookupCoordinatorResource( + lookupCoordinatorManager, + mapper, + mapper + ); + final Response response = lookupCoordinatorResource.getAllLookupSpecs(); + Assert.assertEquals(Status.NOT_FOUND.getStatusCode(), response.getStatus()); + EasyMock.verify(lookupCoordinatorManager); + } } From 16a4a50e9147a04dd47bf2eb897138197519b0bb Mon Sep 17 00:00:00 2001 From: Ankit Kothari Date: Fri, 8 Feb 2019 18:26:37 -0800 Subject: [PATCH 06/25] [Issue #6967] NoClassDefFoundError when using druid-hdfs-storage (#7015) * Fix: 1. hadoop-common dependency for druid-hdfs and druid-kerberos extensions Refactoring: 2. Hadoop config call in the inner static class to avoid class path conflicts for stopGracefully kill * Fix: 1. hadoop-common test dependency * Fix: 1. 
Avoid issue of kill command once the job is actually completed --- extensions-core/druid-kerberos/pom.xml | 1 + extensions-core/hdfs-storage/pom.xml | 138 +++++++++++++++++- indexing-hadoop/pom.xml | 32 ++-- .../indexing/common/task/HadoopIndexTask.java | 64 ++++---- 4 files changed, 185 insertions(+), 50 deletions(-) diff --git a/extensions-core/druid-kerberos/pom.xml b/extensions-core/druid-kerberos/pom.xml index 5b941012521..8740ab69b02 100644 --- a/extensions-core/druid-kerberos/pom.xml +++ b/extensions-core/druid-kerberos/pom.xml @@ -71,6 +71,7 @@ org.apache.hadoop hadoop-common ${hadoop.compile.version} + compile commons-cli diff --git a/extensions-core/hdfs-storage/pom.xml b/extensions-core/hdfs-storage/pom.xml index ec4f014db60..07d1876bcca 100644 --- a/extensions-core/hdfs-storage/pom.xml +++ b/extensions-core/hdfs-storage/pom.xml @@ -151,6 +151,130 @@ + + org.apache.hadoop + hadoop-common + ${hadoop.compile.version} + compile + + + commons-cli + commons-cli + + + commons-httpclient + commons-httpclient + + + log4j + log4j + + + commons-codec + commons-codec + + + commons-logging + commons-logging + + + commons-io + commons-io + + + commons-lang + commons-lang + + + org.apache.httpcomponents + httpclient + + + org.apache.httpcomponents + httpcore + + + org.codehaus.jackson + jackson-core-asl + + + org.codehaus.jackson + jackson-mapper-asl + + + org.apache.zookeeper + zookeeper + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + javax.ws.rs + jsr311-api + + + com.google.code.findbugs + jsr305 + + + org.mortbay.jetty + jetty-util + + + org.apache.hadoop + hadoop-annotations + + + com.google.protobuf + protobuf-java + + + com.sun.jersey + jersey-core + + + org.apache.curator + curator-client + + + org.apache.commons + commons-math3 + + + com.google.guava + guava + + + org.apache.avro + avro + + + net.java.dev.jets3t + jets3t + + + com.sun.jersey + jersey-json + + + com.jcraft + jsch + + + org.mortbay.jetty + jetty + + + com.sun.jersey + jersey-server + + + org.apache.hadoop hadoop-aws @@ -164,6 +288,13 @@ + + org.apache.hadoop + hadoop-common + ${hadoop.compile.version} + tests + test + junit junit @@ -189,13 +320,6 @@ tests test - - org.apache.hadoop - hadoop-common - ${hadoop.compile.version} - tests - test - org.apache.hadoop hadoop-hdfs diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index 2baf5226059..a762e21b255 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -83,6 +83,22 @@ com.google.code.findbugs jsr305 + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + javax.servlet + servlet-api + + + @@ -130,22 +146,6 @@ ${hadoop.compile.version} test - - org.apache.hadoop - hadoop-common - provided - - - org.apache.hadoop - hadoop-mapreduce-client-core - provided - - - javax.servlet - servlet-api - - - org.apache.druid druid-server diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java index 398ed96a2f3..62c23e734a5 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/task/HadoopIndexTask.java @@ -430,21 +430,12 @@ public class HadoopIndexTask extends HadoopTask implements ChatHandler @Override public void stopGracefully(TaskConfig taskConfig) { - final ClassLoader oldLoader = 
Thread.currentThread().getContextClassLoader(); - File hadoopJobIdFile = new File(getHadoopJobIdFileName()); - String jobId = null; + // To avoid issue of kill command once the ingestion task is actually completed + if (!ingestionState.equals(IngestionState.COMPLETED)) { + final ClassLoader oldLoader = Thread.currentThread().getContextClassLoader(); + String hadoopJobIdFile = getHadoopJobIdFileName(); - try { - if (hadoopJobIdFile.exists()) { - jobId = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(hadoopJobIdFile, String.class); - } - } - catch (Exception e) { - log.warn(e, "exeption while reading Hadoop Job ID from: %s", hadoopJobIdFile); - } - - try { - if (jobId != null) { + try { ClassLoader loader = HadoopTask.buildClassLoader(getHadoopDependencyCoordinates(), taskConfig.getDefaultHadoopCoordinates()); @@ -452,28 +443,28 @@ public class HadoopIndexTask extends HadoopTask implements ChatHandler "org.apache.druid.indexing.common.task.HadoopIndexTask$HadoopKillMRJobIdProcessingRunner", loader ); + String[] buildKillJobInput = new String[]{ - "-kill", - jobId + hadoopJobIdFile }; Class buildKillJobRunnerClass = killMRJobInnerProcessingRunner.getClass(); Method innerProcessingRunTask = buildKillJobRunnerClass.getMethod("runTask", buildKillJobInput.getClass()); Thread.currentThread().setContextClassLoader(loader); - final String killStatusString = (String) innerProcessingRunTask.invoke( + final String killStatusString[] = (String[]) innerProcessingRunTask.invoke( killMRJobInnerProcessingRunner, new Object[]{buildKillJobInput} ); - log.info(StringUtils.format("Tried killing job %s , status: %s", jobId, killStatusString)); + log.info(StringUtils.format("Tried killing job: [%s], status: [%s]", killStatusString[0], killStatusString[1])); + } + catch (Exception e) { + throw new RuntimeException(e); + } + finally { + Thread.currentThread().setContextClassLoader(oldLoader); } - } - catch (Exception e) { - throw new RuntimeException(e); - } - finally { - Thread.currentThread().setContextClassLoader(oldLoader); } } @@ -722,10 +713,29 @@ public class HadoopIndexTask extends HadoopTask implements ChatHandler @SuppressWarnings("unused") public static class HadoopKillMRJobIdProcessingRunner { - public String runTask(String[] args) throws Exception + public String[] runTask(String[] args) throws Exception { - int res = ToolRunner.run(new JobClient(), args); - return res == 0 ? "Success" : "Fail"; + File hadoopJobIdFile = new File(args[0]); + String jobId = null; + + try { + if (hadoopJobIdFile.exists()) { + jobId = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(hadoopJobIdFile, String.class); + } + } + catch (Exception e) { + log.warn(e, "exeption while reading hadoop job id from: [%s]", hadoopJobIdFile); + } + + if (jobId != null) { + int res = ToolRunner.run(new JobClient(), new String[]{ + "-kill", + jobId + }); + + return new String[] {jobId, (res == 0 ? 
"Success" : "Fail")}; + } + return new String[] {jobId, "Fail"}; } } From 02ef14f262c9ddceb3c8c9a4fcc44da078b67727 Mon Sep 17 00:00:00 2001 From: Surekha Date: Mon, 11 Feb 2019 16:21:19 -0800 Subject: [PATCH 07/25] Fix num_rows in sys.segments (#6888) * Fix the bug with num_rows in sys.segments * Fix segmentMetadataInfo update in DruidSchema * Add numRows to SegmentMetadataHolder builder's constructor, so it's not overwritten * Rename SegSegmentSignature to setSegmentMetadataHolder and fix it so nested map is appended instead of recreated * Replace Map> segmentServerMap with Set for num_replica * Remove unnecessary code and update test * Add unit test for num_rows * PR comments * change access modifier to default package level * minor changes to comments * PR comments --- .../druid/sql/calcite/schema/DruidSchema.java | 53 +++++++------ .../calcite/schema/SegmentMetadataHolder.java | 45 +++++++---- .../sql/calcite/schema/SystemSchema.java | 2 +- .../sql/calcite/schema/DruidSchemaTest.java | 77 ++++++++++++++++++- .../sql/calcite/schema/SystemSchemaTest.java | 22 +++++- .../calcite/util/TestServerInventoryView.java | 12 ++- 6 files changed, 162 insertions(+), 49 deletions(-) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java index 2929229b4c5..4aaa95e12a6 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/DruidSchema.java @@ -19,6 +19,7 @@ package org.apache.druid.sql.calcite.schema; +import com.amazonaws.annotation.GuardedBy; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Predicates; @@ -63,7 +64,6 @@ import org.apache.druid.sql.calcite.table.RowSignature; import org.apache.druid.sql.calcite.view.DruidViewMacro; import org.apache.druid.sql.calcite.view.ViewManager; import org.apache.druid.timeline.DataSegment; -import org.apache.druid.timeline.SegmentId; import java.io.IOException; import java.util.Comparator; @@ -95,8 +95,9 @@ public class DruidSchema extends AbstractSchema private static final EmittingLogger log = new EmittingLogger(DruidSchema.class); private static final int MAX_SEGMENTS_PER_QUERY = 15000; - private static final long IS_PUBLISHED = 0; - private static final long IS_AVAILABLE = 1; + private static final long DEFAULT_IS_PUBLISHED = 0; + private static final long DEFAULT_IS_AVAILABLE = 1; + private static final long DEFAULT_NUM_ROWS = 0; private final QueryLifecycleFactory queryLifecycleFactory; private final PlannerConfig config; @@ -107,12 +108,12 @@ public class DruidSchema extends AbstractSchema // For awaitInitialization. private final CountDownLatch initialized = new CountDownLatch(1); - // Protects access to segmentSignatures, mutableSegments, segmentsNeedingRefresh, lastRefresh, isServerViewInitialized + // Protects access to segmentSignatures, mutableSegments, segmentsNeedingRefresh, lastRefresh, isServerViewInitialized, segmentMetadata private final Object lock = new Object(); // DataSource -> Segment -> SegmentMetadataHolder(contains RowSignature) for that segment. // Use TreeMap for segments so they are merged in deterministic order, from older to newer. 
- // This data structure need to be accessed in a thread-safe way since SystemSchema accesses it + @GuardedBy("lock") private final Map> segmentMetadataInfo = new HashMap<>(); private int totalSegments = 0; @@ -351,7 +352,8 @@ public class DruidSchema extends AbstractSchema return builder.build(); } - private void addSegment(final DruidServerMetadata server, final DataSegment segment) + @VisibleForTesting + void addSegment(final DruidServerMetadata server, final DataSegment segment) { synchronized (lock) { final Map knownSegments = segmentMetadataInfo.get(segment.getDataSource()); @@ -360,16 +362,18 @@ public class DruidSchema extends AbstractSchema // segmentReplicatable is used to determine if segments are served by realtime servers or not final long isRealtime = server.segmentReplicatable() ? 0 : 1; - final Map> serverSegmentMap = ImmutableMap.of( + final Set servers = ImmutableSet.of(server.getName()); + holder = SegmentMetadataHolder.builder( segment.getId(), - ImmutableSet.of(server.getName()) - ); - - holder = SegmentMetadataHolder - .builder(segment.getId(), IS_PUBLISHED, IS_AVAILABLE, isRealtime, serverSegmentMap) - .build(); + DEFAULT_IS_PUBLISHED, + DEFAULT_IS_AVAILABLE, + isRealtime, + servers, + null, + DEFAULT_NUM_ROWS + ).build(); // Unknown segment. - setSegmentSignature(segment, holder); + setSegmentMetadataHolder(segment, holder); segmentsNeedingRefresh.add(segment); if (!server.segmentReplicatable()) { log.debug("Added new mutable segment[%s].", segment.getId()); @@ -378,14 +382,14 @@ public class DruidSchema extends AbstractSchema log.debug("Added new immutable segment[%s].", segment.getId()); } } else { - final Map> segmentServerMap = holder.getReplicas(); + final Set segmentServers = holder.getReplicas(); final ImmutableSet servers = new ImmutableSet.Builder() - .addAll(segmentServerMap.get(segment.getId())) + .addAll(segmentServers) .add(server.getName()) .build(); final SegmentMetadataHolder holderWithNumReplicas = SegmentMetadataHolder .from(holder) - .withReplicas(ImmutableMap.of(segment.getId(), servers)) + .withReplicas(servers) .build(); knownSegments.put(segment, holderWithNumReplicas); if (server.segmentReplicatable()) { @@ -404,7 +408,7 @@ public class DruidSchema extends AbstractSchema } @VisibleForTesting - protected void removeSegment(final DataSegment segment) + void removeSegment(final DataSegment segment) { synchronized (lock) { log.debug("Segment[%s] is gone.", segment.getId()); @@ -435,13 +439,13 @@ public class DruidSchema extends AbstractSchema log.debug("Segment[%s] is gone from server[%s]", segment.getId(), server.getName()); final Map knownSegments = segmentMetadataInfo.get(segment.getDataSource()); final SegmentMetadataHolder holder = knownSegments.get(segment); - final Map> segmentServerMap = holder.getReplicas(); - final ImmutableSet servers = FluentIterable.from(segmentServerMap.get(segment.getId())) + final Set segmentServers = holder.getReplicas(); + final ImmutableSet servers = FluentIterable.from(segmentServers) .filter(Predicates.not(Predicates.equalTo(server.getName()))) .toSet(); final SegmentMetadataHolder holderWithNumReplicas = SegmentMetadataHolder .from(holder) - .withReplicas(ImmutableMap.of(segment.getId(), servers)) + .withReplicas(servers) .build(); knownSegments.put(segment, holderWithNumReplicas); lock.notifyAll(); @@ -453,7 +457,7 @@ public class DruidSchema extends AbstractSchema * which may be a subset of the asked-for set. 
*/ @VisibleForTesting - protected Set refreshSegments(final Set segments) throws IOException + Set refreshSegments(final Set segments) throws IOException { final Set retVal = new HashSet<>(); @@ -525,7 +529,7 @@ public class DruidSchema extends AbstractSchema .withNumRows(analysis.getNumRows()) .build(); dataSourceSegments.put(segment, updatedHolder); - setSegmentSignature(segment, updatedHolder); + setSegmentMetadataHolder(segment, updatedHolder); retVal.add(segment); } } @@ -550,7 +554,8 @@ public class DruidSchema extends AbstractSchema return retVal; } - private void setSegmentSignature(final DataSegment segment, final SegmentMetadataHolder segmentMetadataHolder) + @VisibleForTesting + void setSegmentMetadataHolder(final DataSegment segment, final SegmentMetadataHolder segmentMetadataHolder) { synchronized (lock) { TreeMap dataSourceSegments = segmentMetadataInfo.computeIfAbsent( diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataHolder.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataHolder.java index f2d5ab313b5..38ff92858ec 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataHolder.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataHolder.java @@ -23,7 +23,6 @@ import org.apache.druid.sql.calcite.table.RowSignature; import org.apache.druid.timeline.SegmentId; import javax.annotation.Nullable; -import java.util.Map; import java.util.Set; /** @@ -36,15 +35,25 @@ public class SegmentMetadataHolder long isPublished, long isAvailable, long isRealtime, - Map> segmentServerMap + Set segmentServers, + RowSignature rowSignature, + long numRows ) { - return new Builder(segmentId, isPublished, isAvailable, isRealtime, segmentServerMap); + return new Builder(segmentId, isPublished, isAvailable, isRealtime, segmentServers, rowSignature, numRows); } public static Builder from(SegmentMetadataHolder h) { - return new Builder(h.getSegmentId(), h.isPublished(), h.isAvailable(), h.isRealtime(), h.getReplicas()); + return new Builder( + h.getSegmentId(), + h.isPublished(), + h.isAvailable(), + h.isRealtime(), + h.getReplicas(), + h.getRowSignature(), + h.getNumRows() + ); } private final SegmentId segmentId; @@ -54,8 +63,8 @@ public class SegmentMetadataHolder private final long isPublished; private final long isAvailable; private final long isRealtime; - //segmentId -> set of servers that contain the segment - private final Map> segmentServerMap; + // set of servers that contain the segment + private final Set segmentServers; private final long numRows; @Nullable private final RowSignature rowSignature; @@ -66,7 +75,7 @@ public class SegmentMetadataHolder this.isPublished = builder.isPublished; this.isAvailable = builder.isAvailable; this.isRealtime = builder.isRealtime; - this.segmentServerMap = builder.segmentServerMap; + this.segmentServers = builder.segmentServers; this.numRows = builder.numRows; this.segmentId = builder.segmentId; } @@ -91,14 +100,14 @@ public class SegmentMetadataHolder return segmentId; } - public Map> getReplicas() + public Set getReplicas() { - return segmentServerMap; + return segmentServers; } - public long getNumReplicas(SegmentId segmentId) + public long getNumReplicas() { - return segmentServerMap.get(segmentId).size(); + return segmentServers.size(); } public long getNumRows() @@ -119,7 +128,7 @@ public class SegmentMetadataHolder private final long isAvailable; private final long isRealtime; - private Map> segmentServerMap; + private Set segmentServers; 
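  // Illustrative sketch (the method name is hypothetical, not part of the patch): because the
  // Builder now receives rowSignature and numRows up front, from(holder) carries them over, so
  // adding a replica no longer resets a previously-set row count. This mirrors what
  // DruidSchema#addSegment does for an already-known segment:
  static SegmentMetadataHolder withAdditionalReplica(final SegmentMetadataHolder holder, final String serverName)
  {
    final ImmutableSet<String> servers = new ImmutableSet.Builder<String>()
        .addAll(holder.getReplicas())
        .add(serverName)
        .build();
    // getNumRows() and getRowSignature() on the result are unchanged.
    return SegmentMetadataHolder.from(holder).withReplicas(servers).build();
  }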
@Nullable private RowSignature rowSignature; private long numRows; @@ -129,14 +138,18 @@ public class SegmentMetadataHolder long isPublished, long isAvailable, long isRealtime, - Map> segmentServerMap + Set servers, + RowSignature rowSignature, + long numRows ) { this.segmentId = segmentId; this.isPublished = isPublished; this.isAvailable = isAvailable; this.isRealtime = isRealtime; - this.segmentServerMap = segmentServerMap; + this.segmentServers = servers; + this.rowSignature = rowSignature; + this.numRows = numRows; } public Builder withRowSignature(RowSignature rowSignature) @@ -151,9 +164,9 @@ public class SegmentMetadataHolder return this; } - public Builder withReplicas(Map> segmentServerMap) + public Builder withReplicas(Set servers) { - this.segmentServerMap = segmentServerMap; + this.segmentServers = servers; return this; } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java index d0599f86190..e895113f850 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/schema/SystemSchema.java @@ -224,7 +224,7 @@ public class SystemSchema extends AbstractSchema Maps.newHashMapWithExpectedSize(druidSchema.getTotalSegments()); for (SegmentMetadataHolder h : availableSegmentMetadata.values()) { PartialSegmentData partialSegmentData = - new PartialSegmentData(h.isAvailable(), h.isRealtime(), h.getNumReplicas(h.getSegmentId()), h.getNumRows()); + new PartialSegmentData(h.isAvailable(), h.isRealtime(), h.getNumReplicas(), h.getNumRows()); partialSegmentDataMap.put(h.getSegmentId(), partialSegmentData); } diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/schema/DruidSchemaTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/schema/DruidSchemaTest.java index 30b99e59ab4..e707c40aeb5 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/schema/DruidSchemaTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/schema/DruidSchemaTest.java @@ -27,6 +27,8 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.schema.Table; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.druid.client.ImmutableDruidServer; +import org.apache.druid.client.TimelineServerView; import org.apache.druid.data.input.InputRow; import org.apache.druid.java.util.common.Intervals; import org.apache.druid.java.util.common.Pair; @@ -40,6 +42,7 @@ import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.incremental.IncrementalIndexSchema; import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory; +import org.apache.druid.server.coordination.DruidServerMetadata; import org.apache.druid.server.security.NoopEscalator; import org.apache.druid.sql.calcite.planner.PlannerConfig; import org.apache.druid.sql.calcite.table.DruidTable; @@ -84,6 +87,8 @@ public class DruidSchemaTest extends CalciteTestBase private static QueryRunnerFactoryConglomerate conglomerate; private static Closer resourceCloser; + private List druidServers; + @BeforeClass public static void setUpClass() { @@ -163,10 +168,12 @@ public class DruidSchemaTest extends CalciteTestBase index2 ); + final TimelineServerView serverView = new TestServerInventoryView(walker.getSegments()); + druidServers = serverView.getDruidServers(); schema = new DruidSchema( 
CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), - new TestServerInventoryView(walker.getSegments()), + serverView, PLANNER_CONFIG_DEFAULT, new NoopViewManager(), new NoopEscalator() @@ -239,6 +246,62 @@ public class DruidSchemaTest extends CalciteTestBase Assert.assertEquals(SqlTypeName.BIGINT, fields.get(2).getType().getSqlTypeName()); } + /** + * This tests that {@link SegmentMetadataHolder#getNumRows()} is correct in case + * of multiple replicas i.e. when {@link DruidSchema#addSegment(DruidServerMetadata, DataSegment)} + * is called more than once for same segment + */ + @Test + public void testSegmentMetadataHolderNumRows() + { + Map segmentsMetadata = schema.getSegmentMetadata(); + final Set segments = segmentsMetadata.keySet(); + Assert.assertEquals(3, segments.size()); + // find the only segment with datasource "foo2" + final DataSegment existingSegment = segments.stream() + .filter(segment -> segment.getDataSource().equals("foo2")) + .findFirst() + .orElse(null); + Assert.assertNotNull(existingSegment); + final SegmentMetadataHolder existingHolder = segmentsMetadata.get(existingSegment); + // update SegmentMetadataHolder of existingSegment with numRows=5 + SegmentMetadataHolder updatedHolder = SegmentMetadataHolder.from(existingHolder).withNumRows(5).build(); + schema.setSegmentMetadataHolder(existingSegment, updatedHolder); + // find a druidServer holding existingSegment + final Pair pair = druidServers.stream() + .flatMap(druidServer -> druidServer.getSegments() + .stream() + .filter(segment -> segment + .equals( + existingSegment)) + .map(segment -> Pair + .of( + druidServer, + segment + ))) + .findAny() + .orElse(null); + Assert.assertNotNull(pair); + final ImmutableDruidServer server = pair.lhs; + Assert.assertNotNull(server); + final DruidServerMetadata druidServerMetadata = server.getMetadata(); + // invoke DruidSchema#addSegment on existingSegment + schema.addSegment(druidServerMetadata, existingSegment); + segmentsMetadata = schema.getSegmentMetadata(); + // get the only segment with datasource "foo2" + final DataSegment currentSegment = segments.stream() + .filter(segment -> segment.getDataSource().equals("foo2")) + .findFirst() + .orElse(null); + final SegmentMetadataHolder currentHolder = segmentsMetadata.get(currentSegment); + Assert.assertEquals(updatedHolder.getSegmentId(), currentHolder.getSegmentId()); + Assert.assertEquals(updatedHolder.getNumRows(), currentHolder.getNumRows()); + // numreplicas do not change here since we addSegment with the same server which was serving existingSegment before + Assert.assertEquals(updatedHolder.getNumReplicas(), currentHolder.getNumReplicas()); + Assert.assertEquals(updatedHolder.isAvailable(), currentHolder.isAvailable()); + Assert.assertEquals(updatedHolder.isPublished(), currentHolder.isPublished()); + } + @Test public void testNullDatasource() throws IOException { @@ -247,7 +310,10 @@ public class DruidSchemaTest extends CalciteTestBase Assert.assertEquals(segments.size(), 3); // segments contains two segments with datasource "foo" and one with datasource "foo2" // let's remove the only segment with datasource "foo2" - final DataSegment segmentToRemove = segments.stream().filter(segment -> segment.getDataSource().equals("foo2")).findFirst().orElse(null); + final DataSegment segmentToRemove = segments.stream() + .filter(segment -> segment.getDataSource().equals("foo2")) + .findFirst() + .orElse(null); Assert.assertFalse(segmentToRemove == null); schema.removeSegment(segmentToRemove); 
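    // refreshSegments() below is deliberately passed the original segment set, which still
    // contains the segment just removed; without the null check added to
    // DruidSchema#refreshSegmentsForDataSource this reproduced an NPE.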
schema.refreshSegments(segments); // can cause NPE without dataSourceSegments null check in DruidSchema#refreshSegmentsForDataSource @@ -262,8 +328,11 @@ public class DruidSchemaTest extends CalciteTestBase Map segmentMetadatas = schema.getSegmentMetadata(); Set segments = segmentMetadatas.keySet(); Assert.assertEquals(segments.size(), 3); - //remove one of the segments with datasource "foo" - final DataSegment segmentToRemove = segments.stream().filter(segment -> segment.getDataSource().equals("foo")).findFirst().orElse(null); + // remove one of the segments with datasource "foo" + final DataSegment segmentToRemove = segments.stream() + .filter(segment -> segment.getDataSource().equals("foo")) + .findFirst() + .orElse(null); Assert.assertFalse(segmentToRemove == null); schema.removeSegment(segmentToRemove); schema.refreshSegments(segments); // can cause NPE without holder null check in SegmentMetadataHolder#from diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java index 7d8cdaad572..d354f141d9a 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/schema/SystemSchemaTest.java @@ -118,6 +118,11 @@ public class SystemSchemaTest extends CalciteTestBase CalciteTests.createRow(ImmutableMap.of("t", "2001-01-03", "m1", "6.0")) ); + private static final List ROWS3 = ImmutableList.of( + CalciteTests.createRow(ImmutableMap.of("t", "2001-01-01", "m1", "7.0", "dim3", ImmutableList.of("x"))), + CalciteTests.createRow(ImmutableMap.of("t", "2001-01-02", "m1", "8.0", "dim3", ImmutableList.of("xyz"))) + ); + private SystemSchema schema; private SpecificSegmentsQuerySegmentWalker walker; private DruidLeaderClient client; @@ -204,11 +209,22 @@ public class SystemSchemaTest extends CalciteTestBase ) .rows(ROWS2) .buildMMappedIndex(); + final QueryableIndex index3 = IndexBuilder.create() + .tmpDir(new File(tmpDir, "3")) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withMetrics(new LongSumAggregatorFactory("m1", "m1")) + .withRollup(false) + .build() + ) + .rows(ROWS3) + .buildMMappedIndex(); walker = new SpecificSegmentsQuerySegmentWalker(conglomerate) .add(segment1, index1) .add(segment2, index2) - .add(segment3, index2); + .add(segment3, index3); druidSchema = new DruidSchema( CalciteTests.createMockQueryLifecycleFactory(walker, conglomerate), @@ -469,7 +485,7 @@ public class SystemSchemaTest extends CalciteTestBase 100L, 2L, //partition_num 1L, //num_replicas - 3L, //numRows + 2L, //numRows 0L, //is_published 1L, //is_available 0L //is_realtime @@ -481,7 +497,7 @@ public class SystemSchemaTest extends CalciteTestBase 100L, 0L, //partition_num 1L, //num_replicas - 0L, //numRows = 3 + 0L, //numRows 0L, //is_published 1L, //is_available 1L //is_realtime diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestServerInventoryView.java b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestServerInventoryView.java index 6718b1bd1f8..2dcc5695983 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/util/TestServerInventoryView.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/util/TestServerInventoryView.java @@ -20,7 +20,9 @@ package org.apache.druid.sql.calcite.util; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import 
org.apache.druid.client.DruidServer; +import org.apache.druid.client.ImmutableDruidDataSource; import org.apache.druid.client.ImmutableDruidServer; import org.apache.druid.client.TimelineServerView; import org.apache.druid.client.selector.ServerSelector; @@ -33,6 +35,7 @@ import org.apache.druid.timeline.TimelineLookup; import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.Executor; @@ -83,7 +86,14 @@ public class TestServerInventoryView implements TimelineServerView @Override public List getDruidServers() { - throw new UnsupportedOperationException(); + final ImmutableDruidDataSource dataSource = new ImmutableDruidDataSource("DUMMY", Collections.emptyMap(), segments); + final ImmutableDruidServer server = new ImmutableDruidServer( + DUMMY_SERVER, + 0L, + ImmutableMap.of("src", dataSource), + 1 + ); + return ImmutableList.of(server); } @Override From 8ba11591b647fb2763bb6000fe2871aa9928fd38 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Mon, 11 Feb 2019 18:33:18 -0800 Subject: [PATCH 08/25] Add router conf to assembly.xml (#7051) --- distribution/src/assembly/assembly.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/distribution/src/assembly/assembly.xml b/distribution/src/assembly/assembly.xml index ab3e5e0d787..f972970eebd 100644 --- a/distribution/src/assembly/assembly.xml +++ b/distribution/src/assembly/assembly.xml @@ -112,6 +112,13 @@ quickstart/tutorial/conf/druid/middleManager + + ../examples/quickstart/tutorial/conf/druid/router + + * + + quickstart/tutorial/conf/druid/router + ../examples/quickstart/tutorial/conf/tranquility @@ -199,6 +206,13 @@ conf/druid/middleManager + + ../examples/conf/druid/router + + * + + conf/druid/router + ../examples/conf/tranquility From d0abf5c20a4c7e3a8f5ade03ca4efec2bc742094 Mon Sep 17 00:00:00 2001 From: Mingming Qiu Date: Wed, 13 Feb 2019 05:24:28 +0800 Subject: [PATCH 09/25] fix kafka index task doesn't resume when recieve duplicate request (#6990) * fix kafka index task doesn't resume when recieve duplicate request * add unit test --- .../indexing/kafka/KafkaIndexTaskTest.java | 43 +++++++++++++++++++ .../SeekableStreamIndexTaskRunner.java | 1 + 2 files changed, 44 insertions(+) diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java index 04d3802a66c..b7b389668a4 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -2127,6 +2127,49 @@ public class KafkaIndexTaskTest Assert.assertEquals(ImmutableList.of("d", "e"), readSegmentColumn("dim1", desc2)); } + @Test(timeout = 60_000L) + public void testRunWithDuplicateRequest() throws Exception + { + // Insert data + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + for (ProducerRecord record : records) { + kafkaProducer.send(record).get(); + } + } + + final KafkaIndexTask task = createTask( + null, + new KafkaIndexTaskIOConfig( + 0, + "sequence0", + new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 200L)), + new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 500L)), + kafkaServer.consumerProperties(), + KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, + true, + null, + null, + false + ) + ); + 
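    // The task is configured to read offsets 200 through 500; the test below pauses it and sends
    // the same setEndOffsets(500) request twice, expecting the runner to return to READING both
    // times rather than staying paused, which is the bug this patch fixes.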
+ runTask(task); + + while (!task.getRunner().getStatus().equals(Status.READING)) { + Thread.sleep(20); + } + + // first setEndOffsets request + task.getRunner().pause(); + task.getRunner().setEndOffsets(ImmutableMap.of(0, 500L), true); + Assert.assertEquals(Status.READING, task.getRunner().getStatus()); + + // duplicate setEndOffsets request + task.getRunner().pause(); + task.getRunner().setEndOffsets(ImmutableMap.of(0, 500L), true); + Assert.assertEquals(Status.READING, task.getRunner().getStatus()); + } + private ListenableFuture runTask(final Task task) { try { diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java index 857264965f5..c86a2b509c0 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskRunner.java @@ -1403,6 +1403,7 @@ public abstract class SeekableStreamIndexTaskRunner Date: Wed, 13 Feb 2019 11:20:52 -0800 Subject: [PATCH 10/25] Fix and improve doc for partitioning of local index (#7064) --- docs/content/ingestion/index.md | 4 ++-- docs/content/ingestion/native_tasks.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/content/ingestion/index.md b/docs/content/ingestion/index.md index db1edfa0172..e4b6b70dc02 100644 --- a/docs/content/ingestion/index.md +++ b/docs/content/ingestion/index.md @@ -178,7 +178,7 @@ the best one for your situation. |Method|How it works|Can append and overwrite?|Can handle late data?|Exactly-once ingestion?|Real-time queries?| |------|------------|-------------------------|---------------------|-----------------------|------------------| |[Native batch](native_tasks.html)|Druid loads data directly from S3, HTTP, NFS, or other networked storage.|Append or overwrite|Yes|Yes|No| -|[Hadoop](hadoop.html)|Druid launches Hadoop Map/Reduce jobs to load data files.|Append or overwrite|Yes|Yes|No| +|[Hadoop](hadoop.html)|Druid launches Hadoop Map/Reduce jobs to load data files.|Overwrite|Yes|Yes|No| |[Kafka indexing service](../development/extensions-core/kafka-ingestion.html)|Druid reads directly from Kafka.|Append only|Yes|Yes|Yes| |[Tranquility](stream-push.html)|You use Tranquility, a client side library, to push individual records into Druid.|Append only|No - late data is dropped|No - may drop or duplicate data|Yes| @@ -191,7 +191,7 @@ a _time chunk_, and each time chunk contains one or more [segments](../design/se particular time chunk may be partitioned further using options that vary based on the ingestion method you have chosen. * With [Hadoop](hadoop.html) you can do hash- or range-based partitioning on one or more columns. - * With [Native batch](native_tasks.html) you can partition on a hash of all dimension columns. This is useful when + * With [Native batch](native_tasks.html) you can partition on a hash of dimension columns. This is useful when rollup is enabled, since it maximizes your space savings. * With [Kafka indexing](../development/extensions-core/kafka-ingestion.html), partitioning is based on Kafka partitions, and is not configurable through Druid. 
You can configure it on the Kafka side by using the partitioning diff --git a/docs/content/ingestion/native_tasks.md b/docs/content/ingestion/native_tasks.md index e5b2e7d2871..5f7298363a9 100644 --- a/docs/content/ingestion/native_tasks.md +++ b/docs/content/ingestion/native_tasks.md @@ -500,7 +500,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |indexSpec|defines segment storage format options to be used at indexing time, see [IndexSpec](#indexspec)|null|no| |maxPendingPersists|Maximum number of persists that can be pending but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|0 (meaning one persist can be running concurrently with ingestion, and none can be queued up)|no| |forceExtendableShardSpecs|Forces use of extendable shardSpecs. Experimental feature intended for use with the [Kafka indexing service extension](../development/extensions-core/kafka-ingestion.html).|false|no| -|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../ingestion/index.html#roll-up-modes). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. This flag cannot be used with either `appendToExisting` of IOConfig or `forceExtendableShardSpecs`. For more details, see the below __Segment pushing modes__ section.|false|no| +|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../ingestion/index.html#roll-up-modes). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. If this is set to true, the index task will read the entire input data twice: one for finding the optimal number of partitions per time chunk and one for generating segments. Note that the result segments would be hash-partitioned. You can set `forceExtendableShardSpecs` if you plan to append more data to the same time range in the future. This flag cannot be used with `appendToExisting` of IOConfig. For more details, see the below __Segment pushing modes__ section.|false|no| |reportParseExceptions|DEPRECATED. If true, exceptions encountered during parsing will be thrown and will halt ingestion; if false, unparseable rows and fields will be skipped. Setting `reportParseExceptions` to true will override existing configurations for `maxParseExceptions` and `maxSavedParseExceptions`, setting `maxParseExceptions` to 0 and limiting `maxSavedParseExceptions` to no more than 1.|false|no| |pushTimeout|Milliseconds to wait for pushing segments. It must be >= 0, where 0 means to wait forever.|0|no| |segmentWriteOutMediumFactory|Segment write-out medium to use when creating segments. 
See [SegmentWriteOutMediumFactory](#segmentWriteOutMediumFactory).|Not specified, the value from `druid.peon.defaultSegmentWriteOutMediumFactory.type` is used|no| From 1701fbcad3430a3e6e19134e870a6059864c8a80 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 13 Feb 2019 11:22:48 -0800 Subject: [PATCH 11/25] Improve error message for revoked locks (#7035) * Improve error message for revoked locks * fix test * fix test * fix test * fix toString --- .../SegmentTransactionalInsertAction.java | 7 +- .../SegmentTransactionalInsertActionTest.java | 8 +- .../indexing/common/task/IndexTaskTest.java | 5 +- ...TestIndexerMetadataStorageCoordinator.java | 2 +- .../overlord/SegmentPublishResult.java | 31 ++++++-- .../IndexerSQLMetadataStorageCoordinator.java | 4 +- .../appenderator/BaseAppenderatorDriver.java | 27 +++++-- .../overlord/SegmentPublishResultTest.java | 79 +++++++++++++++++++ ...exerSQLMetadataStorageCoordinatorTest.java | 18 ++--- .../BatchAppenderatorDriverTest.java | 2 +- .../StreamAppenderatorDriverFailTest.java | 2 +- .../StreamAppenderatorDriverTest.java | 7 +- 12 files changed, 154 insertions(+), 38 deletions(-) create mode 100644 server/src/test/java/org/apache/druid/indexing/overlord/SegmentPublishResultTest.java diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java index 8a3c713fdb7..0af850e0fcd 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertAction.java @@ -118,7 +118,12 @@ public class SegmentTransactionalInsertAction implements TaskAction SegmentPublishResult.fail( + "Invalid task locks. Maybe they are revoked by a higher priority task." + + " Please check the overlord log for details." 
+ ) + ) .build() ); } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java index e152995cc91..463916f9567 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/actions/SegmentTransactionalInsertActionTest.java @@ -101,7 +101,7 @@ public class SegmentTransactionalInsertActionTest task, actionTestKit.getTaskActionToolbox() ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(SEGMENT1), true), result1); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(SEGMENT1)), result1); SegmentPublishResult result2 = new SegmentTransactionalInsertAction( ImmutableSet.of(SEGMENT2), @@ -111,7 +111,7 @@ public class SegmentTransactionalInsertActionTest task, actionTestKit.getTaskActionToolbox() ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(SEGMENT2), true), result2); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(SEGMENT2)), result2); Assert.assertEquals( ImmutableSet.of(SEGMENT1, SEGMENT2), @@ -143,7 +143,7 @@ public class SegmentTransactionalInsertActionTest actionTestKit.getTaskActionToolbox() ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(), false), result); + Assert.assertEquals(SegmentPublishResult.fail("java.lang.RuntimeException: Aborting transaction!"), result); } @Test @@ -157,6 +157,6 @@ public class SegmentTransactionalInsertActionTest thrown.expect(IllegalStateException.class); thrown.expectMessage(CoreMatchers.containsString("are not covered by locks")); SegmentPublishResult result = action.perform(task, actionTestKit.getTaskActionToolbox()); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(SEGMENT3), true), result); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(SEGMENT3)), result); } } diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java index 746b0c992ae..ef7dbce7b8d 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/common/task/IndexTaskTest.java @@ -1574,10 +1574,7 @@ public class IndexTaskTest } if (taskAction instanceof SegmentTransactionalInsertAction) { - return (RetType) new SegmentPublishResult( - ((SegmentTransactionalInsertAction) taskAction).getSegments(), - true - ); + return (RetType) SegmentPublishResult.ok(((SegmentTransactionalInsertAction) taskAction).getSegments()); } if (taskAction instanceof SegmentAllocateAction) { diff --git a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java index 0424cbf3cde..0eeecd5375b 100644 --- a/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java +++ b/indexing-service/src/test/java/org/apache/druid/indexing/test/TestIndexerMetadataStorageCoordinator.java @@ -116,7 +116,7 @@ public class TestIndexerMetadataStorageCoordinator implements IndexerMetadataSto ) { // Don't actually compare metadata, just do it! 
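    // Illustrative usage of the new factory methods (the JSON constructor is now private and a
    // failed result carries an error message):
    //
    //   SegmentPublishResult ok = SegmentPublishResult.ok(ImmutableSet.of(segment));
    //   SegmentPublishResult failed = SegmentPublishResult.fail("lock was revoked");
    //   // failed.getSegments() is empty and failed.getErrorMsg() explains why the publish failed.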
- return new SegmentPublishResult(announceHistoricalSegments(segments), true); + return SegmentPublishResult.ok(announceHistoricalSegments(segments)); } @Override diff --git a/server/src/main/java/org/apache/druid/indexing/overlord/SegmentPublishResult.java b/server/src/main/java/org/apache/druid/indexing/overlord/SegmentPublishResult.java index dc86b7268e5..a088c932e5b 100644 --- a/server/src/main/java/org/apache/druid/indexing/overlord/SegmentPublishResult.java +++ b/server/src/main/java/org/apache/druid/indexing/overlord/SegmentPublishResult.java @@ -25,6 +25,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import org.apache.druid.timeline.DataSegment; +import javax.annotation.Nullable; import java.util.Objects; import java.util.Set; @@ -42,20 +43,29 @@ public class SegmentPublishResult { private final Set segments; private final boolean success; + @Nullable + private final String errorMsg; - public static SegmentPublishResult fail() + public static SegmentPublishResult ok(Set segments) { - return new SegmentPublishResult(ImmutableSet.of(), false); + return new SegmentPublishResult(segments, true, null); + } + + public static SegmentPublishResult fail(String errorMsg) + { + return new SegmentPublishResult(ImmutableSet.of(), false, errorMsg); } @JsonCreator - public SegmentPublishResult( + private SegmentPublishResult( @JsonProperty("segments") Set segments, - @JsonProperty("success") boolean success + @JsonProperty("success") boolean success, + @JsonProperty("errorMsg") @Nullable String errorMsg ) { this.segments = Preconditions.checkNotNull(segments, "segments"); this.success = success; + this.errorMsg = errorMsg; if (!success) { Preconditions.checkArgument(segments.isEmpty(), "segments must be empty for unsuccessful publishes"); @@ -74,6 +84,13 @@ public class SegmentPublishResult return success; } + @JsonProperty + @Nullable + public String getErrorMsg() + { + return errorMsg; + } + @Override public boolean equals(Object o) { @@ -85,13 +102,14 @@ public class SegmentPublishResult } SegmentPublishResult that = (SegmentPublishResult) o; return success == that.success && - Objects.equals(segments, that.segments); + Objects.equals(segments, that.segments) && + Objects.equals(errorMsg, that.errorMsg); } @Override public int hashCode() { - return Objects.hash(segments, success); + return Objects.hash(segments, success, errorMsg); } @Override @@ -100,6 +118,7 @@ public class SegmentPublishResult return "SegmentPublishResult{" + "segments=" + segments + ", success=" + success + + ", errorMsg='" + errorMsg + '\'' + '}'; } } diff --git a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java index 9df56738912..b0aaa977e6f 100644 --- a/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java +++ b/server/src/main/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinator.java @@ -324,7 +324,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor } } - return new SegmentPublishResult(ImmutableSet.copyOf(inserted), true); + return SegmentPublishResult.ok(ImmutableSet.copyOf(inserted)); } }, 3, @@ -333,7 +333,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor } catch (CallbackFailedException e) { if (definitelyNotUpdated.get()) { - return SegmentPublishResult.fail(); + return SegmentPublishResult.fail(e.getMessage()); } 
else { // Must throw exception if we are not sure if we updated or not. throw e; diff --git a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java index cb6ba9085a7..76100e100e5 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/appenderator/BaseAppenderatorDriver.java @@ -36,6 +36,7 @@ import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import org.apache.druid.data.input.Committer; import org.apache.druid.data.input.InputRow; +import org.apache.druid.indexing.overlord.SegmentPublishResult; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.logger.Logger; @@ -553,16 +554,26 @@ public abstract class BaseAppenderatorDriver implements Closeable try { final Object metadata = segmentsAndMetadata.getCommitMetadata(); - final boolean published = publisher.publishSegments( + final SegmentPublishResult publishResult = publisher.publishSegments( ImmutableSet.copyOf(segmentsAndMetadata.getSegments()), metadata == null ? null : ((AppenderatorDriverMetadata) metadata).getCallerMetadata() - ).isSuccess(); + ); - if (published) { + if (publishResult.isSuccess()) { log.info("Published segments."); } else { - log.info("Transaction failure while publishing segments, removing them from deep storage " - + "and checking if someone else beat us to publishing."); + if (publishResult.getErrorMsg() == null) { + log.warn( + "Transaction failure while publishing segments. Please check the overlord log." + + " Removing them from deep storage and checking if someone else beat us to publishing." + ); + } else { + log.warn( + "Transaction failure while publishing segments because of [%s]. Please check the overlord log." + + " Removing them from deep storage and checking if someone else beat us to publishing.", + publishResult.getErrorMsg() + ); + } segmentsAndMetadata.getSegments().forEach(dataSegmentKiller::killQuietly); @@ -576,7 +587,11 @@ public abstract class BaseAppenderatorDriver implements Closeable .equals(Sets.newHashSet(segmentsAndMetadata.getSegments()))) { log.info("Our segments really do exist, awaiting handoff."); } else { - throw new ISE("Failed to publish segments."); + if (publishResult.getErrorMsg() != null) { + throw new ISE("Failed to publish segments because of [%s].", publishResult.getErrorMsg()); + } else { + throw new ISE("Failed to publish segments."); + } } } } diff --git a/server/src/test/java/org/apache/druid/indexing/overlord/SegmentPublishResultTest.java b/server/src/test/java/org/apache/druid/indexing/overlord/SegmentPublishResultTest.java new file mode 100644 index 00000000000..1772a9d9c9f --- /dev/null +++ b/server/src/test/java/org/apache/druid/indexing/overlord/SegmentPublishResultTest.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.indexing.overlord; + +import com.fasterxml.jackson.databind.InjectableValues.Std; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableSet; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.Intervals; +import org.apache.druid.timeline.DataSegment; +import org.apache.druid.timeline.DataSegment.PruneLoadSpecHolder; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +public class SegmentPublishResultTest +{ + private final ObjectMapper objectMapper = new DefaultObjectMapper() + .setInjectableValues(new Std().addValue(PruneLoadSpecHolder.class, PruneLoadSpecHolder.DEFAULT)); + + @Test + public void testSerdeOkResult() throws IOException + { + final SegmentPublishResult original = SegmentPublishResult.ok( + ImmutableSet.of( + segment(Intervals.of("2018/2019")), + segment(Intervals.of("2019/2020")) + ) + ); + + final String json = objectMapper.writeValueAsString(original); + final SegmentPublishResult fromJson = objectMapper.readValue(json, SegmentPublishResult.class); + Assert.assertEquals(original, fromJson); + } + + @Test + public void testSerdeFailResult() throws IOException + { + final SegmentPublishResult original = SegmentPublishResult.fail("test"); + + final String json = objectMapper.writeValueAsString(original); + final SegmentPublishResult fromJson = objectMapper.readValue(json, SegmentPublishResult.class); + Assert.assertEquals(original, fromJson); + } + + private static DataSegment segment(Interval interval) + { + return new DataSegment( + "ds", + interval, + "version", + null, + null, + null, + null, + 9, + 10L + ); + } +} diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index 09b6f56f477..879ddfb13b4 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -304,7 +304,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest new ObjectMetadata(null), new ObjectMetadata(ImmutableMap.of("foo", "bar")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(defaultSegment), true), result1); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment)), result1); Assert.assertArrayEquals( mapper.writeValueAsString(defaultSegment).getBytes(StandardCharsets.UTF_8), @@ -322,7 +322,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest new ObjectMetadata(ImmutableMap.of("foo", "bar")), new ObjectMetadata(ImmutableMap.of("foo", "baz")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(defaultSegment2), true), result2); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment2)), result2); Assert.assertArrayEquals( mapper.writeValueAsString(defaultSegment2).getBytes(StandardCharsets.UTF_8), @@ -378,7 +378,7 @@ public class 
IndexerSQLMetadataStorageCoordinatorTest new ObjectMetadata(null), new ObjectMetadata(ImmutableMap.of("foo", "bar")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(defaultSegment), true), result1); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment)), result1); Assert.assertArrayEquals( mapper.writeValueAsString(defaultSegment).getBytes(StandardCharsets.UTF_8), @@ -399,7 +399,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest new ObjectMetadata(ImmutableMap.of("foo", "bar")), new ObjectMetadata(ImmutableMap.of("foo", "baz")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(defaultSegment2), true), result2); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment2)), result2); Assert.assertArrayEquals( mapper.writeValueAsString(defaultSegment2).getBytes(StandardCharsets.UTF_8), @@ -429,7 +429,7 @@ public class IndexerSQLMetadataStorageCoordinatorTest new ObjectMetadata(ImmutableMap.of("foo", "bar")), new ObjectMetadata(ImmutableMap.of("foo", "baz")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(), false), result1); + Assert.assertEquals(SegmentPublishResult.fail("java.lang.RuntimeException: Aborting transaction!"), result1); // Should only be tried once. Assert.assertEquals(1, metadataUpdateCounter.get()); @@ -443,14 +443,14 @@ public class IndexerSQLMetadataStorageCoordinatorTest new ObjectMetadata(null), new ObjectMetadata(ImmutableMap.of("foo", "baz")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(defaultSegment), true), result1); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment)), result1); final SegmentPublishResult result2 = coordinator.announceHistoricalSegments( ImmutableSet.of(defaultSegment2), new ObjectMetadata(null), new ObjectMetadata(ImmutableMap.of("foo", "baz")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(), false), result2); + Assert.assertEquals(SegmentPublishResult.fail("java.lang.RuntimeException: Aborting transaction!"), result2); // Should only be tried once per call. Assert.assertEquals(2, metadataUpdateCounter.get()); @@ -464,14 +464,14 @@ public class IndexerSQLMetadataStorageCoordinatorTest new ObjectMetadata(null), new ObjectMetadata(ImmutableMap.of("foo", "baz")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(defaultSegment), true), result1); + Assert.assertEquals(SegmentPublishResult.ok(ImmutableSet.of(defaultSegment)), result1); final SegmentPublishResult result2 = coordinator.announceHistoricalSegments( ImmutableSet.of(defaultSegment2), new ObjectMetadata(ImmutableMap.of("foo", "qux")), new ObjectMetadata(ImmutableMap.of("foo", "baz")) ); - Assert.assertEquals(new SegmentPublishResult(ImmutableSet.of(), false), result2); + Assert.assertEquals(SegmentPublishResult.fail("java.lang.RuntimeException: Aborting transaction!"), result2); // Should only be tried once per call. 
Assert.assertEquals(2, metadataUpdateCounter.get()); diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriverTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriverTest.java index 66e136aaf7d..6536cb6d4a3 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriverTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/BatchAppenderatorDriverTest.java @@ -195,6 +195,6 @@ public class BatchAppenderatorDriverTest extends EasyMockSupport static TransactionalSegmentPublisher makeOkPublisher() { - return (segments, commitMetadata) -> new SegmentPublishResult(ImmutableSet.of(), true); + return (segments, commitMetadata) -> SegmentPublishResult.ok(ImmutableSet.of()); } } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java index 79b575434bc..9d922537159 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverFailTest.java @@ -239,7 +239,7 @@ public class StreamAppenderatorDriverFailTest extends EasyMockSupport { expectedException.expect(ExecutionException.class); expectedException.expectCause(CoreMatchers.instanceOf(ISE.class)); - expectedException.expectMessage("Failed to publish segments."); + expectedException.expectMessage("Failed to publish segments because of [test]."); testFailDuringPublishInternal(false); } diff --git a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java index e1692177fa9..3a491e95ad8 100644 --- a/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java +++ b/server/src/test/java/org/apache/druid/segment/realtime/appenderator/StreamAppenderatorDriverTest.java @@ -361,16 +361,17 @@ public class StreamAppenderatorDriverTest extends EasyMockSupport static TransactionalSegmentPublisher makeOkPublisher() { - return (segments, commitMetadata) -> new SegmentPublishResult(Collections.emptySet(), true); + return (segments, commitMetadata) -> SegmentPublishResult.ok(Collections.emptySet()); } static TransactionalSegmentPublisher makeFailingPublisher(boolean failWithException) { return (segments, commitMetadata) -> { + final RuntimeException exception = new RuntimeException("test"); if (failWithException) { - throw new RuntimeException("test"); + throw exception; } - return SegmentPublishResult.fail(); + return SegmentPublishResult.fail(exception.getMessage()); }; } From 970308463d7c26f6675c7c5d91c2233e3f68d7c3 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 13 Feb 2019 11:23:08 -0800 Subject: [PATCH 12/25] Add doc for Hadoop-based ingestion vs Native batch ingestion (#7044) * Add doc for Hadoop-based ingestion vs Native batch ingestion * add links * add links --- .../ingestion/hadoop-vs-native-batch.md | 43 +++++++++++++++++++ docs/content/ingestion/hadoop.md | 4 +- docs/content/ingestion/native_tasks.md | 2 + docs/content/ingestion/tasks.md | 4 ++ 4 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 docs/content/ingestion/hadoop-vs-native-batch.md diff --git 
a/docs/content/ingestion/hadoop-vs-native-batch.md b/docs/content/ingestion/hadoop-vs-native-batch.md new file mode 100644 index 00000000000..ce2c97e603b --- /dev/null +++ b/docs/content/ingestion/hadoop-vs-native-batch.md @@ -0,0 +1,43 @@ +--- +layout: doc_page +title: "Hadoop-based Batch Ingestion VS Native Batch Ingestion" +--- + + + +# Comparison of Batch Ingestion Methods + +Druid basically supports three types of batch ingestion: Hadoop-based +batch ingestion, native parallel batch ingestion, and native local batch +ingestion. The below table shows what features are supported by each +ingestion method. + + +| |Hadoop-based ingestion|Native parallel ingestion|Native local ingestion| +|---|----------------------|-------------------------|----------------------| +| Parallel indexing | Always parallel | Parallel if firehose is splittable | Always sequential | +| Supported indexing modes | Replacing mode | Both appending and replacing modes | Both appending and replacing modes | +| External dependency | Hadoop (it internally submits Hadoop jobs) | No dependency | No dependency | +| Supported [rollup modes](http://druid.io/docs/latest/ingestion/index.html#roll-up-modes) | Perfect rollup | Best-effort rollup | Both perfect and best-effort rollup | +| Supported partitioning methods | [Both Hash-based and range partitioning](http://druid.io/docs/latest/ingestion/hadoop.html#partitioning-specification) | N/A | Hash-based partitioning (when `forceGuaranteedRollup` = true) | +| Supported input locations | All locations accessible via HDFS client or Druid dataSource | All implemented [firehoses](./firehose.html) | All implemented [firehoses](./firehose.html) | +| Supported file formats | All implemented Hadoop InputFormats | Currently only text file format (CSV, TSV, JSON) | Currently only text file format (CSV, TSV, JSON) | +| Saving parse exceptions in ingestion report | Currently not supported | Currently not supported | Supported | +| Custom segment version | Supported, but this is NOT recommended | N/A | N/A | diff --git a/docs/content/ingestion/hadoop.md b/docs/content/ingestion/hadoop.md index 4f8174c40a9..c824fd0809c 100644 --- a/docs/content/ingestion/hadoop.md +++ b/docs/content/ingestion/hadoop.md @@ -25,7 +25,9 @@ title: "Hadoop-based Batch Ingestion" # Hadoop-based Batch Ingestion Hadoop-based batch ingestion in Druid is supported via a Hadoop-ingestion task. These tasks can be posted to a running -instance of a Druid [Overlord](../design/overlord.html). +instance of a Druid [Overlord](../design/overlord.html). + +Please check [Hadoop-based Batch Ingestion VS Native Batch Ingestion](./hadoop-vs-native-batch.html) for differences between native batch ingestion and Hadoop-based ingestion. ## Command Line Hadoop Indexer diff --git a/docs/content/ingestion/native_tasks.md b/docs/content/ingestion/native_tasks.md index 5f7298363a9..963adeae21d 100644 --- a/docs/content/ingestion/native_tasks.md +++ b/docs/content/ingestion/native_tasks.md @@ -28,6 +28,8 @@ Druid currently has two types of native batch indexing tasks, `index_parallel` w in parallel on multiple MiddleManager nodes, and `index` which will run a single indexing task locally on a single MiddleManager. +Please check [Hadoop-based Batch Ingestion VS Native Batch Ingestion](./hadoop-vs-native-batch.html) for differences between native batch ingestion and Hadoop-based ingestion. 
+ Parallel Index Task -------------------------------- diff --git a/docs/content/ingestion/tasks.md b/docs/content/ingestion/tasks.md index 41f7b52444b..4653d6ba2ed 100644 --- a/docs/content/ingestion/tasks.md +++ b/docs/content/ingestion/tasks.md @@ -41,6 +41,10 @@ See [batch ingestion](../ingestion/hadoop.html). Druid provides a native index task which doesn't need any dependencies on other systems. See [native index tasks](./native_tasks.html) for more details. +
+Please check [Hadoop-based Batch Ingestion VS Native Batch Ingestion](./hadoop-vs-native-batch.html) for differences between native batch ingestion and Hadoop-based ingestion. +
+ ### Kafka Indexing Tasks Kafka Indexing tasks are automatically created by a Kafka Supervisor and are responsible for pulling data from Kafka streams. These tasks are not meant to be created/submitted directly by users. See [Kafka Indexing Service](../development/extensions-core/kafka-ingestion.html) for more details. From 673396ae74ead086fd9e748de7fe685466e97715 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Wed, 13 Feb 2019 13:43:31 -0800 Subject: [PATCH 13/25] Add proposal template (#7062) * Add proposal template Adds a proposal template based on the discussion in https://lists.apache.org/thread.html/bb9c5e1f8ce9b3148a5c26f95059f9b6629fae3bf8c617121d671395@%3Cdev.druid.apache.org%3E * Add license --- .github/ISSUE_TEMPLATE/proposal.md | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/proposal.md diff --git a/.github/ISSUE_TEMPLATE/proposal.md b/.github/ISSUE_TEMPLATE/proposal.md new file mode 100644 index 00000000000..3a93808ff2d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/proposal.md @@ -0,0 +1,61 @@ +--- +name: Proposal +about: A template for major Druid change proposals +title: "[PROPOSAL]" +labels: Proposal +assignees: '' + +--- + + + +# Motivation + +A description of the problem. + +# Proposed changes + +This section should provide a detailed description of the changes being proposed. This will usually be the longest section; please feel free to split this section or other sections into subsections if needed. + +This section should include any changes made to user-facing interfaces, for example: +- Parameters +- JSON query/ingest specs +- SQL language +- Emitted metrics + +# Rationale + +A discussion of why this particular solution is the best one. One good way to approach this is to discuss other alternative solutions that you considered and decided against. This should also include a discussion of any specific benefits or drawbacks you are aware of. + +# Operational impact + +This section should describe how the proposed changes will impact the operation of existing clusters. It should answer questions such as: + +- Is anything going to be deprecated or removed by this change? How will we phase out old behavior? +- Is there a migration path that cluster operators need to be aware of? +- Will there be any effect on the ability to do a rolling upgrade, or to do a rolling _downgrade_ if an operator wants to switch back to a previous version? + +# Test Plan (optional) + +An optional discussion of how the proposed changes will be tested. This section should focus on higher level system test strategy and not unit tests (as UTs will be implementation dependent). + +# Future work (optional) + +An optional discussion of things that you believe are out of scope for the particular proposal but would be nice follow-ups. It helps show where a particular change could be leading us. There isn't any commitment that the proposal author will actually work on the items discussed in this section. 
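
For reference, the sketch below shows how downstream code might consume the `SegmentPublishResult` factory API introduced in PATCH 11 above. Only `ok(...)`, `fail(String)`, `isSuccess()`, and `getErrorMsg()` come from that patch; the `handle` method, class name, and messages here are hypothetical illustrations added by the editor, not part of the patch series.

```java
// Minimal illustrative sketch (assumes druid-server on the classpath).
// Demonstrates branching on the new success/error-message contract:
// successful publishes carry segments and a null errorMsg, failed ones
// carry an empty segment set plus an optional error message.
import org.apache.druid.indexing.overlord.SegmentPublishResult;

class SegmentPublishResultExample
{
  static String handle(SegmentPublishResult result)
  {
    if (result.isSuccess()) {
      return "published";
    }
    // Failed publishes now expose the reason instead of a bare boolean.
    return result.getErrorMsg() == null
           ? "transaction failure (no error message)"
           : "transaction failure: " + result.getErrorMsg();
  }

  public static void main(String[] args)
  {
    // Example of a failure produced via the new factory method.
    System.out.println(handle(SegmentPublishResult.fail("lock was revoked by a higher priority task")));
  }
}
```
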
From 90c1a54b868a7e32a43895d77c79895bf242d8c4 Mon Sep 17 00:00:00 2001 From: Edward Gan Date: Wed, 13 Feb 2019 14:03:47 -0800 Subject: [PATCH 14/25] Moments Sketch custom aggregator (#6581) * Moments Sketch Integration with Druid * updates, add documentation, fix warnings * nits * disallowed base64 * update to druid 0.14 --- .../momentsketch-quantiles.md | 120 ++++++ extensions-contrib/momentsketch/pom.xml | 100 +++++ .../MomentSketchComplexMetricSerde.java | 92 ++++ .../MomentSketchJsonSerializer.java | 39 ++ .../momentsketch/MomentSketchModule.java | 82 ++++ .../MomentSketchObjectStrategy.java | 62 +++ .../momentsketch/MomentSketchWrapper.java | 189 +++++++++ .../MomentSketchAggregatorFactory.java | 294 +++++++++++++ .../MomentSketchBuildAggregator.java | 82 ++++ .../MomentSketchBuildBufferAggregator.java | 94 ++++ .../MomentSketchMaxPostAggregator.java | 130 ++++++ .../MomentSketchMergeAggregator.java | 75 ++++ .../MomentSketchMergeAggregatorFactory.java | 62 +++ .../MomentSketchMergeBufferAggregator.java | 110 +++++ .../MomentSketchMinPostAggregator.java | 129 ++++++ .../MomentSketchQuantilePostAggregator.java | 148 +++++++ .../momentsketch/MomentSketchWrapperTest.java | 53 +++ .../MomentsSketchAggregatorTest.java | 204 +++++++++ .../src/test/resources/doubles_build_data.tsv | 400 ++++++++++++++++++ pom.xml | 1 + .../query/aggregation/AggregatorUtil.java | 4 + .../aggregation/post/PostAggregatorIds.java | 3 + .../druid/query/cache/CacheKeyBuilder.java | 14 + .../druid/segment/serde/ComplexMetrics.java | 8 + 24 files changed, 2495 insertions(+) create mode 100644 docs/content/development/extensions-contrib/momentsketch-quantiles.md create mode 100644 extensions-contrib/momentsketch/pom.xml create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchComplexMetricSerde.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchJsonSerializer.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchObjectStrategy.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapper.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchAggregatorFactory.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildAggregator.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildBufferAggregator.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMaxPostAggregator.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregator.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregatorFactory.java create mode 100644 
extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeBufferAggregator.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMinPostAggregator.java create mode 100644 extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchQuantilePostAggregator.java create mode 100644 extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapperTest.java create mode 100644 extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java create mode 100644 extensions-contrib/momentsketch/src/test/resources/doubles_build_data.tsv diff --git a/docs/content/development/extensions-contrib/momentsketch-quantiles.md b/docs/content/development/extensions-contrib/momentsketch-quantiles.md new file mode 100644 index 00000000000..69bb93cffa1 --- /dev/null +++ b/docs/content/development/extensions-contrib/momentsketch-quantiles.md @@ -0,0 +1,120 @@ +--- +layout: doc_page +title: "Moment Sketches for Approximate Quantiles module" +--- + + + +# MomentSketch Quantiles Sketch module + +This module provides Druid aggregators for approximate quantile queries using the [momentsketch](https://github.com/stanford-futuredata/momentsketch) library. +The momentsketch provides coarse quantile estimates with less space and aggregation time overheads than traditional sketches, approaching the performance of counts and sums by reconstructing distributions from computed statistics. + +To use this aggregator, make sure you [include](../../operations/including-extensions.html) the extension in your config file: + +``` +druid.extensions.loadList=["druid-momentsketch"] +``` + +### Aggregator + +The result of the aggregation is a momentsketch that is the union of all sketches either built from raw data or read from the segments. + +The `momentSketch` aggregator operates over raw data while the `momentSketchMerge` aggregator should be used when aggregating pre-computed sketches. +```json +{ + "type" : , + "name" : , + "fieldName" : , + "k" : , + "compress" : + } +``` + +|property|description|required?| +|--------|-----------|---------| +|type|Type of aggregator desired. Either "momentSketch" or "momentSketchMerge" |yes| +|name|A String for the output (result) name of the calculation.|yes| +|fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes| +|k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Usable range is generally [3,15] |no, defaults to 13.| +|compress|Flag for whether the aggregator compresses numeric values using arcsinh. Can improve robustness to skewed and long-tailed distributions, but reduces accuracy slightly on more uniform distributions.| no, defaults to true + +### Post Aggregators + +Users can query for a set of quantiles using the `momentSketchSolveQuantiles` post-aggregator on the sketches created by the `momentSketch` or `momentSketchMerge` aggregators. 
+```json +{ + "type" : "momentSketchSolveQuantiles", + "name" : , + "field" : , + "fractions" : +} +``` + +Users can also query for the min/max of a distribution: +```json +{ + "type" : "momentSketchMin" | "momentSketchMax", + "name" : , + "field" : , +} +``` + +### Example +As an example of a query with sketches pre-aggregated at ingestion time, one could set up the following aggregator at ingest: +```json +{ + "type": "momentSketch", + "name": "sketch", + "fieldName": "value", + "k": 10, + "compress": true, +} +``` +and make queries using the following aggregator + post-aggregator: +```json +{ + "aggregations": [{ + "type": "momentSketchMerge", + "name": "sketch", + "fieldName": "sketch", + "k": 10, + "compress": true + }], + "postAggregations": [ + { + "type": "momentSketchSolveQuantiles", + "name": "quantiles", + "fractions": [0.1, 0.5, 0.9], + "field": { + "type": "fieldAccess", + "fieldName": "sketch" + } + }, + { + "type": "momentSketchMin", + "name": "min", + "field": { + "type": "fieldAccess", + "fieldName": "sketch" + } + }] +} +``` \ No newline at end of file diff --git a/extensions-contrib/momentsketch/pom.xml b/extensions-contrib/momentsketch/pom.xml new file mode 100644 index 00000000000..b8926413a99 --- /dev/null +++ b/extensions-contrib/momentsketch/pom.xml @@ -0,0 +1,100 @@ + + + + + + druid + org.apache.druid + 0.14.0-incubating-SNAPSHOT + ../../pom.xml + + 4.0.0 + + org.apache.druid.extensions.contrib + druid-momentsketch + druid-momentsketch + Aggregators for the approximate quantile moment sketch + + + + UTF-8 + 0.12.2 + + + + + com.github.stanford-futuredata.momentsketch + momentsketch-solver + 0.1.1 + + + com.google.guava + guava + ${guava.version} + provided + + + org.apache.druid + druid-core + ${project.parent.version} + provided + + + org.apache.druid + druid-processing + ${project.parent.version} + provided + + + junit + junit + test + + + org.easymock + easymock + test + + + org.apache.druid + druid-core + ${project.parent.version} + test-jar + test + + + org.apache.druid + druid-processing + ${project.parent.version} + test-jar + test + + + org.apache.druid + druid-server + ${project.parent.version} + test + + + + + \ No newline at end of file diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchComplexMetricSerde.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchComplexMetricSerde.java new file mode 100644 index 00000000000..4e631e53525 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchComplexMetricSerde.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.momentsketch; + +import org.apache.druid.data.input.InputRow; +import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory; +import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.serde.ComplexColumnPartSupplier; +import org.apache.druid.segment.serde.ComplexMetricExtractor; +import org.apache.druid.segment.serde.ComplexMetricSerde; +import org.apache.druid.segment.serde.LargeColumnSupportedComplexColumnSerializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.nio.ByteBuffer; + +public class MomentSketchComplexMetricSerde extends ComplexMetricSerde +{ + private static final MomentSketchObjectStrategy STRATEGY = new MomentSketchObjectStrategy(); + + @Override + public String getTypeName() + { + return MomentSketchAggregatorFactory.TYPE_NAME; + } + + @Override + public ComplexMetricExtractor getExtractor() + { + return new ComplexMetricExtractor() + { + @Override + public Class extractedClass() + { + return MomentSketchWrapper.class; + } + + @Override + public Object extractValue(final InputRow inputRow, final String metricName) + { + return (MomentSketchWrapper) inputRow.getRaw(metricName); + } + }; + } + + @Override + public void deserializeColumn(ByteBuffer buffer, ColumnBuilder builder) + { + final GenericIndexed column = GenericIndexed.read( + buffer, + STRATEGY, + builder.getFileMapper() + ); + builder.setComplexColumnSupplier(new ComplexColumnPartSupplier(getTypeName(), column)); + } + + @Override + public ObjectStrategy getObjectStrategy() + { + return STRATEGY; + } + + @Override + public GenericColumnSerializer getSerializer(SegmentWriteOutMedium segmentWriteOutMedium, String column) + { + return LargeColumnSupportedComplexColumnSerializer.create( + segmentWriteOutMedium, + column, + this.getObjectStrategy() + ); + } + +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchJsonSerializer.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchJsonSerializer.java new file mode 100644 index 00000000000..268c4017a7a --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchJsonSerializer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.momentsketch; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; + +import java.io.IOException; + +public class MomentSketchJsonSerializer extends JsonSerializer +{ + @Override + public void serialize( + MomentSketchWrapper momentsSketch, + JsonGenerator jsonGenerator, + SerializerProvider serializerProvider + ) throws IOException + { + jsonGenerator.writeBinary(momentsSketch.toByteArray()); + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java new file mode 100644 index 00000000000..29f21355d26 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.momentsketch; + +import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.collect.ImmutableList; +import com.google.inject.Binder; +import org.apache.druid.initialization.DruidModule; +import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory; +import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchMaxPostAggregator; +import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchMergeAggregatorFactory; +import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchMinPostAggregator; +import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchQuantilePostAggregator; +import org.apache.druid.segment.serde.ComplexMetrics; + +import java.util.List; + +/** + * Module defining aggregators for the moments approximate quantiles sketch + * @see MomentSketchAggregatorFactory + */ +public class MomentSketchModule implements DruidModule +{ + @Override + public List getJacksonModules() + { + return ImmutableList.of( + new SimpleModule( + getClass().getSimpleName() + ).registerSubtypes( + new NamedType( + MomentSketchAggregatorFactory.class, + MomentSketchAggregatorFactory.TYPE_NAME + ), + new NamedType( + MomentSketchMergeAggregatorFactory.class, + MomentSketchMergeAggregatorFactory.TYPE_NAME + ), + new NamedType( + MomentSketchQuantilePostAggregator.class, + MomentSketchQuantilePostAggregator.TYPE_NAME + ), + new NamedType( + MomentSketchMinPostAggregator.class, + MomentSketchMinPostAggregator.TYPE_NAME + ), + new NamedType( + MomentSketchMaxPostAggregator.class, + MomentSketchMaxPostAggregator.TYPE_NAME + ) + ).addSerializer(MomentSketchWrapper.class, new MomentSketchJsonSerializer()) + ); + } + + @Override + public void configure(Binder binder) + { + ComplexMetrics.registerSerde( + MomentSketchAggregatorFactory.TYPE_NAME, + MomentSketchComplexMetricSerde::new + ); + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchObjectStrategy.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchObjectStrategy.java new file mode 100644 index 00000000000..7a706f90399 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchObjectStrategy.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.momentsketch; + +import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory; +import org.apache.druid.segment.data.ObjectStrategy; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class MomentSketchObjectStrategy implements ObjectStrategy +{ + private static final byte[] EMPTY_BYTES = new byte[0]; + + @Override + public Class getClazz() + { + return MomentSketchWrapper.class; + } + + @Override + public MomentSketchWrapper fromByteBuffer(ByteBuffer buffer, int numBytes) + { + if (numBytes == 0) { + return null; + } + buffer.limit(buffer.position() + numBytes); + return MomentSketchWrapper.fromBytes(buffer); + } + + @Override + public byte[] toBytes(@Nullable MomentSketchWrapper val) + { + if (val == null) { + return EMPTY_BYTES; + } + return val.toByteArray(); + } + + @Override + public int compare(MomentSketchWrapper o1, MomentSketchWrapper o2) + { + return MomentSketchAggregatorFactory.COMPARATOR.compare(o1, o2); + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapper.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapper.java new file mode 100644 index 00000000000..41ecf30fb84 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapper.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.momentsketch; + +import com.github.stanfordfuturedata.momentsketch.MomentSolver; +import com.github.stanfordfuturedata.momentsketch.MomentStruct; + +import java.nio.ByteBuffer; + +/** + * Class for wrapping the operations of the moments sketch for use in + * the moment sketch aggregator + * {@link org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory}. + * + * k controls the size and accuracy provided by the sketch. + * The sinh function is used to compress the range of data to allow for more robust results + * on skewed and long-tailed metrics, but slightly reducing accuracy on metrics with more uniform + * distributions. + */ +public class MomentSketchWrapper +{ + // The MomentStruct object stores the relevant statistics about a metric distribution. 
+ protected MomentStruct data; + // Whether we use arcsinh to compress the range + protected boolean useArcSinh = true; + + public MomentSketchWrapper(int k) + { + data = new MomentStruct(k); + } + + public MomentSketchWrapper(MomentStruct data) + { + this.data = data; + } + + public void setCompressed(boolean flag) + { + useArcSinh = flag; + } + + public boolean getCompressed() + { + return useArcSinh; + } + + public int getK() + { + return data.power_sums.length; + } + + public double[] getPowerSums() + { + return data.power_sums; + } + + public double getMin() + { + if (useArcSinh) { + return Math.sinh(data.min); + } else { + return data.min; + } + } + + public double getMax() + { + if (useArcSinh) { + return Math.sinh(data.max); + } else { + return data.max; + } + } + + public void add(double rawX) + { + double x = rawX; + if (useArcSinh) { + // Since Java does not have a native arcsinh implementation we + // compute it manually using the following formula. + // This is the inverse operation of Math.sinh + x = Math.log(rawX + Math.sqrt(1 + rawX * rawX)); + } + data.add(x); + } + + public void merge(MomentSketchWrapper other) + { + data.merge(other.data); + } + + public byte[] toByteArray() + { + ByteBuffer bb = ByteBuffer.allocate(2 * Integer.BYTES + (data.power_sums.length + 2) * Double.BYTES); + return toBytes(bb).array(); + } + + public MomentSolver getSolver() + { + MomentSolver ms = new MomentSolver(data); + return ms; + } + + /** + * Estimates quantiles given the statistics in a moments sketch. + * @param fractions real values between [0,1] for which we want to estimate quantiles + * + * @return estimated quantiles. + */ + public double[] getQuantiles(double[] fractions) + { + // The solver attempts to construct a distribution estimate which matches the + // statistics tracked by the moments sketch. We can then read off quantile estimates + // from the reconstructed distribution. + // This operation can be relatively expensive (~1 ms) so we set the parameters from distribution + // reconstruction to conservative values. + MomentSolver ms = new MomentSolver(data); + // Constants here are chosen to yield maximum precision while keeping solve times ~1ms on 2Ghz cpu + // Grid size can be increased if longer solve times are acceptable + ms.setGridSize(1024); + ms.setMaxIter(15); + ms.solve(); + double[] rawQuantiles = ms.getQuantiles(fractions); + for (int i = 0; i < fractions.length; i++) { + if (useArcSinh) { + rawQuantiles[i] = Math.sinh(rawQuantiles[i]); + } + } + return rawQuantiles; + } + + public ByteBuffer toBytes(ByteBuffer bb) + { + int compressedInt = getCompressed() ? 
1 : 0; + bb.putInt(data.power_sums.length); + bb.putInt(compressedInt); + bb.putDouble(data.min); + bb.putDouble(data.max); + for (double x : data.power_sums) { + bb.putDouble(x); + } + return bb; + } + + public static MomentSketchWrapper fromBytes(ByteBuffer bb) + { + int k = bb.getInt(); + int compressedInt = bb.getInt(); + boolean compressed = (compressedInt > 0); + MomentStruct m = new MomentStruct(k); + m.min = bb.getDouble(); + m.max = bb.getDouble(); + for (int i = 0; i < k; i++) { + m.power_sums[i] = bb.getDouble(); + } + MomentSketchWrapper mw = new MomentSketchWrapper(m); + mw.setCompressed(compressed); + return mw; + } + + public static MomentSketchWrapper fromByteArray(byte[] input) + { + ByteBuffer bb = ByteBuffer.wrap(input); + return fromBytes(bb); + } + + @Override + public String toString() + { + return data.toString(); + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchAggregatorFactory.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchAggregatorFactory.java new file mode 100644 index 00000000000..918ad3e04ed --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchAggregatorFactory.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.momentsketch.aggregator; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ColumnCapabilities; +import org.apache.druid.segment.column.ValueType; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; + +/** + * Aggregation operations over the moment-based quantile sketch + * available on github and described + * in the paper Moment-based quantile sketches. 
+ * + * This sketch stores a set of (k) statistics about univariate metrics that can be used to + * solve for approximate quantiles of the original distribution at query time after aggregating + * the statistics. + */ +public class MomentSketchAggregatorFactory extends AggregatorFactory +{ + // Default number of moments (k) chosen for ~1% quantile error. + public static final int DEFAULT_K = 13; + // Safer to compress data with unknown ranges by default, but reduces accuracy on uniform data + public static final boolean DEFAULT_COMPRESS = true; + + private final String name; + private final String fieldName; + // Number of moments tracked. Larger k allows for better estimates but greater resource usage + private final int k; + // Controls whether or not data is compressed onto a smaller range using arcsinh + private final boolean compress; + private final byte cacheTypeId; + + public static final String TYPE_NAME = "momentSketch"; + + @JsonCreator + public MomentSketchAggregatorFactory( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName, + @Nullable @JsonProperty("k") final Integer k, + @Nullable @JsonProperty("compress") final Boolean compress + ) + { + this(name, fieldName, k, compress, AggregatorUtil.MOMENTS_SKETCH_BUILD_CACHE_TYPE_ID); + } + + MomentSketchAggregatorFactory( + final String name, + final String fieldName, + @Nullable final Integer k, + @Nullable final Boolean compress, + final byte cacheTypeId + ) + { + Objects.requireNonNull(name, "Must have a valid, non-null aggregator name"); + this.name = name; + Objects.requireNonNull(fieldName, "Parameter fieldName must be specified"); + this.fieldName = fieldName; + this.k = k == null ? DEFAULT_K : k; + this.compress = compress == null ? DEFAULT_COMPRESS : compress; + this.cacheTypeId = cacheTypeId; + } + + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder( + cacheTypeId + ).appendString(fieldName).appendInt(k).appendBoolean(compress).build(); + } + + + @Override + public Aggregator factorize(ColumnSelectorFactory metricFactory) + { + ColumnCapabilities cap = metricFactory.getColumnCapabilities(fieldName); + if (cap == null || ValueType.isNumeric(cap.getType())) { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + return new MomentSketchBuildAggregator(selector, k, getCompress()); + } else { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + return new MomentSketchMergeAggregator(selector, k, getCompress()); + } + } + + @Override + public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) + { + ColumnCapabilities cap = metricFactory.getColumnCapabilities(fieldName); + if (cap == null || ValueType.isNumeric(cap.getType())) { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + return new MomentSketchBuildBufferAggregator(selector, k, getCompress()); + } else { + final ColumnValueSelector selector = metricFactory.makeColumnValueSelector(fieldName); + return new MomentSketchMergeBufferAggregator(selector, k, getCompress()); + } + } + + public static final Comparator COMPARATOR = Comparator.nullsFirst( + Comparator.comparingDouble(a -> a.getPowerSums()[0]) + ); + + @Override + public Comparator getComparator() + { + return COMPARATOR; + } + + @Override + public Object combine(@Nullable Object lhs, @Nullable Object rhs) + { + if (lhs == null) { + return rhs; + } + if (rhs == null) { + return lhs; + } + MomentSketchWrapper union = 
(MomentSketchWrapper) lhs; + union.merge((MomentSketchWrapper) rhs); + return union; + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new MomentSketchMergeAggregatorFactory(name, k, compress); + } + + @Override + public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException + { + if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) { + return getCombiningFactory(); + } else { + throw new AggregatorFactoryNotMergeableException(this, other); + } + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new MomentSketchAggregatorFactory( + fieldName, + fieldName, + k, + compress + ) + ); + } + + private MomentSketchWrapper deserializeFromByteArray(byte[] bytes) + { + return MomentSketchWrapper.fromByteArray(bytes); + } + + @Override + public Object deserialize(Object serializedSketch) + { + if (serializedSketch instanceof String) { + String str = (String) serializedSketch; + return deserializeFromByteArray(StringUtils.decodeBase64(StringUtils.toUtf8(str))); + } else if (serializedSketch instanceof byte[]) { + return deserializeFromByteArray((byte[]) serializedSketch); + } else if (serializedSketch instanceof MomentSketchWrapper) { + return serializedSketch; + } + throw new ISE( + "Object cannot be deserialized to a Moments Sketch: " + + serializedSketch.getClass() + ); + } + + @Override + public Object finalizeComputation(Object object) + { + return object; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public String getFieldName() + { + return fieldName; + } + + @JsonProperty + public int getK() + { + return k; + } + + @JsonProperty + public boolean getCompress() + { + return compress; + } + + @Override + public List requiredFields() + { + return Collections.singletonList(fieldName); + } + + @Override + public String getTypeName() + { + return TYPE_NAME; + } + + @Override + public int getMaxIntermediateSize() + { + // k double precision moments, 2 doubles for the min and max + // one integer to specify the number of moments + // one integer to specify whether data range is compressed + return (k + 2) * Double.BYTES + 2 * Integer.BYTES; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || !getClass().equals(o.getClass())) { + return false; + } + final MomentSketchAggregatorFactory that = (MomentSketchAggregatorFactory) o; + + return Objects.equals(name, that.name) && + Objects.equals(fieldName, that.fieldName) && + k == that.k && + compress == that.compress; + } + + @Override + public int hashCode() + { + return Objects.hash(name, fieldName, k, compress); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name=" + name + + ", fieldName=" + fieldName + + ", k=" + k + + ", compress=" + compress + + "}"; + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildAggregator.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildAggregator.java new file mode 100644 index 00000000000..db3f4409831 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildAggregator.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.momentsketch.aggregator; + +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.segment.BaseDoubleColumnValueSelector; + +public class MomentSketchBuildAggregator implements Aggregator +{ + private final BaseDoubleColumnValueSelector valueSelector; + private final int k; + private final boolean compress; + + private MomentSketchWrapper momentsSketch; + + public MomentSketchBuildAggregator( + final BaseDoubleColumnValueSelector valueSelector, + final int k, + final boolean compress + ) + { + this.valueSelector = valueSelector; + this.k = k; + this.compress = compress; + momentsSketch = new MomentSketchWrapper(k); + momentsSketch.setCompressed(compress); + } + + @Override + public void aggregate() + { + momentsSketch.add(valueSelector.getDouble()); + } + + @Override + public Object get() + { + return momentsSketch; + } + + @Override + public float getFloat() + { + throw new UnsupportedOperationException("not implemented"); + } + + @Override + public long getLong() + { + throw new UnsupportedOperationException("not implemented"); + } + + @Override + public Aggregator clone() + { + return new MomentSketchBuildAggregator(valueSelector, k, compress); + } + + @Override + public void close() + { + momentsSketch = null; + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildBufferAggregator.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildBufferAggregator.java new file mode 100644 index 00000000000..7f049cf1534 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchBuildBufferAggregator.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.momentsketch.aggregator; + +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.segment.BaseDoubleColumnValueSelector; + +import java.nio.ByteBuffer; + +public class MomentSketchBuildBufferAggregator implements BufferAggregator +{ + private final BaseDoubleColumnValueSelector selector; + private final int k; + private final boolean compress; + + public MomentSketchBuildBufferAggregator( + final BaseDoubleColumnValueSelector valueSelector, + final int k, + final boolean compress + ) + { + this.selector = valueSelector; + this.k = k; + this.compress = compress; + } + + @Override + public synchronized void init(final ByteBuffer buffer, final int position) + { + ByteBuffer mutationBuffer = buffer.duplicate(); + mutationBuffer.position(position); + + MomentSketchWrapper emptyStruct = new MomentSketchWrapper(k); + emptyStruct.setCompressed(compress); + emptyStruct.toBytes(mutationBuffer); + } + + @Override + public synchronized void aggregate(final ByteBuffer buffer, final int position) + { + ByteBuffer mutationBuffer = buffer.duplicate(); + mutationBuffer.position(position); + + MomentSketchWrapper ms0 = MomentSketchWrapper.fromBytes(mutationBuffer); + double x = selector.getDouble(); + ms0.add(x); + + mutationBuffer.position(position); + ms0.toBytes(mutationBuffer); + } + + @Override + public synchronized Object get(final ByteBuffer buffer, final int position) + { + ByteBuffer mutationBuffer = buffer.duplicate(); + mutationBuffer.position(position); + return MomentSketchWrapper.fromBytes(mutationBuffer); + } + + @Override + public float getFloat(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public long getLong(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public void close() + { + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMaxPostAggregator.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMaxPostAggregator.java new file mode 100644 index 00000000000..38755a6b702 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMaxPostAggregator.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.query.aggregation.momentsketch.aggregator; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; + +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class MomentSketchMaxPostAggregator implements PostAggregator +{ + private final String name; + private final PostAggregator field; + + public static final String TYPE_NAME = "momentSketchMax"; + + @JsonCreator + public MomentSketchMaxPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field + ) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final MomentSketchWrapper sketch = (MomentSketchWrapper) field.compute(combinedAggregators); + return sketch.getMax(); + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing arrays of quantiles is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final MomentSketchMaxPostAggregator that = (MomentSketchMaxPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + return field.equals(that.field); + } + + @Override + public int hashCode() + { + return (name.hashCode() * 31 + field.hashCode()); + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.MOMENTS_SKETCH_TO_MAX_CACHE_TYPE_ID + ).appendCacheable(field); + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregator.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregator.java new file mode 100644 index 00000000000..c03dd369065 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregator.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.momentsketch.aggregator;
+
+import org.apache.druid.query.aggregation.Aggregator;
+import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
+import org.apache.druid.segment.ColumnValueSelector;
+
+public class MomentSketchMergeAggregator implements Aggregator
+{
+  private final ColumnValueSelector<MomentSketchWrapper> selector;
+  private MomentSketchWrapper momentsSketch;
+
+  public MomentSketchMergeAggregator(
+      ColumnValueSelector<MomentSketchWrapper> selector,
+      final int k,
+      final boolean compress
+  )
+  {
+    this.selector = selector;
+    this.momentsSketch = new MomentSketchWrapper(k);
+    momentsSketch.setCompressed(compress);
+  }
+
+  @Override
+  public void aggregate()
+  {
+    final MomentSketchWrapper sketch = selector.getObject();
+    if (sketch == null) {
+      return;
+    }
+    this.momentsSketch.merge(sketch);
+  }
+
+  @Override
+  public Object get()
+  {
+    return momentsSketch;
+  }
+
+  @Override
+  public float getFloat()
+  {
+    throw new UnsupportedOperationException("Not implemented");
+  }
+
+  @Override
+  public long getLong()
+  {
+    throw new UnsupportedOperationException("Not implemented");
+  }
+
+  @Override
+  public void close()
+  {
+    momentsSketch = null;
+  }
+}
diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregatorFactory.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregatorFactory.java
new file mode 100644
index 00000000000..27444589798
--- /dev/null
+++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeAggregatorFactory.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.momentsketch.aggregator;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.druid.query.aggregation.Aggregator;
+import org.apache.druid.query.aggregation.AggregatorUtil;
+import org.apache.druid.query.aggregation.BufferAggregator;
+import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.ColumnValueSelector;
+
+public class MomentSketchMergeAggregatorFactory extends MomentSketchAggregatorFactory
+{
+  public static final String TYPE_NAME = "momentSketchMerge";
+
+  @JsonCreator
+  public MomentSketchMergeAggregatorFactory(
+      @JsonProperty("name") final String name,
+      @JsonProperty("k") final Integer k,
+      @JsonProperty("compress") final Boolean compress
+  )
+  {
+    super(name, name, k, compress, AggregatorUtil.MOMENTS_SKETCH_MERGE_CACHE_TYPE_ID);
+  }
+
+  @Override
+  public Aggregator factorize(final ColumnSelectorFactory metricFactory)
+  {
+    final ColumnValueSelector<MomentSketchWrapper> selector = metricFactory.makeColumnValueSelector(
+        getFieldName());
+    return new MomentSketchMergeAggregator(selector, getK(), getCompress());
+  }
+
+  @Override
+  public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory)
+  {
+    final ColumnValueSelector<MomentSketchWrapper> selector = metricFactory.makeColumnValueSelector(
+        getFieldName()
+    );
+    return new MomentSketchMergeBufferAggregator(selector, getK(), getCompress());
+  }
+
+}
diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeBufferAggregator.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeBufferAggregator.java
new file mode 100644
index 00000000000..505d1ebed97
--- /dev/null
+++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMergeBufferAggregator.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.aggregation.momentsketch.aggregator;
+
+import org.apache.druid.query.aggregation.BufferAggregator;
+import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
+import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
+import org.apache.druid.segment.ColumnValueSelector;
+
+import java.nio.ByteBuffer;
+
+public class MomentSketchMergeBufferAggregator implements BufferAggregator
+{
+  private final ColumnValueSelector<MomentSketchWrapper> selector;
+  private final int size;
+  private final boolean compress;
+
+  public MomentSketchMergeBufferAggregator(
+      ColumnValueSelector<MomentSketchWrapper> selector,
+      int size,
+      boolean compress
+  )
+  {
+    this.selector = selector;
+    this.size = size;
+    this.compress = compress;
+  }
+
+  @Override
+  public void init(ByteBuffer buf, int position)
+  {
+    MomentSketchWrapper h = new MomentSketchWrapper(size);
+    h.setCompressed(compress);
+
+    ByteBuffer mutationBuffer = buf.duplicate();
+    mutationBuffer.position(position);
+    h.toBytes(mutationBuffer);
+  }
+
+  @Override
+  public void aggregate(ByteBuffer buf, int position)
+  {
+    MomentSketchWrapper msNext = selector.getObject();
+    if (msNext == null) {
+      return;
+    }
+    ByteBuffer mutationBuffer = buf.duplicate();
+    mutationBuffer.position(position);
+
+    MomentSketchWrapper ms0 = MomentSketchWrapper.fromBytes(mutationBuffer);
+    ms0.merge(msNext);
+
+    mutationBuffer.position(position);
+    ms0.toBytes(mutationBuffer);
+  }
+
+  @Override
+  public Object get(ByteBuffer buf, int position)
+  {
+    ByteBuffer mutationBuffer = buf.asReadOnlyBuffer();
+    mutationBuffer.position(position);
+    return MomentSketchWrapper.fromBytes(mutationBuffer);
+  }
+
+  @Override
+  public float getFloat(ByteBuffer buf, int position)
+  {
+    throw new UnsupportedOperationException("Not implemented");
+  }
+
+  @Override
+  public long getLong(ByteBuffer buf, int position)
+  {
+    throw new UnsupportedOperationException("Not implemented");
+  }
+
+  @Override
+  public double getDouble(ByteBuffer buf, int position)
+  {
+    throw new UnsupportedOperationException("Not implemented");
+  }
+
+  @Override
+  public void close()
+  {
+  }
+
+  @Override
+  public void inspectRuntimeShape(RuntimeShapeInspector inspector)
+  {
+    inspector.visit("selector", selector);
+  }
+}
diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMinPostAggregator.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMinPostAggregator.java
new file mode 100644
index 00000000000..b244243b505
--- /dev/null
+++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchMinPostAggregator.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.momentsketch.aggregator; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; + +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class MomentSketchMinPostAggregator implements PostAggregator +{ + private final String name; + private final PostAggregator field; + public static final String TYPE_NAME = "momentSketchMin"; + + @JsonCreator + public MomentSketchMinPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field + ) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final MomentSketchWrapper sketch = (MomentSketchWrapper) field.compute(combinedAggregators); + return sketch.getMin(); + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing arrays of quantiles is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final MomentSketchMinPostAggregator that = (MomentSketchMinPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + return field.equals(that.field); + } + + @Override + public int hashCode() + { + return (name.hashCode() * 31 + field.hashCode()); + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.MOMENTS_SKETCH_TO_MIN_CACHE_TYPE_ID + ).appendCacheable(field); + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } +} diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchQuantilePostAggregator.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchQuantilePostAggregator.java new file mode 100644 index 00000000000..81f5b63acf0 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentSketchQuantilePostAggregator.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.momentsketch.aggregator; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class MomentSketchQuantilePostAggregator implements PostAggregator +{ + private final String name; + private final PostAggregator field; + private final double[] fractions; + + public static final String TYPE_NAME = "momentSketchSolveQuantiles"; + + @JsonCreator + public MomentSketchQuantilePostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("fractions") final double[] fractions + ) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.fractions = Preconditions.checkNotNull(fractions, "array of fractions is null"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + public double[] getFractions() + { + return fractions; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final MomentSketchWrapper sketch = (MomentSketchWrapper) field.compute(combinedAggregators); + double[] quantiles = sketch.getQuantiles(fractions); + return quantiles; + } + + @Override + public Comparator getComparator() + { + throw new IAE("Comparing arrays of quantiles is not supported"); + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", fractions=" + Arrays.toString(fractions) + + "}"; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final MomentSketchQuantilePostAggregator that = (MomentSketchQuantilePostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + if (!Arrays.equals(fractions, that.fractions)) { + return false; + } + return field.equals(that.field); + } + + @Override + public int hashCode() + { + return (name.hashCode() * 31 + field.hashCode()) * 31 + Arrays.hashCode(fractions); + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + 
PostAggregatorIds.MOMENTS_SKETCH_TO_QUANTILES_CACHE_TYPE_ID + ) + .appendCacheable(field) + .appendDoubleArray(fractions); + return builder.build(); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } + +} diff --git a/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapperTest.java b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapperTest.java new file mode 100644 index 00000000000..74eadd0241e --- /dev/null +++ b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchWrapperTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.momentsketch; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class MomentSketchWrapperTest +{ + @Test + public void testDeserialize() + { + MomentSketchWrapper mw = new MomentSketchWrapper(10); + mw.setCompressed(false); + mw.add(10); + byte[] bs = mw.toByteArray(); + MomentSketchWrapper mw2 = MomentSketchWrapper.fromByteArray(bs); + + assertEquals(10, mw2.getPowerSums()[1], 1e-10); + } + + @Test + public void testSimpleSolve() + { + MomentSketchWrapper mw = new MomentSketchWrapper(13); + mw.setCompressed(true); + for (int x = 0; x < 101; x++) { + mw.add((double) x); + } + double[] ps = {0.0, 0.5, 1.0}; + double[] qs = mw.getQuantiles(ps); + assertEquals(0, qs[0], 1.0); + assertEquals(50, qs[1], 1.0); + } +} diff --git a/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java new file mode 100644 index 00000000000..2d5293b7191 --- /dev/null +++ b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.aggregation.momentsketch.aggregator; + + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.druid.data.input.Row; +import org.apache.druid.initialization.DruidModule; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchModule; +import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +@RunWith(Parameterized.class) +public class MomentsSketchAggregatorTest +{ + + private final AggregationTestHelper helper; + + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + public MomentsSketchAggregatorTest(final GroupByQueryConfig config) + { + DruidModule module = new MomentSketchModule(); + module.configure(null); + helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + module.getJacksonModules(), config, tempFolder); + } + + @Parameterized.Parameters(name = "{0}") + public static Collection constructorFeeder() + { + final List constructors = new ArrayList<>(); + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + constructors.add(new Object[]{config}); + } + return constructors; + } + + // this is to test Json properties and equals + @Test + public void serializeDeserializeFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + MomentSketchAggregatorFactory factory = new MomentSketchAggregatorFactory( + "name", "fieldName", 128, true + ); + + MomentSketchAggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + MomentSketchAggregatorFactory.class + ); + + assertEquals(factory, other); + } + + @Test + public void buildingSketchesAtIngestionTime() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("doubles_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\"],", + " \"dimensionExclusions\": [ \"sequenceNumber\"],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", + " }", + "}" + ), + "[{\"type\": \"momentSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 10, \"compress\": 
true}]", + 0, + // minTimestamp + Granularities.NONE, + 10, + // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"momentSketchMerge\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 10, \"compress\": true}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"momentSketchSolveQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"momentSketchMin\", \"name\": \"min\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", + " {\"type\": \"momentSketchMax\", \"name\": \"max\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List results = seq.toList(); + assertEquals(1, results.size()); + Row row = results.get(0); + double[] quantilesArray = (double[]) row.getRaw("quantiles"); + assertEquals(0, quantilesArray[0], 0.05); + assertEquals(.5, quantilesArray[1], 0.05); + assertEquals(1.0, quantilesArray[2], 0.05); + + Double minValue = (Double) row.getRaw("min"); + assertEquals(0.0011, minValue, 0.0001); + + Double maxValue = (Double) row.getRaw("max"); + assertEquals(0.9969, maxValue, 0.0001); + + MomentSketchWrapper sketchObject = (MomentSketchWrapper) row.getRaw("sketch"); + assertEquals(400.0, sketchObject.getPowerSums()[0], 1e-10); + } + + @Test + public void buildingSketchesAtQueryTime() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("doubles_build_data.tsv").getFile()), + String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [ \"product\"],", + " \"dimensionExclusions\": [\"sequenceNumber\"],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]", + " }", + "}" + ), + "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]", + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"momentSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 10}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List results = seq.toList(); + assertEquals(1, results.size()); + Row row = results.get(0); + + MomentSketchWrapper sketchObject = (MomentSketchWrapper) row.getRaw("sketch"); + // 9 total products since we pre-sum the values. 
+ assertEquals(9.0, sketchObject.getPowerSums()[0], 1e-10); + } +} + diff --git a/extensions-contrib/momentsketch/src/test/resources/doubles_build_data.tsv b/extensions-contrib/momentsketch/src/test/resources/doubles_build_data.tsv new file mode 100644 index 00000000000..bb59faf3da8 --- /dev/null +++ b/extensions-contrib/momentsketch/src/test/resources/doubles_build_data.tsv @@ -0,0 +1,400 @@ +2016010101 0 0 0.6529403005319299 +2016010101 1 0 0.9270214958987323 +2016010101 2 0 0.6383273609981486 +2016010101 3 0 0.8088289215633632 +2016010101 4 0 0.8163864917598281 +2016010101 5 0 0.38484848588530784 +2016010101 6 0 0.7690020468986823 +2016010101 7 0 0.6212078833139824 +2016010101 8 0 0.4915825094949512 +2016010101 9 0 0.688004059332008 +2016010101 10 0 0.2536908275250508 +2016010101 11 0 0.6618435914290263 +2016010101 12 0 0.7892773595797635 +2016010101 13 0 0.08857624134076048 +2016010101 14 0 0.11992633801904151 +2016010101 15 0 0.4959192800105586 +2016010101 16 0 0.5564893557708243 +2016010101 17 0 0.7755547456799993 +2016010101 18 0 0.06420706406984311 +2016010101 19 0 0.23085639094262378 +2016010101 20 7 0.012013916725163498 +2016010101 21 7 0.34077219818209503 +2016010101 22 7 0.8445966884204918 +2016010101 23 7 0.6466142718287953 +2016010101 24 7 0.43959032391415487 +2016010101 25 7 0.7768829233737787 +2016010101 26 7 0.5899544206136442 +2016010101 27 7 0.017782361911801825 +2016010101 28 7 0.5431916165782864 +2016010101 29 7 0.8218253174439416 +2016010101 30 7 0.6372788284951859 +2016010101 31 7 0.41403671834680933 +2016010101 32 7 0.042508330730374855 +2016010101 33 7 0.7416290691530969 +2016010101 34 7 0.6990557213726277 +2016010101 35 7 0.6302154208823348 +2016010101 36 7 0.021053567154993402 +2016010101 37 7 0.770280353784988 +2016010101 38 7 0.08205576978448703 +2016010101 39 7 0.2049660800682488 +2016010101 40 5 0.08129304678049831 +2016010101 41 5 0.17754747271638005 +2016010101 42 5 0.8441702357096768 +2016010101 43 5 0.9060464737257796 +2016010101 44 5 0.5970595512785409 +2016010101 45 5 0.843859346312315 +2016010101 46 5 0.1649847892987305 +2016010101 47 5 0.5279903496999094 +2016010101 48 5 0.08758749830556767 +2016010101 49 5 0.6088480522002063 +2016010101 50 5 0.31079133043670004 +2016010101 51 5 0.43062105356651226 +2016010101 52 5 0.8542989852099488 +2016010101 53 5 0.42443162807834045 +2016010101 54 5 0.5020327054358468 +2016010101 55 5 0.36453920012074237 +2016010101 56 5 0.9884597580348689 +2016010101 57 5 0.3770559586575706 +2016010101 58 5 0.5989237303385875 +2016010101 59 5 0.9926342802399872 +2016010101 60 4 0.7813961047849703 +2016010101 61 4 0.062171533805525425 +2016010101 62 4 0.5284977503473608 +2016010101 63 4 0.5924687065581794 +2016010101 64 4 0.06305234223879275 +2016010101 65 4 0.4959562731747129 +2016010101 66 4 0.6336733165353365 +2016010101 67 4 0.48860263540869875 +2016010101 68 4 0.9387610528974851 +2016010101 69 4 0.3391271652731308 +2016010101 70 4 0.5962837638971421 +2016010101 71 4 0.9190447294921896 +2016010101 72 4 0.33082943548872534 +2016010101 73 4 0.6236359023672029 +2016010101 74 4 0.27134427542016615 +2016010101 75 4 0.11665530238761901 +2016010101 76 4 0.10469260335277608 +2016010101 77 4 0.6824658847771211 +2016010101 78 4 0.6131047630496756 +2016010101 79 4 0.9838171536972515 +2016010101 80 4 0.7484669110852756 +2016010101 81 4 0.797620888697219 +2016010101 82 4 0.7166673353657907 +2016010101 83 4 0.46968710353176557 +2016010101 84 4 0.3998491199643106 +2016010101 85 4 0.6314883585976869 +2016010101 86 4 0.8305617875577815 
+2016010101 87 4 0.6867651870284084 +2016010101 88 4 0.9961677044887979 +2016010101 89 4 0.19745766301180412 +2016010101 90 4 0.2737652043079263 +2016010101 91 4 0.2954503444695358 +2016010101 92 4 0.6191902196833489 +2016010101 93 4 0.6828058006233482 +2016010101 94 4 0.7967115641510757 +2016010101 95 4 0.5485460823820962 +2016010101 96 4 0.4278132830938558 +2016010101 97 4 0.32194908458166194 +2016010101 98 4 0.07094920295725238 +2016010101 99 4 0.4351839393889565 +2016010101 100 1 0.6160833396611648 +2016010101 101 1 0.4652667787803648 +2016010101 102 1 0.5026953463132913 +2016010101 103 1 0.4103237191034753 +2016010101 104 1 0.3298554666697301 +2016010101 105 1 0.16907537273919138 +2016010101 106 1 0.6945260598989513 +2016010101 107 1 0.917138530496438 +2016010101 108 1 0.8810129148605083 +2016010101 109 1 0.11845626048380542 +2016010101 110 1 0.8848971155827816 +2016010101 111 1 0.9969103769603667 +2016010101 112 1 0.06274198529295416 +2016010101 113 1 0.2923616769686519 +2016010101 114 1 0.12621083638328634 +2016010101 115 1 0.9655188575577313 +2016010101 116 1 0.6074995164352884 +2016010101 117 1 0.5501887988201414 +2016010101 118 1 0.9406914128003497 +2016010101 119 1 0.03264873659277656 +2016010101 120 6 0.004852543443656487 +2016010101 121 6 0.11161194329252788 +2016010101 122 6 0.9403527002796559 +2016010101 123 6 0.8951866979503953 +2016010101 124 6 0.07629846897033454 +2016010101 125 6 0.9898485014275873 +2016010101 126 6 0.42827377712188075 +2016010101 127 6 0.4274796777951825 +2016010101 128 6 0.5569522946332676 +2016010101 129 6 0.028195121559112635 +2016010101 130 6 0.8599127909482382 +2016010101 131 6 0.3516112293128607 +2016010101 132 6 0.3888868189342449 +2016010101 133 6 0.644589126160206 +2016010101 134 6 0.7398741071492928 +2016010101 135 6 0.1998479248216123 +2016010101 136 6 0.8803215884594476 +2016010101 137 6 0.7079531966558515 +2016010101 138 6 0.7904290564015343 +2016010101 139 6 0.475671788742007 +2016010101 140 3 0.034708334899357096 +2016010101 141 3 0.4134637419532796 +2016010101 142 3 0.9757934592902832 +2016010101 143 3 0.37422347371609666 +2016010101 144 3 0.5904996168737154 +2016010101 145 3 0.5883259679727514 +2016010101 146 3 0.3380286015499171 +2016010101 147 3 0.42174393035143043 +2016010101 148 3 0.4764900074141757 +2016010101 149 3 0.01864239537224921 +2016010101 150 3 0.9124007087743986 +2016010101 151 3 0.8951275235699193 +2016010101 152 3 0.7037272142266654 +2016010101 153 3 0.5685506209266902 +2016010101 154 3 0.4104883958833594 +2016010101 155 3 0.7794005551450208 +2016010101 156 3 0.2879354697088996 +2016010101 157 3 0.5243215707259823 +2016010101 158 3 0.22238840286136063 +2016010101 159 3 0.11336472553284738 +2016010101 160 4 0.9800770037725316 +2016010101 161 4 0.7628237317889158 +2016010101 162 4 0.5355335935170453 +2016010101 163 4 0.9676939330565402 +2016010101 164 4 0.657825753108034 +2016010101 165 4 0.9175328548944673 +2016010101 166 4 0.6834666043257283 +2016010101 167 4 0.08580759367942314 +2016010101 168 4 0.3134740602060899 +2016010101 169 4 0.3218818254752742 +2016010101 170 4 0.6119297354994999 +2016010101 171 4 0.07086832750773142 +2016010101 172 4 0.2700864307032772 +2016010101 173 4 0.7497315076673637 +2016010101 174 4 0.4959921300968493 +2016010101 175 4 0.09294825796093753 +2016010101 176 4 0.4954515904444161 +2016010101 177 4 0.8820366880191506 +2016010101 178 4 0.17978298283728522 +2016010101 179 4 0.05259679741524781 +2016010101 180 5 0.4711892966981096 +2016010101 181 5 0.5965662941715105 +2016010101 182 5 
0.4775201668966973 +2016010101 183 5 0.05084576687030873 +2016010101 184 5 0.16680660677593928 +2016010101 185 5 0.9342287333653685 +2016010101 186 5 0.8153161893769392 +2016010101 187 5 0.9362517669519288 +2016010101 188 5 0.10865218471840699 +2016010101 189 5 0.44665378915111065 +2016010101 190 5 0.8804454791937898 +2016010101 191 5 0.20666928346935398 +2016010101 192 5 0.7052479677101612 +2016010101 193 5 0.5006205470200923 +2016010101 194 5 0.23220501028575968 +2016010101 195 5 0.11776507130391467 +2016010101 196 5 0.592011744069295 +2016010101 197 5 0.7089191450076786 +2016010101 198 5 0.7269340552231702 +2016010101 199 5 0.7049554871226075 +2016010101 200 1 0.44078367400761076 +2016010101 201 1 0.7715264806037321 +2016010101 202 1 0.10151701902103971 +2016010101 203 1 0.661891806135609 +2016010101 204 1 0.23095745116331567 +2016010101 205 1 0.46625278601359255 +2016010101 206 1 0.5912486124707177 +2016010101 207 1 0.963946871892115 +2016010101 208 1 0.8172596270687692 +2016010101 209 1 0.05745699928199144 +2016010101 210 1 0.40612684342877337 +2016010101 211 1 0.6330844777969608 +2016010101 212 1 0.3148973406065705 +2016010101 213 1 0.23230462811318175 +2016010101 214 1 0.9960772952945196 +2016010101 215 1 0.4581376339786414 +2016010101 216 1 0.7181494575770677 +2016010101 217 1 0.04277917580280799 +2016010101 218 1 0.11137419446625674 +2016010101 219 1 0.014716278313423037 +2016010101 220 2 0.8988603727313186 +2016010101 221 2 0.8192124226306603 +2016010101 222 2 0.9304683598956597 +2016010101 223 2 0.4375546733938238 +2016010101 224 2 0.7676359685332207 +2016010101 225 2 0.30977859822027964 +2016010101 226 2 0.008595955287459267 +2016010101 227 2 0.6790605343724216 +2016010101 228 2 0.36949588946147993 +2016010101 229 2 0.3826798435706562 +2016010101 230 2 0.13836513167087128 +2016010101 231 2 0.4451570472364902 +2016010101 232 2 0.8944067771338549 +2016010101 233 2 0.6068095655362902 +2016010101 234 2 0.7084870042917992 +2016010101 235 2 0.5867363290655241 +2016010101 236 2 0.6903863088381504 +2016010101 237 2 0.30984947936089124 +2016010101 238 2 0.31561088279452665 +2016010101 239 2 0.006286479849849758 +2016010101 240 5 0.34397466439693725 +2016010101 241 5 0.052476003295899964 +2016010101 242 5 0.726106045184451 +2016010101 243 5 0.01559115401009159 +2016010101 244 5 0.9219270739836661 +2016010101 245 5 0.5147917330760431 +2016010101 246 5 0.41919804470784205 +2016010101 247 5 0.4145101775865617 +2016010101 248 5 0.34153038022995796 +2016010101 249 5 0.9503817180587767 +2016010101 250 5 0.6958354849389804 +2016010101 251 5 0.46000811480536297 +2016010101 252 5 0.18379911670616378 +2016010101 253 5 0.20973108758556713 +2016010101 254 5 0.5979201603287885 +2016010101 255 5 0.5552419362393491 +2016010101 256 5 0.10996555307297629 +2016010101 257 5 0.3591453585622102 +2016010101 258 5 0.06098055111386691 +2016010101 259 5 0.5227270267924988 +2016010101 260 0 0.8492702312836989 +2016010101 261 0 0.5941242001151825 +2016010101 262 0 0.6840733026822607 +2016010101 263 0 0.8109777000249937 +2016010101 264 0 0.8599286045013937 +2016010101 265 0 0.7828806670746145 +2016010101 266 0 0.8102260971867188 +2016010101 267 0 0.38306094770114385 +2016010101 268 0 0.7093609268723879 +2016010101 269 0 0.4806583187577358 +2016010101 270 0 0.5766489331365172 +2016010101 271 0 0.7565067278238041 +2016010101 272 0 0.8262768908267573 +2016010101 273 0 0.7951015619138146 +2016010101 274 0 0.1938448910588796 +2016010101 275 0 0.8884608583839426 +2016010101 276 0 0.7046203516594505 +2016010101 277 0 
0.5951074760704175 +2016010101 278 0 0.38207409719784036 +2016010101 279 0 0.2445271560830221 +2016010101 280 7 0.6032919624054952 +2016010101 281 7 0.1473220747987144 +2016010101 282 7 0.38396643099307604 +2016010101 283 7 0.4431561135554619 +2016010101 284 7 0.896578318093225 +2016010101 285 7 0.6729206122043515 +2016010101 286 7 0.8498821349478478 +2016010101 287 7 0.48231924024179784 +2016010101 288 7 0.005379480238994816 +2016010101 289 7 0.8017936717647264 +2016010101 290 7 0.08193232952990348 +2016010101 291 7 0.3422943366454193 +2016010101 292 7 0.6081556855207957 +2016010101 293 7 0.641193222941943 +2016010101 294 7 0.3716858024654186 +2016010101 295 7 0.0011169303830090849 +2016010101 296 7 0.4698784438339285 +2016010101 297 7 0.958198841287214 +2016010101 298 7 0.730945048929339 +2016010101 299 7 0.1858601884405512 +2016010101 300 5 0.1020825694779407 +2016010101 301 5 0.5742385074938443 +2016010101 302 5 0.9846817584978909 +2016010101 303 5 0.3858694391491331 +2016010101 304 5 0.9822246873202894 +2016010101 305 5 0.39822015482143314 +2016010101 306 5 0.6575924137957005 +2016010101 307 5 0.02359557062746842 +2016010101 308 5 0.42059510563039115 +2016010101 309 5 0.5970764856116284 +2016010101 310 5 0.2817399870096221 +2016010101 311 5 0.5334091165258412 +2016010101 312 5 0.31199853410796585 +2016010101 313 5 0.3156991306990594 +2016010101 314 5 0.9560285139855889 +2016010101 315 5 0.7846951771498516 +2016010101 316 5 0.009731486767097897 +2016010101 317 5 0.22625857375026215 +2016010101 318 5 0.8580955944724618 +2016010101 319 5 0.9622008926137687 +2016010101 320 5 0.023872302930851297 +2016010101 321 5 0.3580981601151092 +2016010101 322 5 0.9120442264954038 +2016010101 323 5 0.5968491989965334 +2016010101 324 5 0.5028516120506729 +2016010101 325 5 0.30590552314314 +2016010101 326 5 0.5566430714368423 +2016010101 327 5 0.6441099124064397 +2016010101 328 5 0.8765287851559298 +2016010101 329 5 0.38405928947408385 +2016010101 330 5 0.29654203975364 +2016010101 331 5 0.3606921959261904 +2016010101 332 5 0.9617038824842609 +2016010101 333 5 0.3103700669261584 +2016010101 334 5 0.4935170174690311 +2016010101 335 5 0.34757561267296444 +2016010101 336 5 0.1236918485545484 +2016010101 337 5 0.24925258973306597 +2016010101 338 5 0.4104821367672965 +2016010101 339 5 0.3621850216936935 +2016010101 340 6 0.3816099229918041 +2016010101 341 6 0.9496667754823915 +2016010101 342 6 0.5594605720642025 +2016010101 343 6 0.8537860901562698 +2016010101 344 6 0.74787202967909 +2016010101 345 6 0.29699361421249604 +2016010101 346 6 0.035943527086235605 +2016010101 347 6 0.20106098029261277 +2016010101 348 6 0.6589994525818863 +2016010101 349 6 0.3851541727199762 +2016010101 350 6 0.12262059605539744 +2016010101 351 6 0.33383436408012057 +2016010101 352 6 0.5087733967157267 +2016010101 353 6 0.34978350071897446 +2016010101 354 6 0.9171509423859847 +2016010101 355 6 0.6395164525815664 +2016010101 356 6 0.659637993918835 +2016010101 357 6 0.5689746534857604 +2016010101 358 6 0.03266513163571427 +2016010101 359 6 0.5863675010868861 +2016010101 360 9 0.8665167898047901 +2016010101 361 9 0.7933960420424948 +2016010101 362 9 0.8409667771425247 +2016010101 363 9 0.9544310598825743 +2016010101 364 9 0.36206869840549716 +2016010101 365 9 0.253957983880155 +2016010101 366 9 0.08496022679431525 +2016010101 367 9 0.5483782518766319 +2016010101 368 9 0.41440902281408365 +2016010101 369 9 0.2947889064970717 +2016010101 370 9 0.659477180019486 +2016010101 371 9 0.9016744422830162 +2016010101 372 9 0.4692828259677926 
+2016010101 373 9 0.4221974527778145 +2016010101 374 9 0.26318360778150285 +2016010101 375 9 0.10064081807071767 +2016010101 376 9 0.7781802619858804 +2016010101 377 9 0.529215767115243 +2016010101 378 9 0.21094147073619007 +2016010101 379 9 0.18894985078463877 +2016010101 380 5 0.20683422198832369 +2016010101 381 5 0.9506923735546904 +2016010101 382 5 0.25734447316063913 +2016010101 383 5 0.6439025323539892 +2016010101 384 5 0.9099080819805052 +2016010101 385 5 0.9331714165375404 +2016010101 386 5 0.24979840404324272 +2016010101 387 5 0.40270120064812764 +2016010101 388 5 0.35895113537427137 +2016010101 389 5 0.44814114645480074 +2016010101 390 5 0.437368419580639 +2016010101 391 5 0.2777496228001308 +2016010101 392 5 0.09350862521048608 +2016010101 393 5 0.10366624548706516 +2016010101 394 5 0.8715309310993357 +2016010101 395 5 0.8953111125914557 +2016010101 396 5 0.9410866942183567 +2016010101 397 5 0.16367286942347592 +2016010101 398 5 0.6995415361957786 +2016010101 399 5 0.7170527361072194 diff --git a/pom.xml b/pom.xml index 6c6b1945f6f..351e735b0e5 100644 --- a/pom.xml +++ b/pom.xml @@ -168,6 +168,7 @@ extensions-contrib/opentsdb-emitter extensions-contrib/materialized-view-maintenance extensions-contrib/materialized-view-selection + extensions-contrib/momentsketch distribution diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java index 19cf467b546..dddafbafd82 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java @@ -117,6 +117,10 @@ public class AggregatorUtil public static final byte BLOOM_FILTER_CACHE_TYPE_ID = 0x34; public static final byte BLOOM_FILTER_MERGE_CACHE_TYPE_ID = 0x35; + // Quantiles sketch in momentsketch extension + public static final byte MOMENTS_SKETCH_BUILD_CACHE_TYPE_ID = 0x36; + public static final byte MOMENTS_SKETCH_MERGE_CACHE_TYPE_ID = 0x37; + /** * returns the list of dependent postAggregators that should be calculated in order to calculate given postAgg * diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java index 6d576a4a783..ea9fe883b03 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java @@ -45,4 +45,7 @@ public class PostAggregatorIds public static final byte ZTEST = 21; public static final byte PVALUE_FROM_ZTEST = 22; public static final byte THETA_SKETCH_CONSTANT = 23; + public static final byte MOMENTS_SKETCH_TO_QUANTILES_CACHE_TYPE_ID = 24; + public static final byte MOMENTS_SKETCH_TO_MIN_CACHE_TYPE_ID = 25; + public static final byte MOMENTS_SKETCH_TO_MAX_CACHE_TYPE_ID = 26; } diff --git a/processing/src/main/java/org/apache/druid/query/cache/CacheKeyBuilder.java b/processing/src/main/java/org/apache/druid/query/cache/CacheKeyBuilder.java index 03122e63335..24c65ac5f5d 100644 --- a/processing/src/main/java/org/apache/druid/query/cache/CacheKeyBuilder.java +++ b/processing/src/main/java/org/apache/druid/query/cache/CacheKeyBuilder.java @@ -64,6 +64,7 @@ public class CacheKeyBuilder static final byte STRING_LIST_KEY = 8; static final byte CACHEABLE_KEY = 9; static final byte CACHEABLE_LIST_KEY = 10; + static final byte 
DOUBLE_ARRAY_KEY = 11; static final byte[] STRING_SEPARATOR = new byte[]{(byte) 0xFF}; static final byte[] EMPTY_BYTES = StringUtils.EMPTY_BYTES; @@ -92,6 +93,13 @@ public class CacheKeyBuilder return buffer.array(); } + private static byte[] doubleArrayToByteArray(double[] input) + { + final ByteBuffer buffer = ByteBuffer.allocate(Double.BYTES * input.length); + buffer.asDoubleBuffer().put(input); + return buffer.array(); + } + private static byte[] cacheableToByteArray(@Nullable Cacheable cacheable) { if (cacheable == null) { @@ -254,6 +262,12 @@ public class CacheKeyBuilder return this; } + public CacheKeyBuilder appendDoubleArray(double[] input) + { + appendItem(DOUBLE_ARRAY_KEY, doubleArrayToByteArray(input)); + return this; + } + public CacheKeyBuilder appendFloatArray(float[] input) { appendItem(FLOAT_ARRAY_KEY, floatArrayToByteArray(input)); diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java b/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java index 64f37092a71..ba416d6593a 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java @@ -24,6 +24,7 @@ import org.apache.druid.java.util.common.ISE; import javax.annotation.Nullable; import java.util.HashMap; import java.util.Map; +import java.util.function.Supplier; /** */ @@ -44,4 +45,11 @@ public class ComplexMetrics } complexSerializers.put(type, serde); } + + public static void registerSerde(String type, Supplier serdeSupplier) + { + if (ComplexMetrics.getSerdeForType(type) == null) { + ComplexMetrics.registerSerde(type, serdeSupplier.get()); + } + } } From 1f299408114d35a7101eb59fdcc3b6c9e30d8286 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Wed, 13 Feb 2019 21:32:43 -0800 Subject: [PATCH 15/25] Fix momentsketch build issues (#7074) * Fix momentsketch build issues * Remove unused section in pom * Fix test * Remove unused method * Checkstyle --- extensions-contrib/momentsketch/pom.xml | 8 +------- .../momentsketch/MomentSketchModule.java | 14 ++++++++++---- .../org.apache.druid.initialization.DruidModule | 16 ++++++++++++++++ .../aggregator/MomentsSketchAggregatorTest.java | 2 +- .../druid/segment/serde/ComplexMetrics.java | 8 -------- 5 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 extensions-contrib/momentsketch/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule diff --git a/extensions-contrib/momentsketch/pom.xml b/extensions-contrib/momentsketch/pom.xml index b8926413a99..1ec67568985 100644 --- a/extensions-contrib/momentsketch/pom.xml +++ b/extensions-contrib/momentsketch/pom.xml @@ -24,7 +24,7 @@ druid org.apache.druid - 0.14.0-incubating-SNAPSHOT + 0.15.0-incubating-SNAPSHOT ../../pom.xml 4.0.0 @@ -34,12 +34,6 @@ druid-momentsketch Aggregators for the approximate quantile moment sketch - - - UTF-8 - 0.12.2 - - com.github.stanford-futuredata.momentsketch diff --git a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java index 29f21355d26..ece098e48ad 100644 --- a/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java +++ b/extensions-contrib/momentsketch/src/main/java/org/apache/druid/query/aggregation/momentsketch/MomentSketchModule.java @@ -22,6 +22,7 @@ 
package org.apache.druid.query.aggregation.momentsketch; import com.fasterxml.jackson.databind.Module; import com.fasterxml.jackson.databind.jsontype.NamedType; import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.inject.Binder; import org.apache.druid.initialization.DruidModule; @@ -74,9 +75,14 @@ public class MomentSketchModule implements DruidModule @Override public void configure(Binder binder) { - ComplexMetrics.registerSerde( - MomentSketchAggregatorFactory.TYPE_NAME, - MomentSketchComplexMetricSerde::new - ); + registerSerde(); + } + + @VisibleForTesting + public static void registerSerde() + { + if (ComplexMetrics.getSerdeForType(MomentSketchAggregatorFactory.TYPE_NAME) == null) { + ComplexMetrics.registerSerde(MomentSketchAggregatorFactory.TYPE_NAME, new MomentSketchComplexMetricSerde()); + } } } diff --git a/extensions-contrib/momentsketch/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule b/extensions-contrib/momentsketch/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule new file mode 100644 index 00000000000..8fb117369b0 --- /dev/null +++ b/extensions-contrib/momentsketch/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.druid.query.aggregation.momentsketch.MomentSketchModule diff --git a/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java index 2d5293b7191..3927dd4ffe4 100644 --- a/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java +++ b/extensions-contrib/momentsketch/src/test/java/org/apache/druid/query/aggregation/momentsketch/aggregator/MomentsSketchAggregatorTest.java @@ -55,8 +55,8 @@ public class MomentsSketchAggregatorTest public MomentsSketchAggregatorTest(final GroupByQueryConfig config) { + MomentSketchModule.registerSerde(); DruidModule module = new MomentSketchModule(); - module.configure(null); helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( module.getJacksonModules(), config, tempFolder); } diff --git a/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java b/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java index ba416d6593a..64f37092a71 100644 --- a/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java +++ b/processing/src/main/java/org/apache/druid/segment/serde/ComplexMetrics.java @@ -24,7 +24,6 @@ import org.apache.druid.java.util.common.ISE; import javax.annotation.Nullable; import java.util.HashMap; import java.util.Map; -import java.util.function.Supplier; /** */ @@ -45,11 +44,4 @@ public class ComplexMetrics } complexSerializers.put(type, serde); } - - public static void registerSerde(String type, Supplier serdeSupplier) - { - if (ComplexMetrics.getSerdeForType(type) == null) { - ComplexMetrics.registerSerde(type, serdeSupplier.get()); - } - } } From ee91e27fe7dacbce0b231de760aefdae3049d764 Mon Sep 17 00:00:00 2001 From: awelsh93 <32643586+awelsh93@users.noreply.github.com> Date: Thu, 14 Feb 2019 14:38:09 +0000 Subject: [PATCH 16/25] Update api-reference.md doc (#7065) - moving description of coordinator isLeader endpoint --- docs/content/operations/api-reference.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/content/operations/api-reference.md b/docs/content/operations/api-reference.md index 7a0b43421fc..23d118864a9 100644 --- a/docs/content/operations/api-reference.md +++ b/docs/content/operations/api-reference.md @@ -74,17 +74,15 @@ Returns the current leader Coordinator of the cluster. * `/druid/coordinator/v1/isLeader` -Returns true if the Coordinator receiving the request is the current leader. - -#### Segment Loading - -##### GET - Returns a JSON object with field "leader", either true or false, indicating if this server is the current leader Coordinator of the cluster. In addition, returns HTTP 200 if the server is the current leader and HTTP 404 if not. This is suitable for use as a load balancer status check if you only want the active leader to be considered in-service at the load balancer. +#### Segment Loading + +##### GET + * `/druid/coordinator/v1/loadstatus` Returns the percentage of segments actually loaded in the cluster versus segments that should be loaded in the cluster. From 0fa90008496926c15426710b0dd4698bdc224bac Mon Sep 17 00:00:00 2001 From: scrawfor Date: Fri, 15 Feb 2019 01:52:03 -0500 Subject: [PATCH 17/25] Add Postgresql SqlFirehose (#6813) * Add Postgresql SqlFirehose * Fix Code Style. * Fix style. 
* Fix Import Order.

* Add Line Break before package.
---
 docs/content/ingestion/firehose.md             | 22 +++++--
 .../PostgresqlFirehoseDatabaseConnector.java   | 58 +++++++++++++++++++
 .../postgresql/PostgreSQLConnector.java        | 20 ++++---
 .../PostgreSQLMetadataStorageModule.java       | 12 +++-
 4 files changed, 96 insertions(+), 16 deletions(-)
 create mode 100644 extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/firehose/PostgresqlFirehoseDatabaseConnector.java

diff --git a/docs/content/ingestion/firehose.md b/docs/content/ingestion/firehose.md
index ff10206dc24..faa83f3847e 100644
--- a/docs/content/ingestion/firehose.md
+++ b/docs/content/ingestion/firehose.md
@@ -110,7 +110,10 @@ A sample ingest firehose spec is shown below -
#### SqlFirehose

SqlFirehoseFactory can be used to ingest events residing in an RDBMS. The database connection information is provided as part of the ingestion spec. For each query, the results are fetched locally and indexed. If there are multiple queries from which data needs to be indexed, queries are prefetched in the background up to `maxFetchCapacityBytes` bytes.
-An example is shown below:
+
+Requires one of the following extensions:
+ * [MySQL Metadata Store](../ingestion/mysql.html).
+ * [PostgreSQL Metadata Store](../ingestion/postgresql.html).

```json
{
  "type" : "sql",
  "database": {
    "type": "mysql",
    "connectorConfig" : {
-        "connectURI" : "jdbc:mysql://host:port/schema",
-        "user" : "user",
-        "password" : "password"
+      "connectURI" : "jdbc:mysql://host:port/schema",
+      "user" : "user",
+      "password" : "password"
    }
  },
  "sqls" : ["SELECT * FROM table1", "SELECT * FROM table2"]
}
```
-
|property|description|default|required?|
|--------|-----------|-------|---------|
|type|This should be "sql".||Yes|
-|database|Specifies the database connection details.`type` should specify the database type and `connectorConfig` should specify the database connection properties via `connectURI`, `user` and `password`||Yes|
+|database|Specifies the database connection details.||Yes|
|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|No|
|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|No|
|prefetchTriggerBytes|Threshold to trigger prefetching SQL result objects.|maxFetchCapacityBytes / 2|No|
|fetchTimeout|Timeout for fetching the result set.|60000|No|
|foldCase|Toggle case folding of database column names. This may be enabled in cases where the database returns case insensitive column names in query results.|false|No|
|sqls|List of SQL queries where each SQL query would retrieve the data to be indexed.||Yes|

+#### Database
+
+|property|description|default|required?|
+|--------|-----------|-------|---------|
+|type|The type of database to query. Valid values are `mysql` and `postgresql`.||Yes|
+|connectorConfig|Specify the database connection properties via `connectURI`, `user` and `password` (see the PostgreSQL sketch below).||Yes|
+
+
### CombiningFirehose

This firehose can be used to combine and merge data from a list of different firehoses.
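For reference, once the PostgreSQL connector added by this patch is loaded, the `database` block of the SqlFirehose spec documented above accepts `"type": "postgresql"` alongside `"type": "mysql"`. A minimal sketch of such a spec follows; the host, port, schema, and credentials are placeholders rather than values taken from this patch:

```json
{
  "type" : "sql",
  "database": {
    "type": "postgresql",
    "connectorConfig" : {
      "connectURI" : "jdbc:postgresql://host:port/schema",
      "user" : "user",
      "password" : "password"
    }
  },
  "sqls" : ["SELECT * FROM table1", "SELECT * FROM table2"]
}
```

The rest of the spec (the `sqls` list and the cache and prefetch properties) behaves exactly as in the MySQL example above.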
diff --git a/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/firehose/PostgresqlFirehoseDatabaseConnector.java b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/firehose/PostgresqlFirehoseDatabaseConnector.java new file mode 100644 index 00000000000..e40d44439fc --- /dev/null +++ b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/firehose/PostgresqlFirehoseDatabaseConnector.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.firehose; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.commons.dbcp2.BasicDataSource; +import org.apache.druid.metadata.MetadataStorageConnectorConfig; +import org.apache.druid.metadata.SQLFirehoseDatabaseConnector; +import org.skife.jdbi.v2.DBI; + + +@JsonTypeName("postgresql") +public class PostgresqlFirehoseDatabaseConnector extends SQLFirehoseDatabaseConnector +{ + private final DBI dbi; + private final MetadataStorageConnectorConfig connectorConfig; + + public PostgresqlFirehoseDatabaseConnector( + @JsonProperty("connectorConfig") MetadataStorageConnectorConfig connectorConfig + ) + { + this.connectorConfig = connectorConfig; + final BasicDataSource datasource = getDatasource(connectorConfig); + datasource.setDriverClassLoader(getClass().getClassLoader()); + datasource.setDriverClassName("org.postgresql.Driver"); + this.dbi = new DBI(datasource); + } + + @JsonProperty + public MetadataStorageConnectorConfig getConnectorConfig() + { + return connectorConfig; + } + + @Override + public DBI getDBI() + { + return dbi; + } +} diff --git a/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java index 52e11ad36c1..e234a157989 100644 --- a/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java +++ b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLConnector.java @@ -148,10 +148,10 @@ public class PostgreSQLConnector extends SQLMetadataConnector return !handle.createQuery( "SELECT tablename FROM pg_catalog.pg_tables WHERE schemaname = 'public' AND tablename ILIKE :tableName" ) - .bind("tableName", tableName) - .map(StringMapper.FIRST) - .list() - .isEmpty(); + .bind("tableName", tableName) + .map(StringMapper.FIRST) + .list() + .isEmpty(); } @Override @@ -184,10 +184,14 @@ public class PostgreSQLConnector extends SQLMetadataConnector } else { handle.createStatement( 
StringUtils.format( - "BEGIN;\n" + - "LOCK TABLE %1$s IN SHARE ROW EXCLUSIVE MODE;\n" + - "WITH upsert AS (UPDATE %1$s SET %3$s=:value WHERE %2$s=:key RETURNING *)\n" + - " INSERT INTO %1$s (%2$s, %3$s) SELECT :key, :value WHERE NOT EXISTS (SELECT * FROM upsert)\n;" + + "BEGIN;\n" + + + "LOCK TABLE %1$s IN SHARE ROW EXCLUSIVE MODE;\n" + + + "WITH upsert AS (UPDATE %1$s SET %3$s=:value WHERE %2$s=:key RETURNING *)\n" + + + " INSERT INTO %1$s (%2$s, %3$s) SELECT :key, :value WHERE NOT EXISTS (SELECT * FROM upsert)\n;" + + "COMMIT;", tableName, keyColumn, diff --git a/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLMetadataStorageModule.java b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLMetadataStorageModule.java index 6cbb09b8cc9..f10de659f5d 100644 --- a/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLMetadataStorageModule.java +++ b/extensions-core/postgresql-metadata-storage/src/main/java/org/apache/druid/metadata/storage/postgresql/PostgreSQLMetadataStorageModule.java @@ -20,9 +20,11 @@ package org.apache.druid.metadata.storage.postgresql; import com.fasterxml.jackson.databind.Module; -import com.google.common.collect.ImmutableList; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import com.fasterxml.jackson.databind.module.SimpleModule; import com.google.inject.Binder; import com.google.inject.Key; +import org.apache.druid.firehose.PostgresqlFirehoseDatabaseConnector; import org.apache.druid.guice.JsonConfigProvider; import org.apache.druid.guice.LazySingleton; import org.apache.druid.guice.PolyBind; @@ -35,6 +37,7 @@ import org.apache.druid.metadata.NoopMetadataStorageProvider; import org.apache.druid.metadata.PostgreSQLMetadataStorageActionHandlerFactory; import org.apache.druid.metadata.SQLMetadataConnector; +import java.util.Collections; import java.util.List; public class PostgreSQLMetadataStorageModule extends SQLMetadataStorageDruidModule implements DruidModule @@ -50,7 +53,12 @@ public class PostgreSQLMetadataStorageModule extends SQLMetadataStorageDruidModu @Override public List getJacksonModules() { - return ImmutableList.of(); + return Collections.singletonList( + new SimpleModule() + .registerSubtypes( + new NamedType(PostgresqlFirehoseDatabaseConnector.class, "postgresql") + ) + ); } @Override From c7eeeabf458b9a9afce086f4e5fbc456bf92d8e2 Mon Sep 17 00:00:00 2001 From: Justin Borromeo Date: Fri, 15 Feb 2019 13:36:46 -0800 Subject: [PATCH 18/25] 2528 Replace Incremental Index Global Flags with Getters (#7043) * Eliminated reportParseExceptions and deserializeComplexMetrics * Removed more global flags * Cleanup * Addressed Surekha's recommendations --- .../segment/incremental/IncrementalIndex.java | 55 ++++++++++++------- .../incremental/OffheapIncrementalIndex.java | 21 +++---- .../incremental/OnheapIncrementalIndex.java | 15 +++-- .../OnheapIncrementalIndexBenchmark.java | 13 ++--- .../druid/segment/realtime/plumber/Sink.java | 2 +- 5 files changed, 58 insertions(+), 48 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java index 865cbf45e7b..39df02406de 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java +++ 
b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndex.java @@ -97,8 +97,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Stream; -/** - */ public abstract class IncrementalIndex extends AbstractIndex implements Iterable, Closeable { private volatile DateTime maxIngestedEventTime; @@ -250,7 +248,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp /** * Setting deserializeComplexMetrics to false is necessary for intermediate aggregation such as groupBy that * should not deserialize input columns using ComplexMetricSerde for aggregators that return complex metrics. - * + *

* Set concurrentEventAdd to true to indicate that adding of input row should be thread-safe (for example, groupBy * where the multiple threads can add concurrently to the IncrementalIndex). * @@ -482,12 +480,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp // Note: This method needs to be thread safe. protected abstract AddToFactsResult addToFacts( - AggregatorFactory[] metrics, - boolean deserializeComplexMetrics, - boolean reportParseExceptions, InputRow row, - AtomicInteger numEntries, - AtomicLong sizeInBytes, IncrementalIndexRow key, ThreadLocal rowContainer, Supplier rowSupplier, @@ -608,12 +601,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp { IncrementalIndexRowResult incrementalIndexRowResult = toIncrementalIndexRow(row); final AddToFactsResult addToFactsResult = addToFacts( - metrics, - deserializeComplexMetrics, - reportParseExceptions, row, - numEntries, - bytesInMemory, incrementalIndexRowResult.getIncrementalIndexRow(), in, rowSupplier, @@ -625,7 +613,11 @@ public abstract class IncrementalIndex extends AbstractIndex imp incrementalIndexRowResult.getParseExceptionMessages(), addToFactsResult.getParseExceptionMessages() ); - return new IncrementalIndexAddResult(addToFactsResult.getRowCount(), addToFactsResult.getBytesInMemory(), parseException); + return new IncrementalIndexAddResult( + addToFactsResult.getRowCount(), + addToFactsResult.getBytesInMemory(), + parseException + ); } @VisibleForTesting @@ -785,9 +777,29 @@ public abstract class IncrementalIndex extends AbstractIndex imp return numEntries.get(); } - public long getBytesInMemory() + boolean getDeserializeComplexMetrics() { - return bytesInMemory.get(); + return deserializeComplexMetrics; + } + + boolean getReportParseExceptions() + { + return reportParseExceptions; + } + + AtomicInteger getNumEntries() + { + return numEntries; + } + + AggregatorFactory[] getMetrics() + { + return metrics; + } + + public AtomicLong getBytesInMemory() + { + return bytesInMemory; } private long getMinTimeMillis() @@ -908,7 +920,10 @@ public abstract class IncrementalIndex extends AbstractIndex imp * Index dimension ordering could be changed to initialize from DimensionsSpec after resolution of * https://github.com/apache/incubator-druid/issues/2011 */ - public void loadDimensionIterable(Iterable oldDimensionOrder, Map oldColumnCapabilities) + public void loadDimensionIterable( + Iterable oldDimensionOrder, + Map oldColumnCapabilities + ) { synchronized (dimensionDescs) { if (!dimensionDescs.isEmpty()) { @@ -1289,7 +1304,9 @@ public abstract class IncrementalIndex extends AbstractIndex imp public Iterator iterator(boolean descending) { if (descending && sortFacts) { - return ((ConcurrentNavigableMap) facts).descendingMap().keySet().iterator(); + return ((ConcurrentNavigableMap) facts).descendingMap() + .keySet() + .iterator(); } return keySet().iterator(); } @@ -1387,7 +1404,7 @@ public abstract class IncrementalIndex extends AbstractIndex imp { if (descending && sortFacts) { return timeOrderedConcat(((ConcurrentNavigableMap>) facts) - .descendingMap().values(), true).iterator(); + .descendingMap().values(), true).iterator(); } return timeOrderedConcat(facts.values(), false).iterator(); } diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/OffheapIncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/OffheapIncrementalIndex.java index f18f76879c3..95c88fc9606 100644 --- 
a/processing/src/main/java/org/apache/druid/segment/incremental/OffheapIncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/OffheapIncrementalIndex.java @@ -40,9 +40,9 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; /** + * */ public class OffheapIncrementalIndex extends IncrementalIndex { @@ -133,19 +133,15 @@ public class OffheapIncrementalIndex extends IncrementalIndex } } - aggsTotalSize = aggOffsetInBuffer[metrics.length - 1] + metrics[metrics.length - 1].getMaxIntermediateSizeWithNulls(); + aggsTotalSize = aggOffsetInBuffer[metrics.length - 1] + metrics[metrics.length + - 1].getMaxIntermediateSizeWithNulls(); return new BufferAggregator[metrics.length]; } @Override protected AddToFactsResult addToFacts( - AggregatorFactory[] metrics, - boolean deserializeComplexMetrics, - boolean reportParseExceptions, InputRow row, - AtomicInteger numEntries, - AtomicLong sizeInBytes, // ignored, added to make abstract class method impl happy IncrementalIndexRow key, ThreadLocal rowContainer, Supplier rowSupplier, @@ -157,6 +153,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex int bufferOffset; synchronized (this) { + final AggregatorFactory[] metrics = getMetrics(); final int priorIndex = facts.getPriorIndex(key); if (IncrementalIndexRow.EMPTY_ROW_INDEX != priorIndex) { final int[] indexAndOffset = indexAndOffsets.get(priorIndex); @@ -202,7 +199,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex } // Last ditch sanity checks - if (numEntries.get() >= maxRowCount && facts.getPriorIndex(key) == IncrementalIndexRow.EMPTY_ROW_INDEX) { + if (getNumEntries().get() >= maxRowCount && facts.getPriorIndex(key) == IncrementalIndexRow.EMPTY_ROW_INDEX) { throw new IndexSizeExceededException("Maximum number of rows [%d] reached", maxRowCount); } @@ -213,7 +210,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex indexAndOffsets.add(new int[]{bufferIndex, bufferOffset}); final int prev = facts.putIfAbsent(key, rowIndex); if (IncrementalIndexRow.EMPTY_ROW_INDEX == prev) { - numEntries.incrementAndGet(); + getNumEntries().incrementAndGet(); } else { throw new ISE("WTF! we are in sychronized block."); } @@ -222,7 +219,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex rowContainer.set(row); - for (int i = 0; i < metrics.length; i++) { + for (int i = 0; i < getMetrics().length; i++) { final BufferAggregator agg = getAggs()[i]; synchronized (agg) { @@ -231,7 +228,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex } catch (ParseException e) { // "aggregate" can throw ParseExceptions if a selector expects something but gets something else. 
- if (reportParseExceptions) { + if (getReportParseExceptions()) { throw new ParseException(e, "Encountered parse error for aggregator[%s]", getMetricAggs()[i].getName()); } else { log.debug(e, "Encountered parse error, skipping aggregator[%s].", getMetricAggs()[i].getName()); @@ -240,7 +237,7 @@ public class OffheapIncrementalIndex extends IncrementalIndex } } rowContainer.set(null); - return new AddToFactsResult(numEntries.get(), 0, new ArrayList<>()); + return new AddToFactsResult(getNumEntries().get(), 0, new ArrayList<>()); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/OnheapIncrementalIndex.java b/processing/src/main/java/org/apache/druid/segment/incremental/OnheapIncrementalIndex.java index 6f49730d694..cd6579743b2 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/OnheapIncrementalIndex.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/OnheapIncrementalIndex.java @@ -46,6 +46,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; /** + * */ public class OnheapIncrementalIndex extends IncrementalIndex { @@ -105,7 +106,8 @@ public class OnheapIncrementalIndex extends IncrementalIndex { long maxAggregatorIntermediateSize = Integer.BYTES * incrementalIndexSchema.getMetrics().length; maxAggregatorIntermediateSize += Arrays.stream(incrementalIndexSchema.getMetrics()) - .mapToLong(aggregator -> aggregator.getMaxIntermediateSizeWithNulls() + Long.BYTES * 2) + .mapToLong(aggregator -> aggregator.getMaxIntermediateSizeWithNulls() + + Long.BYTES * 2) .sum(); return maxAggregatorIntermediateSize; } @@ -140,12 +142,7 @@ public class OnheapIncrementalIndex extends IncrementalIndex @Override protected AddToFactsResult addToFacts( - AggregatorFactory[] metrics, - boolean deserializeComplexMetrics, - boolean reportParseExceptions, InputRow row, - AtomicInteger numEntries, - AtomicLong sizeInBytes, IncrementalIndexRow key, ThreadLocal rowContainer, Supplier rowSupplier, @@ -156,7 +153,9 @@ public class OnheapIncrementalIndex extends IncrementalIndex final int priorIndex = facts.getPriorIndex(key); Aggregator[] aggs; - + final AggregatorFactory[] metrics = getMetrics(); + final AtomicInteger numEntries = getNumEntries(); + final AtomicLong sizeInBytes = getBytesInMemory(); if (IncrementalIndexRow.EMPTY_ROW_INDEX != priorIndex) { aggs = concurrentGet(priorIndex); parseExceptionMessages = doAggregate(metrics, aggs, rowContainer, row); @@ -301,7 +300,7 @@ public class OnheapIncrementalIndex extends IncrementalIndex { final boolean countCheck = size() < maxRowCount; // if maxBytesInMemory = -1, then ignore sizeCheck - final boolean sizeCheck = maxBytesInMemory <= 0 || getBytesInMemory() < maxBytesInMemory; + final boolean sizeCheck = maxBytesInMemory <= 0 || getBytesInMemory().get() < maxBytesInMemory; final boolean canAdd = countCheck && sizeCheck; if (!countCheck && !sizeCheck) { outOfRowsReason = StringUtils.format( diff --git a/processing/src/test/java/org/apache/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java b/processing/src/test/java/org/apache/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java index 096e4d3de43..886260a3b2c 100644 --- a/processing/src/test/java/org/apache/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java +++ b/processing/src/test/java/org/apache/druid/segment/incremental/OnheapIncrementalIndexBenchmark.java @@ -171,12 +171,7 @@ public class OnheapIncrementalIndexBenchmark extends AbstractBenchmark @Override 
protected AddToFactsResult addToFacts( - AggregatorFactory[] metrics, - boolean deserializeComplexMetrics, - boolean reportParseExceptions, InputRow row, - AtomicInteger numEntries, - AtomicLong sizeInBytes, IncrementalIndexRow key, ThreadLocal rowContainer, Supplier rowSupplier, @@ -187,7 +182,9 @@ public class OnheapIncrementalIndexBenchmark extends AbstractBenchmark final Integer priorIdex = getFacts().getPriorIndex(key); Aggregator[] aggs; - + final AggregatorFactory[] metrics = getMetrics(); + final AtomicInteger numEntries = getNumEntries(); + final AtomicLong sizeInBytes = getBytesInMemory(); if (null != priorIdex) { aggs = indexedMap.get(priorIdex); } else { @@ -196,7 +193,7 @@ public class OnheapIncrementalIndexBenchmark extends AbstractBenchmark for (int i = 0; i < metrics.length; i++) { final AggregatorFactory agg = metrics[i]; aggs[i] = agg.factorize( - makeColumnSelectorFactory(agg, rowSupplier, deserializeComplexMetrics) + makeColumnSelectorFactory(agg, rowSupplier, getDeserializeComplexMetrics()) ); } Integer rowIndex; @@ -233,7 +230,7 @@ public class OnheapIncrementalIndexBenchmark extends AbstractBenchmark } catch (ParseException e) { // "aggregate" can throw ParseExceptions if a selector expects something but gets something else. - if (reportParseExceptions) { + if (getReportParseExceptions()) { throw e; } } diff --git a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java index 442b893f100..d2d72bacfa6 100644 --- a/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java +++ b/server/src/main/java/org/apache/druid/segment/realtime/plumber/Sink.java @@ -279,7 +279,7 @@ public class Sink implements Iterable return 0; } - return currHydrant.getIndex().getBytesInMemory(); + return currHydrant.getIndex().getBytesInMemory().get(); } } From 61272d6daa345aae13121d4509dc05c70af74f19 Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Mon, 18 Feb 2019 02:06:47 -0800 Subject: [PATCH 19/25] Update handlebars dep to patch vulnerability (#7083) --- web-console/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web-console/package-lock.json b/web-console/package-lock.json index 6bf0577c2f1..ad3f8f7b408 100644 --- a/web-console/package-lock.json +++ b/web-console/package-lock.json @@ -4018,9 +4018,9 @@ "dev": true }, "handlebars": { - "version": "4.0.12", - "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.0.12.tgz", - "integrity": "sha512-RhmTekP+FZL+XNhwS1Wf+bTTZpdLougwt5pcgA1tuz6Jcx0fpH/7z0qd71RKnZHBCxIRBHfBOnio4gViPemNzA==", + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.1.0.tgz", + "integrity": "sha512-l2jRuU1NAWK6AW5qqcTATWQJvNPEwkM7NEKSiv/gqOsoSQbVoWyqVEY5GS+XPQ88zLNmqASRpzfdm8d79hJS+w==", "dev": true, "requires": { "async": "^2.5.0", From 80a2ef7be46c4fc807ac2a74507b8ba8b6a44049 Mon Sep 17 00:00:00 2001 From: Surekha Date: Mon, 18 Feb 2019 11:50:08 -0800 Subject: [PATCH 20/25] Support kafka transactional topics (#5404) (#6496) * Support kafka transactional topics * update kafka to version 2.0.0 * Remove the skipOffsetGaps option since it's not used anymore * Adjust kafka consumer to use transactional semantics * Update tests * Remove unused import from test * Fix compilation * Invoke transaction api to fix a unit test * temporary modification of travis.yml for debugging * another attempt to get travis tasklogs * update kafka to 2.0.1 at all places * Remove druid-kafka-eight 
dependency from integration-tests, remove the kafka firehose test and deprecate kafka-eight classes * Add deprecated in docs for kafka-eight and kafka-simple extensions * Remove skipOffsetGaps and code changes for transaction support * Fix indentation * remove skipOffsetGaps from kinesis * Add transaction api to KafkaRecordSupplierTest * Fix indent * Fix test * update kafka version to 2.1.0 --- .../extensions-core/kafka-ingestion.md | 1 - docs/content/development/extensions.md | 4 +- .../KafkaEightSimpleConsumerDruidModule.java | 1 + ...fkaEightSimpleConsumerFirehoseFactory.java | 1 + .../firehose/kafka/KafkaSimpleConsumer.java | 1 + .../firehose/kafka/KafkaEightDruidModule.java | 1 + .../kafka/KafkaEightFirehoseFactory.java | 2 + .../kafka-indexing-service/pom.xml | 2 +- .../druid/indexing/kafka/KafkaIndexTask.java | 1 + .../kafka/KafkaIndexTaskIOConfig.java | 5 +- .../indexing/kafka/KafkaRecordSupplier.java | 3 +- .../kafka/LegacyKafkaIndexTaskRunner.java | 19 +- .../kafka/supervisor/KafkaSupervisor.java | 3 +- .../supervisor/KafkaSupervisorIOConfig.java | 13 +- .../indexing/kafka/KafkaIOConfigTest.java | 6 +- .../indexing/kafka/KafkaIndexTaskTest.java | 678 ++++++++++-------- .../kafka/KafkaRecordSupplierTest.java | 85 ++- .../KafkaSupervisorIOConfigTest.java | 5 +- .../kafka/supervisor/KafkaSupervisorTest.java | 105 ++- .../druid/indexing/kafka/test/TestBroker.java | 11 +- .../kinesis/KinesisIndexTaskIOConfig.java | 1 - .../indexing/kinesis/KinesisIOConfigTest.java | 2 - .../SeekableStreamIndexTask.java | 2 +- .../SeekableStreamIndexTaskIOConfig.java | 8 - .../SeekableStreamIndexTaskRunner.java | 20 +- .../common/OrderedPartitionableRecord.java | 2 +- .../seekablestream/common/RecordSupplier.java | 2 +- .../supervisor/SeekableStreamSupervisor.java | 2 +- integration-tests/pom.xml | 13 +- .../druid/tests/indexer/ITKafkaTest.java | 320 --------- .../resources/indexer/kafka_index_task.json | 68 -- pom.xml | 2 +- 32 files changed, 512 insertions(+), 877 deletions(-) delete mode 100644 integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaTest.java delete mode 100644 integration-tests/src/test/resources/indexer/kafka_index_task.json diff --git a/docs/content/development/extensions-core/kafka-ingestion.md b/docs/content/development/extensions-core/kafka-ingestion.md index b24e8748538..85cebc5ae27 100644 --- a/docs/content/development/extensions-core/kafka-ingestion.md +++ b/docs/content/development/extensions-core/kafka-ingestion.md @@ -201,7 +201,6 @@ For Roaring bitmaps: |`completionTimeout`|ISO8601 Period|The length of time to wait before declaring a publishing task as failed and terminating it. If this is set too low, your tasks may never publish. The publishing clock for a task begins roughly after `taskDuration` elapses.|no (default == PT30M)| |`lateMessageRejectionPeriod`|ISO8601 Period|Configure tasks to reject messages with timestamps earlier than this period before the task was created; for example if this is set to `PT1H` and the supervisor creates a task at *2016-01-01T12:00Z*, messages with timestamps earlier than *2016-01-01T11:00Z* will be dropped. This may help prevent concurrency issues if your data stream has late messages and you have multiple pipelines that need to operate on the same segments (e.g. 
a realtime and a nightly batch ingestion pipeline).|no (default == none)| |`earlyMessageRejectionPeriod`|ISO8601 Period|Configure tasks to reject messages with timestamps later than this period after the task reached its taskDuration; for example if this is set to `PT1H`, the taskDuration is set to `PT1H` and the supervisor creates a task at *2016-01-01T12:00Z*, messages with timestamps later than *2016-01-01T14:00Z* will be dropped. **Note:** Tasks sometimes run past their task duration, for example, in cases of supervisor failover. Setting earlyMessageRejectionPeriod too low may cause messages to be dropped unexpectedly whenever a task runs past its originally configured task duration.|no (default == none)| -|`skipOffsetGaps`|Boolean|Whether or not to allow gaps of missing offsets in the Kafka stream. This is required for compatibility with implementations such as MapR Streams which does not guarantee consecutive offsets. If this is false, an exception will be thrown if offsets are not consecutive.|no (default == false)| ## Operations diff --git a/docs/content/development/extensions.md b/docs/content/development/extensions.md index a5b8a5b3f57..a6d3a7b3913 100644 --- a/docs/content/development/extensions.md +++ b/docs/content/development/extensions.md @@ -48,7 +48,7 @@ Core extensions are maintained by Druid committers. |druid-datasketches|Support for approximate counts and set operations with [DataSketches](http://datasketches.github.io/).|[link](../development/extensions-core/datasketches-extension.html)| |druid-hdfs-storage|HDFS deep storage.|[link](../development/extensions-core/hdfs.html)| |druid-histogram|Approximate histograms and quantiles aggregator.|[link](../development/extensions-core/approximate-histograms.html)| -|druid-kafka-eight|Kafka ingest firehose (high level consumer) for realtime nodes.|[link](../development/extensions-core/kafka-eight-firehose.html)| +|druid-kafka-eight|Kafka ingest firehose (high level consumer) for realtime nodes(deprecated).|[link](../development/extensions-core/kafka-eight-firehose.html)| |druid-kafka-extraction-namespace|Kafka-based namespaced lookup. 
Requires namespace lookup extension.|[link](../development/extensions-core/kafka-extraction-namespace.html)| |druid-kafka-indexing-service|Supervised exactly-once Kafka ingestion for the indexing service.|[link](../development/extensions-core/kafka-ingestion.html)| |druid-kinesis-indexing-service|Supervised exactly-once Kinesis ingestion for the indexing service.|[link](../development/extensions-core/kinesis-ingestion.html)| @@ -81,7 +81,7 @@ All of these community extensions can be downloaded using *pull-deps* with the c |druid-cassandra-storage|Apache Cassandra deep storage.|[link](../development/extensions-contrib/cassandra.html)| |druid-cloudfiles-extensions|Rackspace Cloudfiles deep storage and firehose.|[link](../development/extensions-contrib/cloudfiles.html)| |druid-distinctcount|DistinctCount aggregator|[link](../development/extensions-contrib/distinctcount.html)| -|druid-kafka-eight-simpleConsumer|Kafka ingest firehose (low level consumer).|[link](../development/extensions-contrib/kafka-simple.html)| +|druid-kafka-eight-simpleConsumer|Kafka ingest firehose (low level consumer)(deprecated).|[link](../development/extensions-contrib/kafka-simple.html)| |druid-orc-extensions|Support for data in Apache Orc data format.|[link](../development/extensions-contrib/orc.html)| |druid-rabbitmq|RabbitMQ firehose.|[link](../development/extensions-contrib/rabbitmq.html)| |druid-redis-cache|A cache implementation for Druid based on Redis.|[link](../development/extensions-contrib/redis-cache.html)| diff --git a/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerDruidModule.java b/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerDruidModule.java index a03f3204959..8fad815177d 100644 --- a/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerDruidModule.java +++ b/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerDruidModule.java @@ -28,6 +28,7 @@ import org.apache.druid.initialization.DruidModule; import java.util.List; +@Deprecated public class KafkaEightSimpleConsumerDruidModule implements DruidModule { @Override diff --git a/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerFirehoseFactory.java b/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerFirehoseFactory.java index 65c501a8e54..ca34e55d6a7 100644 --- a/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerFirehoseFactory.java +++ b/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaEightSimpleConsumerFirehoseFactory.java @@ -46,6 +46,7 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +@Deprecated public class KafkaEightSimpleConsumerFirehoseFactory implements FirehoseFactoryV2 { diff --git a/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaSimpleConsumer.java b/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaSimpleConsumer.java index 038fb2db90f..25fc8de15da 100644 --- 
a/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaSimpleConsumer.java +++ b/extensions-contrib/kafka-eight-simpleConsumer/src/main/java/org/apache/druid/firehose/kafka/KafkaSimpleConsumer.java @@ -56,6 +56,7 @@ import java.util.concurrent.TimeUnit; * This class is not thread safe, the caller must ensure all the methods be * called from single thread */ +@Deprecated public class KafkaSimpleConsumer { diff --git a/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightDruidModule.java b/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightDruidModule.java index 4fe379a4883..f8a5ad7d373 100644 --- a/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightDruidModule.java +++ b/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightDruidModule.java @@ -30,6 +30,7 @@ import java.util.List; /** */ +@Deprecated public class KafkaEightDruidModule implements DruidModule { @Override diff --git a/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightFirehoseFactory.java b/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightFirehoseFactory.java index 0d0ed5863dd..bb38c050a14 100644 --- a/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightFirehoseFactory.java +++ b/extensions-core/kafka-eight/src/main/java/org/apache/druid/firehose/kafka/KafkaEightFirehoseFactory.java @@ -47,7 +47,9 @@ import java.util.Properties; import java.util.Set; /** + * This class is deprecated and kafka-eight module should be removed completely */ +@Deprecated public class KafkaEightFirehoseFactory implements FirehoseFactory> { private static final Logger log = new Logger(KafkaEightFirehoseFactory.class); diff --git a/extensions-core/kafka-indexing-service/pom.xml b/extensions-core/kafka-indexing-service/pom.xml index c48091f1669..b7c78169323 100644 --- a/extensions-core/kafka-indexing-service/pom.xml +++ b/extensions-core/kafka-indexing-service/pom.xml @@ -34,7 +34,7 @@ - 0.10.2.2 + 2.1.0 diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java index 950441c3c59..4fcbbe10f0c 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTask.java @@ -108,6 +108,7 @@ public class KafkaIndexTask extends SeekableStreamIndexTask props.setProperty("auto.offset.reset", "none"); props.setProperty("key.deserializer", ByteArrayDeserializer.class.getName()); props.setProperty("value.deserializer", ByteArrayDeserializer.class.getName()); + props.setProperty("isolation.level", "read_committed"); return new KafkaConsumer<>(props); } diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTaskIOConfig.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTaskIOConfig.java index af84bfcd4ab..5f3681623fa 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTaskIOConfig.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/KafkaIndexTaskIOConfig.java @@ -45,8 +45,7 @@ public 
class KafkaIndexTaskIOConfig extends SeekableStreamIndexTaskIOConfig= (endOffsets.get(record.partition())) && assignment.remove(record.partition())) { log.info("Finished reading topic[%s], partition[%,d].", record.topic(), record.partition()); KafkaIndexTask.assignPartitions(consumer, topic, assignment); diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java index 0e9dcacc815..bb388cd55e2 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisor.java @@ -212,8 +212,7 @@ public class KafkaSupervisor extends SeekableStreamSupervisor kafkaIoConfig.getPollTimeout(), true, minimumMessageTime, - maximumMessageTime, - kafkaIoConfig.isSkipOffsetGaps() + maximumMessageTime ); } diff --git a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfig.java b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfig.java index ddd0f06d205..629daa780ed 100644 --- a/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfig.java +++ b/extensions-core/kafka-indexing-service/src/main/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfig.java @@ -38,7 +38,7 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig private final Map consumerProperties; private final long pollTimeout; - private final boolean skipOffsetGaps; + @JsonCreator public KafkaSupervisorIOConfig( @@ -53,8 +53,7 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig @JsonProperty("useEarliestOffset") Boolean useEarliestOffset, @JsonProperty("completionTimeout") Period completionTimeout, @JsonProperty("lateMessageRejectionPeriod") Period lateMessageRejectionPeriod, - @JsonProperty("earlyMessageRejectionPeriod") Period earlyMessageRejectionPeriod, - @JsonProperty("skipOffsetGaps") Boolean skipOffsetGaps + @JsonProperty("earlyMessageRejectionPeriod") Period earlyMessageRejectionPeriod ) { super( @@ -76,7 +75,6 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig StringUtils.format("consumerProperties must contain entry for [%s]", BOOTSTRAP_SERVERS_KEY) ); this.pollTimeout = pollTimeout != null ? pollTimeout : DEFAULT_POLL_TIMEOUT_MILLIS; - this.skipOffsetGaps = skipOffsetGaps != null ? 
skipOffsetGaps : false; } @JsonProperty @@ -103,12 +101,6 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig return isUseEarliestSequenceNumber(); } - @JsonProperty - public boolean isSkipOffsetGaps() - { - return skipOffsetGaps; - } - @Override public String toString() { @@ -125,7 +117,6 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig ", completionTimeout=" + getCompletionTimeout() + ", earlyMessageRejectionPeriod=" + getEarlyMessageRejectionPeriod() + ", lateMessageRejectionPeriod=" + getLateMessageRejectionPeriod() + - ", skipOffsetGaps=" + skipOffsetGaps + '}'; } diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIOConfigTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIOConfigTest.java index 7ce8df02444..556cba1a39b 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIOConfigTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIOConfigTest.java @@ -77,7 +77,6 @@ public class KafkaIOConfigTest Assert.assertTrue(config.isUseTransaction()); Assert.assertFalse("minimumMessageTime", config.getMinimumMessageTime().isPresent()); Assert.assertFalse("maximumMessageTime", config.getMaximumMessageTime().isPresent()); - Assert.assertFalse("skipOffsetGaps", config.isSkipOffsetGaps()); Assert.assertEquals(Collections.EMPTY_SET, config.getExclusiveStartSequenceNumberPartitions()); } @@ -93,8 +92,7 @@ public class KafkaIOConfigTest + " \"consumerProperties\": {\"bootstrap.servers\":\"localhost:9092\"},\n" + " \"useTransaction\": false,\n" + " \"minimumMessageTime\": \"2016-05-31T12:00Z\",\n" - + " \"maximumMessageTime\": \"2016-05-31T14:00Z\",\n" - + " \"skipOffsetGaps\": true\n" + + " \"maximumMessageTime\": \"2016-05-31T14:00Z\"\n" + "}"; KafkaIndexTaskIOConfig config = (KafkaIndexTaskIOConfig) mapper.readValue( @@ -115,9 +113,7 @@ public class KafkaIOConfigTest Assert.assertFalse(config.isUseTransaction()); Assert.assertEquals(DateTimes.of("2016-05-31T12:00Z"), config.getMinimumMessageTime().get()); Assert.assertEquals(DateTimes.of("2016-05-31T14:00Z"), config.getMaximumMessageTime().get()); - Assert.assertTrue("skipOffsetGaps", config.isSkipOffsetGaps()); Assert.assertEquals(Collections.EMPTY_SET, config.getExclusiveStartSequenceNumberPartitions()); - } @Test diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java index b7b389668a4..3d6308cb0a9 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaIndexTaskTest.java @@ -99,14 +99,16 @@ import org.apache.druid.metadata.DerbyMetadataStorageActionHandlerFactory; import org.apache.druid.metadata.EntryExistsException; import org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator; import org.apache.druid.metadata.TestDerbyConnector; +import org.apache.druid.query.DefaultGenericQueryMetricsFactory; import org.apache.druid.query.DefaultQueryRunnerFactoryConglomerate; import org.apache.druid.query.Druids; import org.apache.druid.query.IntervalChunkingQueryRunnerDecorator; import org.apache.druid.query.Query; -import 
org.apache.druid.query.QueryPlus; import org.apache.druid.query.QueryRunner; +import org.apache.druid.query.QueryRunnerFactory; import org.apache.druid.query.QueryRunnerFactoryConglomerate; import org.apache.druid.query.QueryToolChest; +import org.apache.druid.query.QueryWatcher; import org.apache.druid.query.Result; import org.apache.druid.query.SegmentDescriptor; import org.apache.druid.query.aggregation.AggregatorFactory; @@ -114,6 +116,13 @@ import org.apache.druid.query.aggregation.CountAggregatorFactory; import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.query.filter.SelectorDimFilter; +import org.apache.druid.query.scan.ScanQuery; +import org.apache.druid.query.scan.ScanQueryConfig; +import org.apache.druid.query.scan.ScanQueryEngine; +import org.apache.druid.query.scan.ScanQueryQueryToolChest; +import org.apache.druid.query.scan.ScanQueryRunnerFactory; +import org.apache.druid.query.scan.ScanResultValue; +import org.apache.druid.query.spec.QuerySegmentSpec; import org.apache.druid.query.timeseries.TimeseriesQuery; import org.apache.druid.query.timeseries.TimeseriesQueryEngine; import org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest; @@ -172,11 +181,14 @@ import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.TreeMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import static org.apache.druid.query.QueryPlus.wrap; + @RunWith(Parameterized.class) public class KafkaIndexTaskTest { @@ -372,11 +384,7 @@ public class KafkaIndexTaskTest public void testRunAfterDataInserted() throws Exception { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -389,8 +397,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -432,8 +439,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -445,11 +451,7 @@ public class KafkaIndexTaskTest } // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Wait for task to exit Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); @@ -484,11 +486,7 @@ public class KafkaIndexTaskTest maxRowsPerSegment = 2; // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); Map consumerProps = kafkaServer.consumerProperties(); consumerProps.put("max.poll.records", "1"); @@ -541,8 +539,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); final ListenableFuture future = runTask(task); @@ -614,141 +611,148 @@ public class KafkaIndexTaskTest int numToAdd = records.size() - 2; try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); for (int i = 0; i < numToAdd; i++) { 
kafkaProducer.send(records.get(i)).get(); } + kafkaProducer.commitTransaction(); + } - Map consumerProps = kafkaServer.consumerProperties(); - consumerProps.put("max.poll.records", "1"); + Map consumerProps = kafkaServer.consumerProperties(); + consumerProps.put("max.poll.records", "1"); - final SeekableStreamPartitions startPartitions = new SeekableStreamPartitions<>( - topic, - ImmutableMap.of( - 0, - 0L, - 1, - 0L - ) - ); - final SeekableStreamPartitions checkpoint1 = new SeekableStreamPartitions<>( - topic, - ImmutableMap.of( - 0, - 3L, - 1, - 0L - ) - ); - final SeekableStreamPartitions checkpoint2 = new SeekableStreamPartitions<>( - topic, - ImmutableMap.of( - 0, - 10L, - 1, - 0L - ) - ); + final SeekableStreamPartitions startPartitions = new SeekableStreamPartitions<>( + topic, + ImmutableMap.of( + 0, + 0L, + 1, + 0L + ) + ); + final SeekableStreamPartitions checkpoint1 = new SeekableStreamPartitions<>( + topic, + ImmutableMap.of( + 0, + 3L, + 1, + 0L + ) + ); + final SeekableStreamPartitions checkpoint2 = new SeekableStreamPartitions<>( + topic, + ImmutableMap.of( + 0, + 10L, + 1, + 0L + ) + ); - final SeekableStreamPartitions endPartitions = new SeekableStreamPartitions<>( - topic, - ImmutableMap.of( - 0, - 10L, - 1, - 2L - ) - ); - final KafkaIndexTask task = createTask( - null, - new KafkaIndexTaskIOConfig( - 0, - baseSequenceName, - startPartitions, - endPartitions, - consumerProps, - KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, - true, - null, - null, - false - ) - ); - final ListenableFuture future = runTask(task); - while (task.getRunner().getStatus() != Status.PAUSED) { - Thread.sleep(10); - } - final Map currentOffsets = ImmutableMap.copyOf(task.getRunner().getCurrentOffsets()); + final SeekableStreamPartitions endPartitions = new SeekableStreamPartitions<>( + topic, + ImmutableMap.of( + 0, + 10L, + 1, + 2L + ) + ); + final KafkaIndexTask task = createTask( + null, + new KafkaIndexTaskIOConfig( + 0, + baseSequenceName, + startPartitions, + endPartitions, + consumerProps, + KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, + true, + null, + null + ) + ); + final ListenableFuture future = runTask(task); + while (task.getRunner().getStatus() != Status.PAUSED) { + Thread.sleep(10); + } + final Map currentOffsets = ImmutableMap.copyOf(task.getRunner().getCurrentOffsets()); - Assert.assertTrue(checkpoint1.getPartitionSequenceNumberMap().equals(currentOffsets)); - task.getRunner().setEndOffsets(currentOffsets, false); + Assert.assertTrue(checkpoint1.getPartitionSequenceNumberMap().equals(currentOffsets)); + task.getRunner().setEndOffsets(currentOffsets, false); - while (task.getRunner().getStatus() != Status.PAUSED) { - Thread.sleep(10); - } + while (task.getRunner().getStatus() != Status.PAUSED) { + Thread.sleep(10); + } - // add remaining records + // add remaining records + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); for (int i = numToAdd; i < records.size(); i++) { kafkaProducer.send(records.get(i)).get(); } - final Map nextOffsets = ImmutableMap.copyOf(task.getRunner().getCurrentOffsets()); - - Assert.assertTrue(checkpoint2.getPartitionSequenceNumberMap().equals(nextOffsets)); - task.getRunner().setEndOffsets(nextOffsets, false); - - Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); - - Assert.assertEquals(2, checkpointRequestsHash.size()); - Assert.assertTrue( - checkpointRequestsHash.contains( - Objects.hash( - DATA_SCHEMA.getDataSource(), - 0, 
- new KafkaDataSourceMetadata(startPartitions), - new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, currentOffsets)) - ) - ) - ); - Assert.assertTrue( - checkpointRequestsHash.contains( - Objects.hash( - DATA_SCHEMA.getDataSource(), - 0, - new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, currentOffsets)), - new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, nextOffsets)) - ) - ) - ); - - // Check metrics - Assert.assertEquals(8, task.getRunner().getRowIngestionMeters().getProcessed()); - Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable()); - Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getThrownAway()); - - // Check published metadata - SegmentDescriptor desc1 = SD(task, "2008/P1D", 0); - SegmentDescriptor desc2 = SD(task, "2009/P1D", 0); - SegmentDescriptor desc3 = SD(task, "2010/P1D", 0); - SegmentDescriptor desc4 = SD(task, "2011/P1D", 0); - SegmentDescriptor desc5 = SD(task, "2011/P1D", 1); - SegmentDescriptor desc6 = SD(task, "2012/P1D", 0); - SegmentDescriptor desc7 = SD(task, "2013/P1D", 0); - Assert.assertEquals(ImmutableSet.of(desc1, desc2, desc3, desc4, desc5, desc6, desc7), publishedDescriptors()); - Assert.assertEquals( - new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 10L, 1, 2L))), - metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) - ); - - // Check segments in deep storage - Assert.assertEquals(ImmutableList.of("a"), readSegmentColumn("dim1", desc1)); - Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", desc2)); - Assert.assertEquals(ImmutableList.of("c"), readSegmentColumn("dim1", desc3)); - Assert.assertTrue((ImmutableList.of("d", "e").equals(readSegmentColumn("dim1", desc4)) - && ImmutableList.of("h").equals(readSegmentColumn("dim1", desc5))) || - (ImmutableList.of("d", "h").equals(readSegmentColumn("dim1", desc4)) - && ImmutableList.of("e").equals(readSegmentColumn("dim1", desc5)))); - Assert.assertEquals(ImmutableList.of("g"), readSegmentColumn("dim1", desc6)); - Assert.assertEquals(ImmutableList.of("f"), readSegmentColumn("dim1", desc7)); + kafkaProducer.commitTransaction(); } + final Map nextOffsets = ImmutableMap.copyOf(task.getRunner().getCurrentOffsets()); + + Assert.assertTrue(checkpoint2.getPartitionSequenceNumberMap().equals(nextOffsets)); + task.getRunner().setEndOffsets(nextOffsets, false); + + Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); + + Assert.assertEquals(2, checkpointRequestsHash.size()); + Assert.assertTrue( + checkpointRequestsHash.contains( + Objects.hash( + DATA_SCHEMA.getDataSource(), + 0, + new KafkaDataSourceMetadata(startPartitions), + new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, currentOffsets)) + ) + ) + ); + Assert.assertTrue( + checkpointRequestsHash.contains( + Objects.hash( + DATA_SCHEMA.getDataSource(), + 0, + new KafkaDataSourceMetadata(new SeekableStreamPartitions(topic, currentOffsets)), + new KafkaDataSourceMetadata(new SeekableStreamPartitions(topic, nextOffsets)) + ) + ) + ); + + // Check metrics + Assert.assertEquals(8, task.getRunner().getRowIngestionMeters().getProcessed()); + Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable()); + Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getThrownAway()); + + // Check published metadata + SegmentDescriptor desc1 = SD(task, "2008/P1D", 0); + SegmentDescriptor desc2 = SD(task, "2009/P1D", 0); + SegmentDescriptor desc3 = 
SD(task, "2010/P1D", 0); + SegmentDescriptor desc4 = SD(task, "2011/P1D", 0); + SegmentDescriptor desc5 = SD(task, "2011/P1D", 1); + SegmentDescriptor desc6 = SD(task, "2012/P1D", 0); + SegmentDescriptor desc7 = SD(task, "2013/P1D", 0); + Assert.assertEquals(ImmutableSet.of(desc1, desc2, desc3, desc4, desc5, desc6, desc7), publishedDescriptors()); + Assert.assertEquals( + new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 10L, 1, 2L))), + metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) + ); + + // Check segments in deep storage + Assert.assertEquals(ImmutableList.of("a"), readSegmentColumn("dim1", desc1)); + Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", desc2)); + Assert.assertEquals(ImmutableList.of("c"), readSegmentColumn("dim1", desc3)); + Assert.assertTrue((ImmutableList.of("d", "e").equals(readSegmentColumn("dim1", desc4)) + && ImmutableList.of("h").equals(readSegmentColumn("dim1", desc5))) || + (ImmutableList.of("d", "h").equals(readSegmentColumn("dim1", desc4)) + && ImmutableList.of("e").equals(readSegmentColumn("dim1", desc5)))); + Assert.assertEquals(ImmutableList.of("g"), readSegmentColumn("dim1", desc6)); + Assert.assertEquals(ImmutableList.of("f"), readSegmentColumn("dim1", desc7)); } @Test(timeout = 60_000L) @@ -763,11 +767,7 @@ public class KafkaIndexTaskTest intermediateHandoffPeriod = new Period().withSeconds(0); // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records.subList(0, 2)) { - kafkaProducer.send(record).get(); - } - } + insertData(); Map consumerProps = kafkaServer.consumerProperties(); consumerProps.put("max.poll.records", "1"); @@ -810,8 +810,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); final ListenableFuture future = runTask(task); @@ -882,9 +881,12 @@ public class KafkaIndexTaskTest // Insert data try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); for (ProducerRecord record : records) { kafkaProducer.send(record).get(); } + kafkaProducer.commitTransaction(); } Map consumerProps = kafkaServer.consumerProperties(); consumerProps.put("max.poll.records", "1"); @@ -913,8 +915,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); final ListenableFuture future = runTask(task); @@ -950,8 +951,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, DateTimes.of("2010"), - null, - false + null ) ); @@ -963,11 +963,7 @@ public class KafkaIndexTaskTest } // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Wait for task to exit Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); @@ -1005,8 +1001,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - DateTimes.of("2010"), - false + DateTimes.of("2010") ) ); @@ -1018,11 +1013,7 @@ public class KafkaIndexTaskTest } // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Wait for task to exit Assert.assertEquals(TaskState.SUCCESS, 
future.get().getStatusCode()); @@ -1070,8 +1061,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1083,11 +1073,7 @@ public class KafkaIndexTaskTest } // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Wait for task to exit Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); @@ -1114,11 +1100,7 @@ public class KafkaIndexTaskTest public void testRunOnNothing() throws Exception { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -1131,8 +1113,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1156,11 +1137,7 @@ public class KafkaIndexTaskTest handoffConditionTimeout = 5_000; // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -1173,8 +1150,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1209,11 +1185,7 @@ public class KafkaIndexTaskTest handoffConditionTimeout = 100; // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -1226,8 +1198,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1265,11 +1236,7 @@ public class KafkaIndexTaskTest maxSavedParseExceptions = 2; // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -1282,8 +1249,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1310,11 +1276,7 @@ public class KafkaIndexTaskTest maxSavedParseExceptions = 6; // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -1327,8 +1289,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1393,11 +1354,7 @@ public class KafkaIndexTaskTest maxSavedParseExceptions = 2; // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -1410,8 +1367,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1471,8 +1427,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); final KafkaIndexTask task2 = createTask( @@ -1486,8 +1441,7 @@ 
public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1495,11 +1449,7 @@ public class KafkaIndexTaskTest final ListenableFuture future2 = runTask(task2); // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Wait for tasks to exit Assert.assertEquals(TaskState.SUCCESS, future1.get().getStatusCode()); @@ -1541,8 +1491,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); final KafkaIndexTask task2 = createTask( @@ -1556,17 +1505,12 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Run first task final ListenableFuture future1 = runTask(task1); @@ -1612,8 +1556,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, false, null, - null, - false + null ) ); final KafkaIndexTask task2 = createTask( @@ -1627,17 +1570,12 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, false, null, - null, - false + null ) ); // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Run first task final ListenableFuture future1 = runTask(task1); @@ -1688,20 +1626,14 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); final ListenableFuture future = runTask(task); // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - kafkaProducer.flush(); - } + insertData(); // Wait for tasks to exit Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); @@ -1754,8 +1686,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); final KafkaIndexTask task2 = createTask( @@ -1769,8 +1700,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1778,11 +1708,7 @@ public class KafkaIndexTaskTest final ListenableFuture future2 = runTask(task2); // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); // Wait for tasks to exit Assert.assertEquals(TaskState.SUCCESS, future1.get().getStatusCode()); @@ -1821,13 +1747,12 @@ public class KafkaIndexTaskTest 0, "sequence0", new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 2L)), - new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 5L)), + new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 6L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1835,9 +1760,12 @@ public class KafkaIndexTaskTest // Insert some data, but not enough for the task to finish try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + 
kafkaProducer.beginTransaction(); for (ProducerRecord record : Iterables.limit(records, 4)) { kafkaProducer.send(record).get(); } + kafkaProducer.commitTransaction(); } while (countEvents(task1) != 2) { @@ -1859,13 +1787,12 @@ public class KafkaIndexTaskTest 0, "sequence0", new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 2L)), - new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 5L)), + new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 6L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1873,12 +1800,16 @@ public class KafkaIndexTaskTest // Insert remaining data try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); for (ProducerRecord record : Iterables.skip(records, 4)) { kafkaProducer.send(record).get(); } + kafkaProducer.commitTransaction(); } // Wait for task to exit + Assert.assertEquals(TaskState.SUCCESS, future2.get().getStatusCode()); // Check metrics @@ -1894,7 +1825,7 @@ public class KafkaIndexTaskTest SegmentDescriptor desc2 = SD(task1, "2011/P1D", 0); Assert.assertEquals(ImmutableSet.of(desc1, desc2), publishedDescriptors()); Assert.assertEquals( - new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 5L))), + new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 6L))), metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) ); @@ -1912,13 +1843,12 @@ public class KafkaIndexTaskTest 0, "sequence0", new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 2L)), - new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 5L)), + new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 6L)), kafkaServer.consumerProperties(), KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -1926,10 +1856,13 @@ public class KafkaIndexTaskTest // Insert some data, but not enough for the task to finish try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); for (ProducerRecord record : Iterables.limit(records, 4)) { kafkaProducer.send(record).get(); } kafkaProducer.flush(); + kafkaProducer.commitTransaction(); } while (countEvents(task) != 2) { @@ -1946,12 +1879,14 @@ public class KafkaIndexTaskTest } ); Assert.assertEquals(Status.PAUSED, task.getRunner().getStatus()); - // Insert remaining data try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); for (ProducerRecord record : Iterables.skip(records, 4)) { kafkaProducer.send(record).get(); } + kafkaProducer.commitTransaction(); } try { @@ -1979,7 +1914,7 @@ public class KafkaIndexTaskTest SegmentDescriptor desc2 = SD(task, "2011/P1D", 0); Assert.assertEquals(ImmutableSet.of(desc1, desc2), publishedDescriptors()); Assert.assertEquals( - new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 5L))), + new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 6L))), metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) ); @@ -2002,8 +1937,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -2025,11 +1959,7 @@ public class KafkaIndexTaskTest { resetOffsetAutomatically = true; // Insert data - try (final 
KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -2042,8 +1972,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -2070,11 +1999,7 @@ public class KafkaIndexTaskTest return; } // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final TreeMap> sequences = new TreeMap<>(); // Here the sequence number is 1 meaning that one incremental handoff was done by the failed task @@ -2097,8 +2022,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ), context ); @@ -2131,11 +2055,7 @@ public class KafkaIndexTaskTest public void testRunWithDuplicateRequest() throws Exception { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); final KafkaIndexTask task = createTask( null, @@ -2148,8 +2068,7 @@ public class KafkaIndexTaskTest KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, null, - null, - false + null ) ); @@ -2170,6 +2089,134 @@ public class KafkaIndexTaskTest Assert.assertEquals(Status.READING, task.getRunner().getStatus()); } + @Test(timeout = 60_000L) + public void testRunTransactionModeRollback() throws Exception + { + final KafkaIndexTask task = createTask( + null, + new KafkaIndexTaskIOConfig( + 0, + "sequence0", + new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 0L)), + new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 13L)), + kafkaServer.consumerProperties(), + KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, + true, + null, + null + ) + ); + + final ListenableFuture future = runTask(task); + + // Insert 2 records initially + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); + for (ProducerRecord record : Iterables.limit(records, 2)) { + kafkaProducer.send(record).get(); + } + kafkaProducer.commitTransaction(); + } + + while (countEvents(task) != 2) { + Thread.sleep(25); + } + + Assert.assertEquals(2, countEvents(task)); + Assert.assertEquals(Status.READING, task.getRunner().getStatus()); + + //verify the 2 indexed records + final QuerySegmentSpec firstInterval = objectMapper.readValue( + "\"2008/2010\"", QuerySegmentSpec.class + ); + Iterable scanResultValues = scanData(task, firstInterval); + Assert.assertEquals(2, Iterables.size(scanResultValues)); + + // Insert 3 more records and rollback + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); + for (ProducerRecord record : Iterables.limit(Iterables.skip(records, 2), 3)) { + kafkaProducer.send(record).get(); + } + kafkaProducer.flush(); + kafkaProducer.abortTransaction(); + } + + Assert.assertEquals(2, countEvents(task)); + Assert.assertEquals(Status.READING, task.getRunner().getStatus()); + + final QuerySegmentSpec rollbackedInterval = objectMapper.readValue( + "\"2010/2012\"", QuerySegmentSpec.class + ); + scanResultValues = scanData(task, rollbackedInterval); + //verify that there are no records indexed in the rollbacked time period + 
Assert.assertEquals(0, Iterables.size(scanResultValues)); + + // Insert remaining data + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); + for (ProducerRecord record : Iterables.skip(records, 5)) { + kafkaProducer.send(record).get(); + } + kafkaProducer.commitTransaction(); + } + + final QuerySegmentSpec endInterval = objectMapper.readValue( + "\"2008/2049\"", QuerySegmentSpec.class + ); + Iterable scanResultValues1 = scanData(task, endInterval); + Assert.assertEquals(2, Iterables.size(scanResultValues1)); + + Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode()); + Assert.assertEquals(task.getRunner().getEndOffsets(), task.getRunner().getCurrentOffsets()); + + // Check metrics + Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessed()); + Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable()); + Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getThrownAway()); + + // Check published metadata + SegmentDescriptor desc1 = SD(task, "2008/P1D", 0); + SegmentDescriptor desc2 = SD(task, "2009/P1D", 0); + SegmentDescriptor desc3 = SD(task, "2013/P1D", 0); + SegmentDescriptor desc4 = SD(task, "2049/P1D", 0); + Assert.assertEquals(ImmutableSet.of(desc1, desc2, desc3, desc4), publishedDescriptors()); + Assert.assertEquals( + new KafkaDataSourceMetadata(new SeekableStreamPartitions<>(topic, ImmutableMap.of(0, 13L))), + metadataStorageCoordinator.getDataSourceMetadata(DATA_SCHEMA.getDataSource()) + ); + + // Check segments in deep storage + Assert.assertEquals(ImmutableList.of("a"), readSegmentColumn("dim1", desc1)); + Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", desc2)); + Assert.assertEquals(ImmutableList.of("f"), readSegmentColumn("dim1", desc3)); + Assert.assertEquals(ImmutableList.of("f"), readSegmentColumn("dim1", desc4)); + } + + private List scanData(final Task task, QuerySegmentSpec spec) + { + ScanQuery query = new Druids.ScanQueryBuilder().dataSource( + DATA_SCHEMA.getDataSource()).intervals(spec).build(); + List results = + task.getQueryRunner(query).run(wrap(query), new HashMap<>()).toList(); + return results; + } + + private void insertData() throws ExecutionException, InterruptedException + { + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); + for (ProducerRecord record : records) { + kafkaProducer.send(record).get(); + } + kafkaProducer.commitTransaction(); + } + } + private ListenableFuture runTask(final Task task) { try { @@ -2338,7 +2385,7 @@ public class KafkaIndexTaskTest ); } - private QueryRunnerFactoryConglomerate makeTimeseriesOnlyConglomerate() + private QueryRunnerFactoryConglomerate makeTimeseriesAndScanConglomerate() { IntervalChunkingQueryRunnerDecorator queryRunnerDecorator = new IntervalChunkingQueryRunnerDecorator( null, @@ -2353,16 +2400,33 @@ public class KafkaIndexTaskTest } }; return new DefaultQueryRunnerFactoryConglomerate( - ImmutableMap.of( - TimeseriesQuery.class, - new TimeseriesQueryRunnerFactory( - new TimeseriesQueryQueryToolChest(queryRunnerDecorator), - new TimeseriesQueryEngine(), - (query, future) -> { - // do nothing - } + ImmutableMap., QueryRunnerFactory>builder() + .put( + TimeseriesQuery.class, + new TimeseriesQueryRunnerFactory( + new TimeseriesQueryQueryToolChest(queryRunnerDecorator), + new TimeseriesQueryEngine(), + new QueryWatcher() + { + @Override + 
public void registerQuery(Query query, ListenableFuture future) + { + // do nothing + } + } + ) ) - ) + .put( + ScanQuery.class, + new ScanQueryRunnerFactory( + new ScanQueryQueryToolChest( + new ScanQueryConfig(), + new DefaultGenericQueryMetricsFactory(TestHelper.makeJsonMapper()) + ), + new ScanQueryEngine() + ) + ) + .build() ); } @@ -2492,7 +2556,7 @@ public class KafkaIndexTaskTest new TestDataSegmentAnnouncer(), EasyMock.createNiceMock(DataSegmentServerAnnouncer.class), handoffNotifierFactory, - this::makeTimeseriesOnlyConglomerate, + this::makeTimeseriesAndScanConglomerate, Execs.directExecutor(), // queryExecutorService EasyMock.createMock(MonitorScheduler.class), new SegmentLoaderFactory( @@ -2603,7 +2667,7 @@ public class KafkaIndexTaskTest .build(); List> results = - task.getQueryRunner(query).run(QueryPlus.wrap(query), ImmutableMap.of()).toList(); + task.getQueryRunner(query).run(wrap(query), ImmutableMap.of()).toList(); return results.isEmpty() ? 0L : DimensionHandlerUtils.nullToZero(results.get(0).getValue().getLongMetric("rows")); } diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java index 2f445aa9943..f944bf04610 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/KafkaRecordSupplierTest.java @@ -166,11 +166,7 @@ public class KafkaRecordSupplierTest { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); Set> partitions = ImmutableSet.of( StreamPartition.of(topic, 0), @@ -195,11 +191,7 @@ public class KafkaRecordSupplierTest { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); Set> partitions = ImmutableSet.of( StreamPartition.of(topic, 0), @@ -232,10 +224,13 @@ public class KafkaRecordSupplierTest public void testPollAfterMoreDataAdded() throws InterruptedException, ExecutionException { // Insert data - - KafkaProducer producer = kafkaServer.newProducer(); - for (ProducerRecord record : records.subList(0, 13)) { - producer.send(record).get(); + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); + for (ProducerRecord record : records.subList(0, 13)) { + kafkaProducer.send(record).get(); + } + kafkaProducer.commitTransaction(); } Set> partitions = ImmutableSet.of( @@ -257,8 +252,13 @@ public class KafkaRecordSupplierTest } // Insert data - for (ProducerRecord rec : records.subList(13, 15)) { - producer.send(rec).get(); + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); + for (ProducerRecord record : records.subList(13, 15)) { + kafkaProducer.send(record).get(); + } + kafkaProducer.commitTransaction(); } @@ -270,8 +270,28 @@ public class KafkaRecordSupplierTest List> initialRecords = createOrderedPartitionableRecords(); Assert.assertEquals(records.size(), polledRecords.size()); - Assert.assertTrue(initialRecords.containsAll(polledRecords)); + Assert.assertEquals(partitions, 
recordSupplier.getAssignment()); + final int initialRecordsPartition0Size = initialRecords.stream() + .filter(r -> r.getPartitionId().equals(0)) + .collect(Collectors.toSet()) + .size(); + final int initialRecordsPartition1Size = initialRecords.stream() + .filter(r -> r.getPartitionId().equals(1)) + .collect(Collectors.toSet()) + .size(); + + final int polledRecordsPartition0Size = polledRecords.stream() + .filter(r -> r.getPartitionId().equals(0)) + .collect(Collectors.toSet()) + .size(); + final int polledRecordsPartition1Size = polledRecords.stream() + .filter(r -> r.getPartitionId().equals(1)) + .collect(Collectors.toSet()) + .size(); + + Assert.assertEquals(initialRecordsPartition0Size, polledRecordsPartition0Size); + Assert.assertEquals(initialRecordsPartition1Size, polledRecordsPartition1Size); recordSupplier.close(); } @@ -280,11 +300,7 @@ public class KafkaRecordSupplierTest public void testSeek() throws InterruptedException, ExecutionException { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); StreamPartition partition0 = StreamPartition.of(topic, 0); StreamPartition partition1 = StreamPartition.of(topic, 1); @@ -326,11 +342,7 @@ public class KafkaRecordSupplierTest public void testSeekToLatest() throws InterruptedException, ExecutionException { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); StreamPartition partition0 = StreamPartition.of(topic, 0); StreamPartition partition1 = StreamPartition.of(topic, 1); @@ -388,11 +400,7 @@ public class KafkaRecordSupplierTest public void testPosition() throws ExecutionException, InterruptedException { // Insert data - try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { - for (ProducerRecord record : records) { - kafkaProducer.send(record).get(); - } - } + insertData(); StreamPartition partition0 = StreamPartition.of(topic, 0); StreamPartition partition1 = StreamPartition.of(topic, 1); @@ -420,7 +428,7 @@ public class KafkaRecordSupplierTest Assert.assertEquals(0L, (long) recordSupplier.getPosition(partition0)); recordSupplier.seekToLatest(Collections.singleton(partition0)); - Assert.assertEquals(11L, (long) recordSupplier.getPosition(partition0)); + Assert.assertEquals(12L, (long) recordSupplier.getPosition(partition0)); long prevPos = recordSupplier.getPosition(partition0); recordSupplier.getEarliestSequenceNumber(partition0); @@ -433,5 +441,16 @@ public class KafkaRecordSupplierTest recordSupplier.close(); } + private void insertData() throws ExecutionException, InterruptedException + { + try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); + for (ProducerRecord record : records) { + kafkaProducer.send(record).get(); + } + kafkaProducer.commitTransaction(); + } + } } diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfigTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfigTest.java index 3337faa080a..ee3dfe8e01c 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfigTest.java +++ 
b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorIOConfigTest.java @@ -78,7 +78,6 @@ public class KafkaSupervisorIOConfigTest Assert.assertEquals(Duration.standardMinutes(30), config.getCompletionTimeout()); Assert.assertFalse("lateMessageRejectionPeriod", config.getLateMessageRejectionPeriod().isPresent()); Assert.assertFalse("earlyMessageRejectionPeriod", config.getEarlyMessageRejectionPeriod().isPresent()); - Assert.assertFalse("skipOffsetGaps", config.isSkipOffsetGaps()); } @Test @@ -97,8 +96,7 @@ public class KafkaSupervisorIOConfigTest + " \"useEarliestOffset\": true,\n" + " \"completionTimeout\": \"PT45M\",\n" + " \"lateMessageRejectionPeriod\": \"PT1H\",\n" - + " \"earlyMessageRejectionPeriod\": \"PT1H\",\n" - + " \"skipOffsetGaps\": true\n" + + " \"earlyMessageRejectionPeriod\": \"PT1H\"\n" + "}"; KafkaSupervisorIOConfig config = mapper.readValue( @@ -122,7 +120,6 @@ public class KafkaSupervisorIOConfigTest Assert.assertEquals(Duration.standardMinutes(45), config.getCompletionTimeout()); Assert.assertEquals(Duration.standardHours(1), config.getLateMessageRejectionPeriod().get()); Assert.assertEquals(Duration.standardHours(1), config.getEarlyMessageRejectionPeriod().get()); - Assert.assertTrue("skipOffsetGaps", config.isSkipOffsetGaps()); } @Test diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java index a08806acc17..f2db280c51e 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/supervisor/KafkaSupervisorTest.java @@ -258,7 +258,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testNoInitialState() throws Exception { - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); addSomeEvents(1); Capture captured = Capture.newInstance(); @@ -289,7 +289,6 @@ public class KafkaSupervisorTest extends EasyMockSupport Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction()); Assert.assertFalse("minimumMessageTime", taskConfig.getMinimumMessageTime().isPresent()); Assert.assertFalse("maximumMessageTime", taskConfig.getMaximumMessageTime().isPresent()); - Assert.assertFalse("skipOffsetGaps", taskConfig.isSkipOffsetGaps()); Assert.assertEquals(topic, taskConfig.getStartPartitions().getStream()); Assert.assertEquals(0L, (long) taskConfig.getStartPartitions().getPartitionSequenceNumberMap().get(0)); @@ -305,7 +304,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testSkipOffsetGaps() throws Exception { - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, true); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); addSomeEvents(1); Capture captured = Capture.newInstance(); @@ -328,13 +327,12 @@ public class KafkaSupervisorTest extends EasyMockSupport KafkaIndexTask task = captured.getValue(); KafkaIndexTaskIOConfig taskConfig = task.getIOConfig(); - Assert.assertTrue("skipOffsetGaps", taskConfig.isSkipOffsetGaps()); } @Test public void testMultiTask() throws Exception { - supervisor = getSupervisor(1, 2, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 2, true, "PT1H", null, null); 
addSomeEvents(1); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -380,7 +378,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testReplicas() throws Exception { - supervisor = getSupervisor(2, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1H", null, null); addSomeEvents(1); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -417,7 +415,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testLateMessageRejectionPeriod() throws Exception { - supervisor = getSupervisor(2, 1, true, "PT1H", new Period("PT1H"), null, false); + supervisor = getSupervisor(2, 1, true, "PT1H", new Period("PT1H"), null); addSomeEvents(1); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -456,7 +454,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testEarlyMessageRejectionPeriod() throws Exception { - supervisor = getSupervisor(2, 1, true, "PT1H", null, new Period("PT1H"), false); + supervisor = getSupervisor(2, 1, true, "PT1H", null, new Period("PT1H")); addSomeEvents(1); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -498,7 +496,7 @@ public class KafkaSupervisorTest extends EasyMockSupport */ public void testLatestOffset() throws Exception { - supervisor = getSupervisor(1, 1, false, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, false, "PT1H", null, null); addSomeEvents(1100); Capture captured = Capture.newInstance(); @@ -518,9 +516,9 @@ public class KafkaSupervisorTest extends EasyMockSupport verifyAll(); KafkaIndexTask task = captured.getValue(); - Assert.assertEquals(1100L, (long) task.getIOConfig().getStartPartitions().getPartitionSequenceNumberMap().get(0)); - Assert.assertEquals(1100L, (long) task.getIOConfig().getStartPartitions().getPartitionSequenceNumberMap().get(1)); - Assert.assertEquals(1100L, (long) task.getIOConfig().getStartPartitions().getPartitionSequenceNumberMap().get(2)); + Assert.assertEquals(1101L, (long) task.getIOConfig().getStartPartitions().getPartitionSequenceNumberMap().get(0)); + Assert.assertEquals(1101L, (long) task.getIOConfig().getStartPartitions().getPartitionSequenceNumberMap().get(1)); + Assert.assertEquals(1101L, (long) task.getIOConfig().getStartPartitions().getPartitionSequenceNumberMap().get(2)); } @Test @@ -530,7 +528,7 @@ public class KafkaSupervisorTest extends EasyMockSupport */ public void testDatasourceMetadata() throws Exception { - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); addSomeEvents(100); Capture captured = Capture.newInstance(); @@ -560,7 +558,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test(expected = ISE.class) public void testBadMetadataOffsets() throws Exception { - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); addSomeEvents(1); expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes(); @@ -579,7 +577,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testKillIncompatibleTasks() throws Exception { - supervisor = getSupervisor(2, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1H", null, null); addSomeEvents(1); // unexpected # of partitions (kill) @@ -684,7 +682,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testKillBadPartitionAssignment() throws Exception { - supervisor = 
getSupervisor(1, 2, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 2, true, "PT1H", null, null); addSomeEvents(1); Task id1 = createKafkaIndexTask( @@ -791,7 +789,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testRequeueTaskWhenFailed() throws Exception { - supervisor = getSupervisor(2, 2, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 2, true, "PT1H", null, null); addSomeEvents(1); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -880,7 +878,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testRequeueAdoptedTaskWhenFailed() throws Exception { - supervisor = getSupervisor(2, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1H", null, null); addSomeEvents(1); DateTime now = DateTimes.nowUtc(); @@ -981,7 +979,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testQueueNextTasksOnSuccess() throws Exception { - supervisor = getSupervisor(2, 2, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 2, true, "PT1H", null, null); addSomeEvents(1); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -1083,7 +1081,7 @@ public class KafkaSupervisorTest extends EasyMockSupport { final TaskLocation location = new TaskLocation("testHost", 1234, -1); - supervisor = getSupervisor(2, 2, true, "PT1M", null, null, false); + supervisor = getSupervisor(2, 2, true, "PT1M", null, null); addSomeEvents(100); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -1178,7 +1176,7 @@ public class KafkaSupervisorTest extends EasyMockSupport { final TaskLocation location = new TaskLocation("testHost", 1234, -1); - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); addSomeEvents(1); Task task = createKafkaIndexTask( @@ -1284,7 +1282,7 @@ public class KafkaSupervisorTest extends EasyMockSupport { final TaskLocation location = new TaskLocation("testHost", 1234, -1); - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); addSomeEvents(1); Task task = createKafkaIndexTask( @@ -1383,7 +1381,7 @@ public class KafkaSupervisorTest extends EasyMockSupport final TaskLocation location2 = new TaskLocation("testHost2", 145, -1); final DateTime startTime = DateTimes.nowUtc(); - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); addSomeEvents(6); Task id1 = createKafkaIndexTask( @@ -1474,23 +1472,23 @@ public class KafkaSupervisorTest extends EasyMockSupport Assert.assertEquals(startTime, activeReport.getStartTime()); Assert.assertEquals(ImmutableMap.of(0, 1L, 1, 2L, 2, 3L), activeReport.getStartingOffsets()); Assert.assertEquals(ImmutableMap.of(0, 4L, 1, 5L, 2, 6L), activeReport.getCurrentOffsets()); - Assert.assertEquals(ImmutableMap.of(0, 2L, 1, 1L, 2, 0L), activeReport.getLag()); + Assert.assertEquals(ImmutableMap.of(0, 3L, 1, 2L, 2, 1L), activeReport.getLag()); Assert.assertEquals("id1", publishingReport.getId()); Assert.assertEquals(ImmutableMap.of(0, 0L, 1, 0L, 2, 0L), publishingReport.getStartingOffsets()); Assert.assertEquals(ImmutableMap.of(0, 1L, 1, 2L, 2, 3L), publishingReport.getCurrentOffsets()); Assert.assertEquals(null, publishingReport.getLag()); - Assert.assertEquals(ImmutableMap.of(0, 6L, 1, 6L, 2, 6L), payload.getLatestOffsets()); - Assert.assertEquals(ImmutableMap.of(0, 2L, 1, 1L, 2, 0L), 
payload.getMinimumLag()); - Assert.assertEquals(3L, (long) payload.getAggregateLag()); + Assert.assertEquals(ImmutableMap.of(0, 7L, 1, 7L, 2, 7L), payload.getLatestOffsets()); + Assert.assertEquals(ImmutableMap.of(0, 3L, 1, 2L, 2, 1L), payload.getMinimumLag()); + Assert.assertEquals(6L, (long) payload.getAggregateLag()); Assert.assertTrue(payload.getOffsetsLastUpdated().plusMinutes(1).isAfterNow()); } @Test public void testKillUnresponsiveTasksWhileGettingStartTime() throws Exception { - supervisor = getSupervisor(2, 2, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 2, true, "PT1H", null, null); addSomeEvents(1); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -1549,7 +1547,7 @@ public class KafkaSupervisorTest extends EasyMockSupport { final TaskLocation location = new TaskLocation("testHost", 1234, -1); - supervisor = getSupervisor(2, 2, true, "PT1M", null, null, false); + supervisor = getSupervisor(2, 2, true, "PT1M", null, null); addSomeEvents(100); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -1635,7 +1633,7 @@ public class KafkaSupervisorTest extends EasyMockSupport { final TaskLocation location = new TaskLocation("testHost", 1234, -1); - supervisor = getSupervisor(2, 2, true, "PT1M", null, null, false); + supervisor = getSupervisor(2, 2, true, "PT1M", null, null); addSomeEvents(100); Capture captured = Capture.newInstance(CaptureType.ALL); @@ -1726,7 +1724,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test(expected = IllegalStateException.class) public void testStopNotStarted() { - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); supervisor.stop(false); } @@ -1738,7 +1736,7 @@ public class KafkaSupervisorTest extends EasyMockSupport taskRunner.unregisterListener(StringUtils.format("KafkaSupervisor-%s", DATASOURCE)); replayAll(); - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); supervisor.start(); supervisor.stop(false); @@ -1752,7 +1750,7 @@ public class KafkaSupervisorTest extends EasyMockSupport final TaskLocation location2 = new TaskLocation("testHost2", 145, -1); final DateTime startTime = DateTimes.nowUtc(); - supervisor = getSupervisor(2, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1H", null, null); addSomeEvents(1); Task id1 = createKafkaIndexTask( @@ -1864,7 +1862,7 @@ public class KafkaSupervisorTest extends EasyMockSupport taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class)); replayAll(); - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); supervisor.start(); supervisor.runInternal(); verifyAll(); @@ -1881,7 +1879,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testResetDataSourceMetadata() throws Exception { - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes(); expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes(); @@ -1937,7 +1935,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testResetNoDataSourceMetadata() throws Exception { - supervisor = getSupervisor(1, 1, true, 
"PT1H", null, null, false); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null); expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); expect(taskMaster.getTaskRunner()).andReturn(Optional.of(taskRunner)).anyTimes(); expect(taskRunner.getRunningTasks()).andReturn(Collections.EMPTY_LIST).anyTimes(); @@ -1970,7 +1968,7 @@ public class KafkaSupervisorTest extends EasyMockSupport final TaskLocation location2 = new TaskLocation("testHost2", 145, -1); final DateTime startTime = DateTimes.nowUtc(); - supervisor = getSupervisor(2, 1, true, "PT1H", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1H", null, null); addSomeEvents(1); Task id1 = createKafkaIndexTask( @@ -2068,7 +2066,7 @@ public class KafkaSupervisorTest extends EasyMockSupport public void testNoDataIngestionTasks() throws Exception { final DateTime startTime = DateTimes.nowUtc(); - supervisor = getSupervisor(2, 1, true, "PT1S", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1S", null, null); //not adding any events Task id1 = createKafkaIndexTask( "id1", @@ -2164,7 +2162,7 @@ public class KafkaSupervisorTest extends EasyMockSupport public void testCheckpointForInactiveTaskGroup() throws InterruptedException, ExecutionException, TimeoutException, JsonProcessingException { - supervisor = getSupervisor(2, 1, true, "PT1S", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1S", null, null); //not adding any events final Task id1 = createKafkaIndexTask( "id1", @@ -2267,7 +2265,7 @@ public class KafkaSupervisorTest extends EasyMockSupport public void testCheckpointForUnknownTaskGroup() throws InterruptedException { - supervisor = getSupervisor(2, 1, true, "PT1S", null, null, false); + supervisor = getSupervisor(2, 1, true, "PT1S", null, null); //not adding any events final Task id1 = createKafkaIndexTask( "id1", @@ -2346,7 +2344,7 @@ public class KafkaSupervisorTest extends EasyMockSupport public void testCheckpointWithNullTaskGroupId() throws InterruptedException, ExecutionException, TimeoutException, JsonProcessingException { - supervisor = getSupervisor(1, 3, true, "PT1S", null, null, false); + supervisor = getSupervisor(1, 3, true, "PT1S", null, null); //not adding any events final Task id1 = createKafkaIndexTask( "id1", @@ -2438,7 +2436,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testSuspendedNoRunningTasks() throws Exception { - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false, true, kafkaHost); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null, true, kafkaHost); addSomeEvents(1); expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); @@ -2471,7 +2469,7 @@ public class KafkaSupervisorTest extends EasyMockSupport final TaskLocation location2 = new TaskLocation("testHost2", 145, -1); final DateTime startTime = DateTimes.nowUtc(); - supervisor = getSupervisor(2, 1, true, "PT1H", null, null, false, true, kafkaHost); + supervisor = getSupervisor(2, 1, true, "PT1H", null, null, true, kafkaHost); addSomeEvents(1); Task id1 = createKafkaIndexTask( @@ -2579,7 +2577,7 @@ public class KafkaSupervisorTest extends EasyMockSupport taskRunner.registerListener(anyObject(TaskRunnerListener.class), anyObject(Executor.class)); replayAll(); - supervisor = getSupervisor(1, 1, true, "PT1H", null, null, false, true, kafkaHost); + supervisor = getSupervisor(1, 1, true, "PT1H", null, null, true, kafkaHost); supervisor.start(); supervisor.runInternal(); verifyAll(); @@ -2604,7 +2602,6 @@ public 
class KafkaSupervisorTest extends EasyMockSupport null, null, false, - false, StringUtils.format("badhostname:%d", kafkaServer.getPort()) ); addSomeEvents(1); @@ -2670,7 +2667,6 @@ public class KafkaSupervisorTest extends EasyMockSupport Assert.assertTrue("isUseTransaction", taskConfig.isUseTransaction()); Assert.assertFalse("minimumMessageTime", taskConfig.getMinimumMessageTime().isPresent()); Assert.assertFalse("maximumMessageTime", taskConfig.getMaximumMessageTime().isPresent()); - Assert.assertFalse("skipOffsetGaps", taskConfig.isSkipOffsetGaps()); Assert.assertEquals(topic, taskConfig.getStartPartitions().getStream()); Assert.assertEquals(0L, (long) taskConfig.getStartPartitions().getPartitionSequenceNumberMap().get(0)); @@ -2686,7 +2682,7 @@ public class KafkaSupervisorTest extends EasyMockSupport @Test public void testGetCurrentTotalStats() { - supervisor = getSupervisor(1, 2, true, "PT1H", null, null, false); + supervisor = getSupervisor(1, 2, true, "PT1H", null, null, false, kafkaHost); supervisor.addTaskGroupToActivelyReadingTaskGroup( supervisor.getTaskGroupIdForPartition(0), ImmutableMap.of(0, 0L), @@ -2734,6 +2730,8 @@ public class KafkaSupervisorTest extends EasyMockSupport AdminUtils.createTopic(zkUtils, topic, NUM_PARTITIONS, 1, new Properties(), RackAwareMode.Enforced$.MODULE$); try (final KafkaProducer kafkaProducer = kafkaServer.newProducer()) { + kafkaProducer.initTransactions(); + kafkaProducer.beginTransaction(); for (int i = 0; i < NUM_PARTITIONS; i++) { for (int j = 0; j < numEventsPerPartition; j++) { kafkaProducer.send( @@ -2746,6 +2744,7 @@ public class KafkaSupervisorTest extends EasyMockSupport ).get(); } } + kafkaProducer.commitTransaction(); } } @@ -2755,8 +2754,7 @@ public class KafkaSupervisorTest extends EasyMockSupport boolean useEarliestOffset, String duration, Period lateMessageRejectionPeriod, - Period earlyMessageRejectionPeriod, - boolean skipOffsetGaps + Period earlyMessageRejectionPeriod ) { return getSupervisor( @@ -2766,7 +2764,6 @@ public class KafkaSupervisorTest extends EasyMockSupport duration, lateMessageRejectionPeriod, earlyMessageRejectionPeriod, - skipOffsetGaps, false, kafkaHost ); @@ -2779,7 +2776,6 @@ public class KafkaSupervisorTest extends EasyMockSupport String duration, Period lateMessageRejectionPeriod, Period earlyMessageRejectionPeriod, - boolean skipOffsetGaps, boolean suspended, String kafkaHost ) @@ -2787,6 +2783,7 @@ public class KafkaSupervisorTest extends EasyMockSupport Map consumerProperties = new HashMap<>(); consumerProperties.put("myCustomKey", "myCustomValue"); consumerProperties.put("bootstrap.servers", kafkaHost); + consumerProperties.put("isolation.level", "read_committed"); KafkaSupervisorIOConfig kafkaSupervisorIOConfig = new KafkaSupervisorIOConfig( topic, replicas, @@ -2799,8 +2796,7 @@ public class KafkaSupervisorTest extends EasyMockSupport useEarliestOffset, new Period("PT30M"), lateMessageRejectionPeriod, - earlyMessageRejectionPeriod, - skipOffsetGaps + earlyMessageRejectionPeriod ); KafkaIndexTaskClientFactory taskClientFactory = new KafkaIndexTaskClientFactory( @@ -2908,8 +2904,7 @@ public class KafkaSupervisorTest extends EasyMockSupport KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS, true, minimumMessageTime, - maximumMessageTime, - false + maximumMessageTime ), Collections.emptyMap(), null, diff --git a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/test/TestBroker.java 
b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/test/TestBroker.java index 6659f92ebbf..10c9b2ef409 100644 --- a/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/test/TestBroker.java +++ b/extensions-core/kafka-indexing-service/src/test/java/org/apache/druid/indexing/kafka/test/TestBroker.java @@ -40,11 +40,12 @@ import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Properties; +import java.util.Random; import java.util.concurrent.ThreadLocalRandom; public class TestBroker implements Closeable { - + private static final Random RANDOM = ThreadLocalRandom.current(); private final String zookeeperConnect; private final File directory; private final boolean directoryCleanup; @@ -77,6 +78,9 @@ public class TestBroker implements Closeable props.setProperty("broker.id", String.valueOf(id)); props.setProperty("port", String.valueOf(ThreadLocalRandom.current().nextInt(9999) + 10000)); props.setProperty("advertised.host.name", "localhost"); + props.setProperty("transaction.state.log.replication.factor", "1"); + props.setProperty("offsets.topic.replication.factor", "1"); + props.setProperty("transaction.state.log.min.isr", "1"); props.putAll(brokerProps); final KafkaConfig config = new KafkaConfig(props); @@ -112,6 +116,8 @@ public class TestBroker implements Closeable props.put("key.serializer", ByteArraySerializer.class.getName()); props.put("value.serializer", ByteArraySerializer.class.getName()); props.put("acks", "all"); + props.put("enable.idempotence", "true"); + props.put("transactional.id", String.valueOf(RANDOM.nextInt())); return props; } @@ -121,8 +127,9 @@ public class TestBroker implements Closeable props.put("bootstrap.servers", StringUtils.format("localhost:%d", getPort())); props.put("key.deserializer", ByteArrayDeserializer.class.getName()); props.put("value.deserializer", ByteArrayDeserializer.class.getName()); - props.put("group.id", String.valueOf(ThreadLocalRandom.current().nextInt())); + props.put("group.id", String.valueOf(RANDOM.nextInt())); props.put("auto.offset.reset", "earliest"); + props.put("isolation.level", "read_committed"); return props; } diff --git a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskIOConfig.java b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskIOConfig.java index 307e971bcfd..8dd32f24634 100644 --- a/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskIOConfig.java +++ b/extensions-core/kinesis-indexing-service/src/main/java/org/apache/druid/indexing/kinesis/KinesisIndexTaskIOConfig.java @@ -68,7 +68,6 @@ public class KinesisIndexTaskIOConfig extends SeekableStreamIndexTaskIOConfig extends AbstractTask +public abstract class SeekableStreamIndexTask extends AbstractTask implements ChatHandler { public static final long LOCK_ACQUIRE_TIMEOUT_SECONDS = 15; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskIOConfig.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskIOConfig.java index 6c469c7d012..dde9702b22f 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskIOConfig.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/SeekableStreamIndexTaskIOConfig.java @@ -43,7 +43,6 @@ public 
abstract class SeekableStreamIndexTaskIOConfig minimumMessageTime; private final Optional maximumMessageTime; - private final boolean skipOffsetGaps; private final Set exclusiveStartSequenceNumberPartitions; @JsonCreator @@ -55,7 +54,6 @@ public abstract class SeekableStreamIndexTaskIOConfig exclusiveStartSequenceNumberPartitions ) @@ -67,7 +65,6 @@ public abstract class SeekableStreamIndexTaskIOConfig Partition Number Type * @param Sequence Number Type */ -public abstract class SeekableStreamIndexTaskRunner implements ChatHandler +public abstract class SeekableStreamIndexTaskRunner implements ChatHandler { public enum Status { @@ -474,7 +474,7 @@ public abstract class SeekableStreamIndexTaskRunner record : records) { + // for Kafka, the end offsets are exclusive, so skip it if (isEndSequenceOffsetsExclusive() && createSequenceNumber(record.getSequenceNumber()).compareTo( - createSequenceNumber(endOffsets.get(record.getPartitionId()))) == 0) { + createSequenceNumber(endOffsets.get(record.getPartitionId()))) >= 0) { continue; } @@ -530,17 +531,6 @@ public abstract class SeekableStreamIndexTaskRunner valueBytess = record.getData(); final List rows; @@ -1897,7 +1887,7 @@ public abstract class SeekableStreamIndexTaskRunner partition id * @param sequence number */ -public class OrderedPartitionableRecord +public class OrderedPartitionableRecord { private final String stream; private final PartitionIdType partitionId; diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/common/RecordSupplier.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/common/RecordSupplier.java index d9e599da0c8..3a6e87ed3b2 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/common/RecordSupplier.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/common/RecordSupplier.java @@ -36,7 +36,7 @@ import java.util.Set; * @param Sequence Number Type */ @Beta -public interface RecordSupplier extends Closeable +public interface RecordSupplier extends Closeable { /** * assigns the given partitions to this RecordSupplier diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java index 25250ac0487..4c6509d31f8 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/seekablestream/supervisor/SeekableStreamSupervisor.java @@ -119,7 +119,7 @@ import java.util.stream.Stream; * @param the type of the partition id, for example, partitions in Kafka are int type while partitions in Kinesis are String type * @param the type of the sequence number or offsets, for example, Kafka uses long offsets while Kinesis uses String sequence numbers */ -public abstract class SeekableStreamSupervisor +public abstract class SeekableStreamSupervisor implements Supervisor { public static final String IS_INCREMENTAL_HANDOFF_SUPPORTED = "IS_INCREMENTAL_HANDOFF_SUPPORTED"; diff --git a/integration-tests/pom.xml b/integration-tests/pom.xml index 24c910c1880..6159021d9e6 100644 --- a/integration-tests/pom.xml +++ b/integration-tests/pom.xml @@ -32,7 +32,7 @@ - 0.10.2.2 + 2.1.0 @@ -51,17 +51,6 @@ druid-datasketches ${project.parent.version} - - org.apache.druid.extensions - druid-kafka-eight - ${project.parent.version} - - - 
kafka_2.10 - org.apache.kafka - - - org.apache.druid.extensions druid-histogram diff --git a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaTest.java b/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaTest.java deleted file mode 100644 index 10f9aab3bb1..00000000000 --- a/integration-tests/src/test/java/org/apache/druid/tests/indexer/ITKafkaTest.java +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.tests.indexer; - -import com.google.common.base.Throwables; -import com.google.inject.Inject; -import kafka.admin.AdminUtils; -import kafka.admin.RackAwareMode; -import kafka.utils.ZKStringSerializer$; -import kafka.utils.ZkUtils; -import org.I0Itec.zkclient.ZkClient; -import org.I0Itec.zkclient.ZkConnection; -import org.apache.commons.io.IOUtils; -import org.apache.druid.java.util.common.DateTimes; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.StringUtils; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.testing.IntegrationTestingConfig; -import org.apache.druid.testing.guice.DruidTestModuleFactory; -import org.apache.druid.testing.utils.RetryUtil; -import org.apache.druid.testing.utils.TestQueryHelper; -import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.common.serialization.StringSerializer; -import org.joda.time.DateTime; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.testng.annotations.AfterClass; -import org.testng.annotations.BeforeSuite; -import org.testng.annotations.Guice; -import org.testng.annotations.Test; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.Callable; - -/* - * This is a test for the kafka firehose. - */ -@Guice(moduleFactory = DruidTestModuleFactory.class) -public class ITKafkaTest extends AbstractIndexerTest -{ - private static final Logger LOG = new Logger(ITKafkaTest.class); - private static final int DELAY_BETWEEN_EVENTS_SECS = 5; - private static final String INDEXER_FILE = "/indexer/kafka_index_task.json"; - private static final String QUERIES_FILE = "/indexer/kafka_index_queries.json"; - private static final String DATASOURCE = "kafka_test"; - private static final String TOPIC_NAME = "kafkaTopic"; - private static final int MINUTES_TO_SEND = 2; - public static final String testPropertyPrefix = "kafka.test.property."; - - - // We'll fill in the current time and numbers for added, deleted and changed - // before sending the event. 
- final String event_template = - "{\"timestamp\": \"%s\"," + - "\"page\": \"Gypsy Danger\"," + - "\"language\" : \"en\"," + - "\"user\" : \"nuclear\"," + - "\"unpatrolled\" : \"true\"," + - "\"newPage\" : \"true\"," + - "\"robot\": \"false\"," + - "\"anonymous\": \"false\"," + - "\"namespace\":\"article\"," + - "\"continent\":\"North America\"," + - "\"country\":\"United States\"," + - "\"region\":\"Bay Area\"," + - "\"city\":\"San Francisco\"," + - "\"added\":%d," + - "\"deleted\":%d," + - "\"delta\":%d}"; - - private String taskID; - private ZkClient zkClient; - private ZkUtils zkUtils; - private boolean segmentsExist; // to tell if we should remove segments during teardown - - // format for the querying interval - private final DateTimeFormatter INTERVAL_FMT = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:'00Z'"); - // format for the expected timestamp in a query response - private final DateTimeFormatter TIMESTAMP_FMT = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'.000Z'"); - private DateTime dtFirst; // timestamp of 1st event - private DateTime dtLast; // timestamp of last event - - @Inject - private TestQueryHelper queryHelper; - @Inject - private IntegrationTestingConfig config; - - private String fullDatasourceName; - - @BeforeSuite - public void setFullDatasourceName() - { - fullDatasourceName = DATASOURCE + config.getExtraDatasourceNameSuffix(); - } - @Test - public void testKafka() - { - LOG.info("Starting test: ITKafkaTest"); - - // create topic - try { - int sessionTimeoutMs = 10000; - int connectionTimeoutMs = 10000; - String zkHosts = config.getZookeeperHosts(); - zkClient = new ZkClient(zkHosts, sessionTimeoutMs, connectionTimeoutMs, ZKStringSerializer$.MODULE$); - zkUtils = new ZkUtils(zkClient, new ZkConnection(zkHosts, sessionTimeoutMs), false); - if (config.manageKafkaTopic()) { - int numPartitions = 1; - int replicationFactor = 1; - Properties topicConfig = new Properties(); - // addFilteredProperties(topicConfig); - AdminUtils.createTopic( - zkUtils, - TOPIC_NAME, - numPartitions, - replicationFactor, - topicConfig, - RackAwareMode.Disabled$.MODULE$ - ); - } - } - catch (Exception e) { - throw new ISE(e, "could not create kafka topic"); - } - - // set up kafka producer - Properties properties = new Properties(); - addFilteredProperties(properties); - properties.put("bootstrap.servers", config.getKafkaHost()); - LOG.info("Kafka bootstrap.servers: [%s]", config.getKafkaHost()); - properties.put("acks", "all"); - properties.put("retries", "3"); - - KafkaProducer producer = new KafkaProducer<>( - properties, - new StringSerializer(), - new StringSerializer() - ); - - DateTimeZone zone = DateTimes.inferTzFromString("UTC"); - // format for putting into events - DateTimeFormatter event_fmt = DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss'Z'"); - - DateTime dt = new DateTime(zone); // timestamp to put on events - dtFirst = dt; // timestamp of 1st event - dtLast = dt; // timestamp of last event - - // these are used to compute the expected aggregations - int added = 0; - int num_events = 10; - - // send data to kafka - for (int i = 0; i < num_events; i++) { - added += i; - // construct the event to send - String event = StringUtils.format( - event_template, - event_fmt.print(dt), i, 0, i - ); - LOG.info("sending event: [%s]", event); - try { - // Send event to kafka - producer.send(new ProducerRecord(TOPIC_NAME, event)).get(); - } - catch (Exception ioe) { - throw Throwables.propagate(ioe); - } - - dtLast = dt; - dt = new DateTime(zone); - } - - producer.close(); - - 
String indexerSpec; - - // replace temp strings in indexer file - try { - LOG.info("indexerFile name: [%s]", INDEXER_FILE); - - Properties consumerProperties = new Properties(); - consumerProperties.put("zookeeper.connect", config.getZookeeperInternalHosts()); - consumerProperties.put("zookeeper.connection.timeout.ms", "15000"); - consumerProperties.put("zookeeper.sync.time.ms", "5000"); - consumerProperties.put("group.id", Long.toString(System.currentTimeMillis())); - consumerProperties.put("fetch.message.max.bytes", "1048586"); - consumerProperties.put("auto.offset.reset", "smallest"); - consumerProperties.put("auto.commit.enable", "false"); - - addFilteredProperties(consumerProperties); - - indexerSpec = getTaskAsString(INDEXER_FILE); - indexerSpec = StringUtils.replace(indexerSpec, "%%DATASOURCE%%", fullDatasourceName); - indexerSpec = StringUtils.replace(indexerSpec, "%%TOPIC%%", TOPIC_NAME); - indexerSpec = StringUtils.replace(indexerSpec, "%%COUNT%%", Integer.toString(num_events)); - String consumerPropertiesJson = jsonMapper.writeValueAsString(consumerProperties); - indexerSpec = StringUtils.replace(indexerSpec, "%%CONSUMER_PROPERTIES%%", consumerPropertiesJson); - - LOG.info("indexerFile: [%s]\n", indexerSpec); - } - catch (Exception e) { - // log here so the message will appear in the console output - LOG.error("could not read indexer file [%s]", INDEXER_FILE); - throw new ISE(e, "could not read indexer file [%s]", INDEXER_FILE); - } - - // start indexing task - taskID = indexer.submitTask(indexerSpec); - LOG.info("-------------SUBMITTED TASK"); - - // wait for the task to finish - indexer.waitUntilTaskCompletes(taskID, 10000, 60); - - // wait for segments to be handed off - try { - RetryUtil.retryUntil( - new Callable() - { - @Override - public Boolean call() - { - return coordinator.areSegmentsLoaded(fullDatasourceName); - } - }, - true, - 10000, - 30, - "Real-time generated segments loaded" - ); - } - catch (Exception e) { - throw Throwables.propagate(e); - } - LOG.info("segments are present"); - segmentsExist = true; - - // put the timestamps into the query structure - String queryResponseTemplate; - InputStream is = ITKafkaTest.class.getResourceAsStream(QUERIES_FILE); - if (null == is) { - throw new ISE("could not open query file: %s", QUERIES_FILE); - } - - try { - queryResponseTemplate = IOUtils.toString(is, "UTF-8"); - } - catch (IOException e) { - throw new ISE(e, "could not read query file: %s", QUERIES_FILE); - } - - String queryStr = queryResponseTemplate; - queryStr = StringUtils.replace(queryStr, "%%DATASOURCE%%", fullDatasourceName); - // time boundary - queryStr = StringUtils.replace(queryStr, "%%TIMEBOUNDARY_RESPONSE_TIMESTAMP%%", TIMESTAMP_FMT.print(dtFirst)); - queryStr = StringUtils.replace(queryStr, "%%TIMEBOUNDARY_RESPONSE_MAXTIME%%", TIMESTAMP_FMT.print(dtLast)); - queryStr = StringUtils.replace(queryStr, "%%TIMEBOUNDARY_RESPONSE_MINTIME%%", TIMESTAMP_FMT.print(dtFirst)); - // time series - queryStr = StringUtils.replace(queryStr, "%%TIMESERIES_QUERY_START%%", INTERVAL_FMT.print(dtFirst)); - String queryEnd = INTERVAL_FMT.print(dtFirst.plusMinutes(MINUTES_TO_SEND + 2)); - queryStr = StringUtils.replace(queryStr, "%%TIMESERIES_QUERY_END%%", queryEnd); - queryStr = StringUtils.replace(queryStr, "%%TIMESERIES_RESPONSE_TIMESTAMP%%", TIMESTAMP_FMT.print(dtFirst)); - queryStr = StringUtils.replace(queryStr, "%%TIMESERIES_ADDED%%", Integer.toString(added)); - queryStr = StringUtils.replace(queryStr, "%%TIMESERIES_NUMEVENTS%%", Integer.toString(num_events)); - - // 
this query will probably be answered from the realtime task - try { - this.queryHelper.testQueriesFromString(queryStr, 2); - } - catch (Exception e) { - throw Throwables.propagate(e); - } - } - - @AfterClass - public void afterClass() - { - LOG.info("teardown"); - if (config.manageKafkaTopic()) { - // delete kafka topic - AdminUtils.deleteTopic(zkUtils, TOPIC_NAME); - } - - // remove segments - if (segmentsExist) { - unloadAndKillData(fullDatasourceName); - } - } - - public void addFilteredProperties(Properties properties) - { - for (Map.Entry entry : config.getProperties().entrySet()) { - if (entry.getKey().startsWith(testPropertyPrefix)) { - properties.put(entry.getKey().substring(testPropertyPrefix.length()), entry.getValue()); - } - } - } -} - diff --git a/integration-tests/src/test/resources/indexer/kafka_index_task.json b/integration-tests/src/test/resources/indexer/kafka_index_task.json deleted file mode 100644 index 55e28c7c47a..00000000000 --- a/integration-tests/src/test/resources/indexer/kafka_index_task.json +++ /dev/null @@ -1,68 +0,0 @@ -{ - "type" : "index_realtime", - "spec" : { - "dataSchema": { - "dataSource": "%%DATASOURCE%%", - "parser" : { - "type" : "string", - "parseSpec" : { - "format" : "json", - "timestampSpec" : { - "column" : "timestamp", - "format" : "auto" - }, - "dimensionsSpec" : { - "dimensions": ["page","language","user","unpatrolled","newPage","robot","anonymous","namespace","continent","country","region","city"], - "dimensionExclusions" : [], - "spatialDimensions" : [] - } - } - }, - "metricsSpec": [ - { - "type": "count", - "name": "count" - }, - { - "type": "doubleSum", - "name": "added", - "fieldName": "added" - }, - { - "type": "doubleSum", - "name": "deleted", - "fieldName": "deleted" - }, - { - "type": "doubleSum", - "name": "delta", - "fieldName": "delta" - } - ], - "granularitySpec": { - "type" : "uniform", - "segmentGranularity": "MINUTE", - "queryGranularity": "NONE" - } - }, - "ioConfig" : { - "type" : "realtime", - "firehose": { - "type": "fixedCount", - "count": "%%COUNT%%", - "delegate": { - "type": "kafka-0.8", - "consumerProps": %%CONSUMER_PROPERTIES%%, - "feed": "%%TOPIC%%" - } - } - }, - "tuningConfig": { - "type" : "realtime", - "maxRowsInMemory": 500000, - "intermediatePersistPeriod": "PT3M", - "windowPeriod": "PT150S", - "basePersistDirectory": "/home/y/var/druid_state/kafka_test/realtime/basePersist" - } - } -} diff --git a/pom.xml b/pom.xml index 351e735b0e5..95ca07b21e9 100644 --- a/pom.xml +++ b/pom.xml @@ -696,7 +696,7 @@ org.slf4j slf4j-api - 1.6.4 + 1.7.25 org.roaringbitmap From cadb6c52804d5913e9c7c8cea8e4d431ad4c68b9 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 19 Feb 2019 10:52:05 -0800 Subject: [PATCH 21/25] Missing Overlord and MiddleManager api docs (#7042) * document middle manager api * re-arrange * correction * document more missing overlord api calls, minor re-arrange of some code i was referencing * fix it * this will fix it * fixup * link to other docs --- .../apache/druid/indexer/TaskStatusPlus.java | 4 - docs/content/operations/api-reference.md | 183 +++++++- .../overlord/http/OverlordResource.java | 400 +++++++++--------- 3 files changed, 351 insertions(+), 236 deletions(-) diff --git a/core/src/main/java/org/apache/druid/indexer/TaskStatusPlus.java b/core/src/main/java/org/apache/druid/indexer/TaskStatusPlus.java index 4912900fb88..34733af08bb 100644 --- a/core/src/main/java/org/apache/druid/indexer/TaskStatusPlus.java +++ b/core/src/main/java/org/apache/druid/indexer/TaskStatusPlus.java @@ -23,7 +23,6 
@@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import org.apache.druid.java.util.common.RE; -import org.apache.druid.java.util.common.logger.Logger; import org.joda.time.DateTime; import javax.annotation.Nullable; @@ -31,8 +30,6 @@ import java.util.Objects; public class TaskStatusPlus { - private static final Logger log = new Logger(TaskStatusPlus.class); - private final String id; private final String type; private final DateTime createdTime; @@ -74,7 +71,6 @@ public class TaskStatusPlus ); } - @JsonCreator public TaskStatusPlus( @JsonProperty("id") String id, diff --git a/docs/content/operations/api-reference.md b/docs/content/operations/api-reference.md index 23d118864a9..407b47355df 100644 --- a/docs/content/operations/api-reference.md +++ b/docs/content/operations/api-reference.md @@ -143,14 +143,17 @@ Returns full segment metadata for a specific segment as stored in the metadata s * `/druid/coordinator/v1/metadata/datasources/{dataSourceName}/segments` -Returns a list of all segments, overlapping with any of given intervals, for a datasource as stored in the metadata store. Request body is array of string intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"] +Returns a list of all segments, overlapping with any of given intervals, for a datasource as stored in the metadata store. Request body is array of string IS0 8601 intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"] * `/druid/coordinator/v1/metadata/datasources/{dataSourceName}/segments?full` -Returns a list of all segments, overlapping with any of given intervals, for a datasource with the full segment metadata as stored in the metadata store. Request body is array of string intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"] +Returns a list of all segments, overlapping with any of given intervals, for a datasource with the full segment metadata as stored in the metadata store. Request body is array of string ISO 8601 intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"] #### Datasources +Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/` +(e.g., 2016-06-27_2016-06-28). + ##### GET * `/druid/coordinator/v1/datasources` @@ -187,7 +190,7 @@ Returns a map of an interval to a map of segment metadata to a set of server nam * `/druid/coordinator/v1/datasources/{dataSourceName}/intervals/{interval}` -Returns a set of segment ids for an ISO8601 interval. Note that {interval} parameters are delimited by a `_` instead of a `/` (e.g., 2016-06-27_2016-06-28). +Returns a set of segment ids for an interval. * `/druid/coordinator/v1/datasources/{dataSourceName}/intervals/{interval}?simple` @@ -234,18 +237,19 @@ Enables a segment of a datasource. Disables a datasource. * `/druid/coordinator/v1/datasources/{dataSourceName}/intervals/{interval}` -* `@Deprecated. /druid/coordinator/v1/datasources/{dataSourceName}?kill=true&interval={myISO8601Interval}` +* `@Deprecated. 
/druid/coordinator/v1/datasources/{dataSourceName}?kill=true&interval={myInterval}` Runs a [Kill task](../ingestion/tasks.html) for a given interval and datasource. -Note that {interval} parameters are delimited by a `_` instead of a `/` (e.g., 2016-06-27_2016-06-28). - * `/druid/coordinator/v1/datasources/{dataSourceName}/segments/{segmentId}` Disables a segment. #### Retention Rules +Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/` +(e.g., 2016-06-27_2016-06-28). + ##### GET * `/druid/coordinator/v1/rules` @@ -292,9 +296,10 @@ Optional Header Parameters for auditing the config change can also be specified. #### Intervals -##### GET +Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/` +(e.g., 2016-06-27_2016-06-28). -Note that {interval} parameters are delimited by a `_` instead of a `/` (e.g., 2016-06-27_2016-06-28). +##### GET * `/druid/coordinator/v1/intervals` @@ -338,6 +343,7 @@ will be set for them. Creates or updates the compaction config for a dataSource. See [Compaction Configuration](../configuration/index.html#compaction-dynamic-configuration) for configuration details. + ##### DELETE * `/druid/coordinator/v1/config/compaction/{dataSource}` @@ -357,12 +363,12 @@ ports. * `/druid/coordinator/v1/servers?simple` Returns a list of server data objects in which each object has the following keys: -- `host`: host URL include (`{hostname}:{port}`) -- `type`: node type (`indexer-executor`, `historical`) -- `currSize`: storage size currently used -- `maxSize`: maximum storage size -- `priority` -- `tier` +* `host`: host URL include (`{hostname}:{port}`) +* `type`: node type (`indexer-executor`, `historical`) +* `currSize`: storage size currently used +* `maxSize`: maximum storage size +* `priority` +* `tier` ### Overlord @@ -382,8 +388,44 @@ only want the active leader to be considered in-service at the load balancer. #### Tasks +Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/` +(e.g., 2016-06-27_2016-06-28). + ##### GET +* `/druid/indexer/v1/tasks` + +Retrieve list of tasks. Accepts query string parameters `state`, `datasource`, `createdTimeInterval`, `max`, and `type`. + +|Query Parameter |Description | +|---|---| +|`state`|filter list of tasks by task state, valid options are `running`, `complete`, `waiting`, and `pending`.| +| `datasource`| return tasks filtered by Druid datasource.| +| `createdTimeInterval`| return tasks created within the specified interval. | +| `max`| maximum number of `"complete"` tasks to return. Only applies when `state` is set to `"complete"`.| +| `type`| filter tasks by task type. See [task documentation](../ingestion/tasks.html) for more details.| + + +* `/druid/indexer/v1/completeTasks` + +Retrieve list of complete tasks. Equivalent to `/druid/indexer/v1/tasks?state=complete`. + +* `/druid/indexer/v1/runningTasks` + +Retrieve list of running tasks. Equivalent to `/druid/indexer/v1/tasks?state=running`. + +* `/druid/indexer/v1/waitingTasks` + +Retrieve list of waiting tasks. Equivalent to `/druid/indexer/v1/tasks?state=waiting`. + +* `/druid/indexer/v1/pendingTasks` + +Retrieve list of pending tasks. Equivalent to `/druid/indexer/v1/tasks?state=pending`. + +* `/druid/indexer/v1/task/{taskId}` + +Retrieve the 'payload' of a task. + * `/druid/indexer/v1/task/{taskId}/status` Retrieve the status of a task. @@ -406,14 +448,27 @@ Retrieve a [task completion report](../ingestion/reports.html) for a task. 
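As a minimal illustration of the task-listing endpoints documented above — assuming an Overlord reachable at `localhost:8090` (its usual default port) and a hypothetical datasource named `wikipedia`, neither of which comes from this patch — a client-side call might look like the following sketch:

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class OverlordTaskListExample
{
  public static void main(String[] args) throws Exception
  {
    // Assumption: Overlord running locally on its default port; the datasource name is hypothetical.
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:8090/druid/indexer/v1/tasks?state=running&datasource=wikipedia"))
        .header("Accept", "application/json")
        .GET()
        .build();

    // The endpoint returns a JSON array of task status objects.
    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    System.out.println(response.body());
  }
}
```

The same pattern applies to `/druid/indexer/v1/completeTasks`, `/druid/indexer/v1/runningTasks`, and the other state-specific variants listed above.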
Only Endpoint for submitting tasks and supervisor specs to the Overlord. Returns the taskId of the submitted task. -* `druid/indexer/v1/task/{taskId}/shutdown` +* `/druid/indexer/v1/task/{taskId}/shutdown` Shuts down a task. -* `druid/indexer/v1/datasources/{dataSource}/shutdownAllTasks` +* `/druid/indexer/v1/datasources/{dataSource}/shutdownAllTasks` Shuts down all tasks for a dataSource. +* `/druid/indexer/v1/taskStatus` + +Retrieve list of task status objects for list of task id strings in request body. + +##### DELETE + +* `/druid/indexer/v1/pendingSegments/{dataSource}` + +Manually clean up pending segments table in metadata storage for `datasource`. Returns a JSON object response with +`numDeleted` and count of rows deleted from the pending segments table. This API is used by the +`druid.coordinator.kill.pendingSegments.on` [coordinator setting](../configuration/index.html#coordinator-operation) +which automates this operation to perform periodically. + #### Supervisors ##### GET @@ -490,13 +545,94 @@ This API is deprecated and will be removed in future releases. Please use the equivalent 'terminate' instead. +#### Dynamic Configuration +See [Overlord Dynamic Configuration](../configuration/index.html#overlord-dynamic-configuration) for details. + +Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/` +(e.g., 2016-06-27_2016-06-28). + +##### GET + +* `/druid/indexer/v1/worker` + +Retreives current overlord dynamic configuration. + +* `/druid/indexer/v1/worker/history?interval={interval}&counter={count}` + +Retrieves history of changes to overlord dynamic configuration. Accepts `interval` and `count` query string parameters +to filter by interval and limit the number of results respectively. + +* `/druid/indexer/v1/scaling` + +Retrieves overlord scaling events if auto-scaling runners are in use. + +##### POST + +* /druid/indexer/v1/worker + +Update overlord dynamic worker configuration. + ## Data Server -This section documents the API endpoints for the processes that reside on Data servers (MiddleManagers/Peons and Historicals) in the suggested [three-server configuration](../design/processes.html#server-types). +This section documents the API endpoints for the processes that reside on Data servers (MiddleManagers/Peons and Historicals) +in the suggested [three-server configuration](../design/processes.html#server-types). ### MiddleManager -The MiddleManager does not have any API endpoints beyond the [common endpoints](#common). +##### GET + +* `/druid/worker/v1/enabled` + +Check whether a MiddleManager is in an enabled or disabled state. Returns JSON object keyed by the combined `druid.host` +and `druid.port` with the boolean state as the value. + +```json +{"localhost:8091":true} +``` + +* `/druid/worker/v1/tasks` + +Retrieve a list of active tasks being run on MiddleManager. Returns JSON list of taskid strings. Normal usage should +prefer to use the `/druid/indexer/v1/tasks` [Overlord API](#overlord) or one of it's task state specific variants instead. + +```json +["index_wikiticker_2019-02-11T02:20:15.316Z"] +``` + +* `/druid/worker/v1/task/{taskid}/log` + +Retrieve task log output stream by task id. Normal usage should prefer to use the `/druid/indexer/v1/task/{taskId}/log` +[Overlord API](#overlord) instead. + +##### POST + +* `/druid/worker/v1/disable` + +'Disable' a MiddleManager, causing it to stop accepting new tasks but complete all existing tasks. 
Returns JSON object +keyed by the combined `druid.host` and `druid.port`: + +```json +{"localhost:8091":"disabled"} +``` + +* `/druid/worker/v1/enable` + +'Enable' a MiddleManager, allowing it to accept new tasks again if it was previously disabled. Returns JSON object +keyed by the combined `druid.host` and `druid.port`: + +```json +{"localhost:8091":"enabled"} +``` + +* `/druid/worker/v1/task/{taskid}/shutdown` + +Shutdown a running task by `taskid`. Normal usage should prefer to use the `/druid/indexer/v1/task/{taskId}/shutdown` +[Overlord API](#overlord) instead. Returns JSON: + +```json +{"task":"index_kafka_wikiticker_f7011f8ffba384b_fpeclode"} +``` + ### Peon @@ -536,6 +672,9 @@ This section documents the API endpoints for the processes that reside on Query #### Datasource Information +Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/` +(e.g., 2016-06-27_2016-06-28). + ##### GET * `/druid/v2/datasources` @@ -546,7 +685,7 @@ Returns a list of queryable datasources. Returns the dimensions and metrics of the datasource. Optionally, you can provide request parameter "full" to get list of served intervals with dimensions and metrics being served for those intervals. You can also provide request param "interval" explicitly to refer to a particular interval. -If no interval is specified, a default interval spanning a configurable period before the current time will be used. The duration of this interval is specified in ISO8601 format via: +If no interval is specified, a default interval spanning a configurable period before the current time will be used. The default duration of this interval is specified in ISO 8601 duration format via: druid.query.segmentMetadata.defaultHistory @@ -555,7 +694,7 @@ druid.query.segmentMetadata.defaultHistory Returns the dimensions of the datasource.

-This API is deprecated and will be removed in future releases. Please use [SegmentMetadataQuery](../querying/segmentmetadataquery.html) instead +This API is deprecated and will be removed in future releases. Please use SegmentMetadataQuery instead which provides more comprehensive information and supports all dataSource types including streaming dataSources. It's also encouraged to use [INFORMATION_SCHEMA tables](../querying/sql.html#retrieving-metadata) if you're using SQL.
@@ -565,12 +704,12 @@ if you're using SQL. Returns the metrics of the datasource.
-This API is deprecated and will be removed in future releases. Please use [SegmentMetadataQuery](../querying/segmentmetadataquery.html) instead +This API is deprecated and will be removed in future releases. Please use SegmentMetadataQuery instead which provides more comprehensive information and supports all dataSource types including streaming dataSources. It's also encouraged to use [INFORMATION_SCHEMA tables](../querying/sql.html#retrieving-metadata) if you're using SQL.
-* `/druid/v2/datasources/{dataSourceName}/candidates?intervals={comma-separated-intervals-in-ISO8601-format}&numCandidates={numCandidates}` +* `/druid/v2/datasources/{dataSourceName}/candidates?intervals={comma-separated-intervals}&numCandidates={numCandidates}` Returns segment information lists including server locations for the given datasource and intervals. If "numCandidates" is not specified, it will return all servers for each interval. diff --git a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java index 9b59202ee0e..e5abac4070e 100644 --- a/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java +++ b/indexing-service/src/main/java/org/apache/druid/indexing/overlord/http/OverlordResource.java @@ -101,7 +101,6 @@ import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; -import java.util.function.BiFunction; import java.util.stream.Collectors; /** @@ -122,7 +121,6 @@ public class OverlordResource private AtomicReference workerConfigRef = null; private static final List API_TASK_STATES = ImmutableList.of("pending", "waiting", "running", "complete"); - @Inject public OverlordResource( TaskMaster taskMaster, @@ -503,100 +501,6 @@ public class OverlordResource return getTasks("waiting", null, null, null, null, req); } - private static class AnyTask extends TaskRunnerWorkItem - { - private final String taskType; - private final String dataSource; - private final TaskState taskState; - private final RunnerTaskState runnerTaskState; - private final DateTime createdTime; - private final DateTime queueInsertionTime; - private final TaskLocation taskLocation; - - AnyTask( - String taskId, - String taskType, - ListenableFuture result, - String dataSource, - TaskState state, - RunnerTaskState runnerState, - DateTime createdTime, - DateTime queueInsertionTime, - TaskLocation taskLocation - ) - { - super(taskId, result, DateTimes.EPOCH, DateTimes.EPOCH); - this.taskType = taskType; - this.dataSource = dataSource; - this.taskState = state; - this.runnerTaskState = runnerState; - this.createdTime = createdTime; - this.queueInsertionTime = queueInsertionTime; - this.taskLocation = taskLocation; - } - - @Override - public TaskLocation getLocation() - { - return taskLocation; - } - - @Override - public String getTaskType() - { - return taskType; - } - - @Override - public String getDataSource() - { - return dataSource; - } - - public TaskState getTaskState() - { - return taskState; - } - - public RunnerTaskState getRunnerTaskState() - { - return runnerTaskState; - } - - @Override - public DateTime getCreatedTime() - { - return createdTime; - } - - @Override - public DateTime getQueueInsertionTime() - { - return queueInsertionTime; - } - - public AnyTask withTaskState( - TaskState newTaskState, - RunnerTaskState runnerState, - DateTime createdTime, - DateTime queueInsertionTime, - TaskLocation taskLocation - ) - { - return new AnyTask( - getTaskId(), - getTaskType(), - getResult(), - getDataSource(), - newTaskState, - runnerState, - createdTime, - queueInsertionTime, - taskLocation - ); - } - } - @GET @Path("/pendingTasks") @Produces(MediaType.APPLICATION_JSON) @@ -760,120 +664,6 @@ public class OverlordResource return Response.ok(authorizedList).build(); } - private static BiFunction, RunnerTaskState, TaskStatusPlus> newTaskInfo2TaskStatusPlusFn() - { - return 
(taskInfo, runnerTaskState) -> new TaskStatusPlus( - taskInfo.getId(), - taskInfo.getTask() == null ? null : taskInfo.getTask().getType(), - taskInfo.getCreatedTime(), - // Would be nice to include the real queue insertion time, but the - // TaskStorage API doesn't yet allow it. - DateTimes.EPOCH, - taskInfo.getStatus().getStatusCode(), - runnerTaskState, - taskInfo.getStatus().getDuration(), - TaskLocation.unknown(), - taskInfo.getDataSource(), - taskInfo.getStatus().getErrorMsg() - ); - } - - private List filterActiveTasks( - RunnerTaskState state, - List allTasks - ) - { - //divide active tasks into 3 lists : running, pending, waiting - Optional taskRunnerOpt = taskMaster.getTaskRunner(); - if (!taskRunnerOpt.isPresent()) { - throw new WebApplicationException( - Response.serverError().entity("No task runner found").build() - ); - } - TaskRunner runner = taskRunnerOpt.get(); - // the order of tasks below is waiting, pending, running to prevent - // skipping a task, it's the order in which tasks will change state - // if they do while this is code is executing, so a task might be - // counted twice but never skipped - if (RunnerTaskState.WAITING.equals(state)) { - Collection runnersKnownTasks = runner.getKnownTasks(); - Set runnerKnownTaskIds = runnersKnownTasks - .stream() - .map(TaskRunnerWorkItem::getTaskId) - .collect(Collectors.toSet()); - final List waitingTasks = new ArrayList<>(); - for (TaskRunnerWorkItem task : allTasks) { - if (!runnerKnownTaskIds.contains(task.getTaskId())) { - waitingTasks.add(((AnyTask) task).withTaskState( - TaskState.RUNNING, - RunnerTaskState.WAITING, - task.getCreatedTime(), - task.getQueueInsertionTime(), - task.getLocation() - )); - } - } - return waitingTasks; - } - - if (RunnerTaskState.PENDING.equals(state)) { - Collection knownPendingTasks = runner.getPendingTasks(); - Set pendingTaskIds = knownPendingTasks - .stream() - .map(TaskRunnerWorkItem::getTaskId) - .collect(Collectors.toSet()); - Map workItemIdMap = knownPendingTasks - .stream() - .collect(Collectors.toMap( - TaskRunnerWorkItem::getTaskId, - java.util.function.Function.identity(), - (previousWorkItem, newWorkItem) -> newWorkItem - )); - final List pendingTasks = new ArrayList<>(); - for (TaskRunnerWorkItem task : allTasks) { - if (pendingTaskIds.contains(task.getTaskId())) { - pendingTasks.add(((AnyTask) task).withTaskState( - TaskState.RUNNING, - RunnerTaskState.PENDING, - workItemIdMap.get(task.getTaskId()).getCreatedTime(), - workItemIdMap.get(task.getTaskId()).getQueueInsertionTime(), - workItemIdMap.get(task.getTaskId()).getLocation() - )); - } - } - return pendingTasks; - } - - if (RunnerTaskState.RUNNING.equals(state)) { - Collection knownRunningTasks = runner.getRunningTasks(); - Set runningTaskIds = knownRunningTasks - .stream() - .map(TaskRunnerWorkItem::getTaskId) - .collect(Collectors.toSet()); - Map workItemIdMap = knownRunningTasks - .stream() - .collect(Collectors.toMap( - TaskRunnerWorkItem::getTaskId, - java.util.function.Function.identity(), - (previousWorkItem, newWorkItem) -> newWorkItem - )); - final List runningTasks = new ArrayList<>(); - for (TaskRunnerWorkItem task : allTasks) { - if (runningTaskIds.contains(task.getTaskId())) { - runningTasks.add(((AnyTask) task).withTaskState( - TaskState.RUNNING, - RunnerTaskState.RUNNING, - workItemIdMap.get(task.getTaskId()).getCreatedTime(), - workItemIdMap.get(task.getTaskId()).getQueueInsertionTime(), - workItemIdMap.get(task.getTaskId()).getLocation() - )); - } - } - return runningTasks; - } - return allTasks; - } - 
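The `TaskStatusPlus` objects assembled by the code above are what the bulk `/druid/indexer/v1/taskStatus` endpoint documented earlier in this patch returns for a list of task id strings posted in the request body. A sketch of a client-side call, where the host, port, and task ids are assumptions rather than values taken from this patch:

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class BulkTaskStatusExample
{
  public static void main(String[] args) throws Exception
  {
    // Assumption: Overlord on localhost:8090; the task ids below are placeholders.
    String taskIds = "[\"index_task_example_1\", \"index_task_example_2\"]";
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:8090/druid/indexer/v1/taskStatus"))
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(taskIds))
        .build();

    // The response body is a JSON collection of task status objects for the requested ids.
    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    System.out.println(response.body());
  }
}
```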
@DELETE @Path("/pendingSegments/{dataSource}") @Produces(MediaType.APPLICATION_JSON) @@ -1016,6 +806,102 @@ public class OverlordResource } } + private List filterActiveTasks( + RunnerTaskState state, + List allTasks + ) + { + //divide active tasks into 3 lists : running, pending, waiting + Optional taskRunnerOpt = taskMaster.getTaskRunner(); + if (!taskRunnerOpt.isPresent()) { + throw new WebApplicationException( + Response.serverError().entity("No task runner found").build() + ); + } + TaskRunner runner = taskRunnerOpt.get(); + // the order of tasks below is waiting, pending, running to prevent + // skipping a task, it's the order in which tasks will change state + // if they do while this is code is executing, so a task might be + // counted twice but never skipped + if (RunnerTaskState.WAITING.equals(state)) { + Collection runnersKnownTasks = runner.getKnownTasks(); + Set runnerKnownTaskIds = runnersKnownTasks + .stream() + .map(TaskRunnerWorkItem::getTaskId) + .collect(Collectors.toSet()); + final List waitingTasks = new ArrayList<>(); + for (TaskRunnerWorkItem task : allTasks) { + if (!runnerKnownTaskIds.contains(task.getTaskId())) { + waitingTasks.add(((AnyTask) task).withTaskState( + TaskState.RUNNING, + RunnerTaskState.WAITING, + task.getCreatedTime(), + task.getQueueInsertionTime(), + task.getLocation() + )); + } + } + return waitingTasks; + } + + if (RunnerTaskState.PENDING.equals(state)) { + Collection knownPendingTasks = runner.getPendingTasks(); + Set pendingTaskIds = knownPendingTasks + .stream() + .map(TaskRunnerWorkItem::getTaskId) + .collect(Collectors.toSet()); + Map workItemIdMap = knownPendingTasks + .stream() + .collect(Collectors.toMap( + TaskRunnerWorkItem::getTaskId, + java.util.function.Function.identity(), + (previousWorkItem, newWorkItem) -> newWorkItem + )); + final List pendingTasks = new ArrayList<>(); + for (TaskRunnerWorkItem task : allTasks) { + if (pendingTaskIds.contains(task.getTaskId())) { + pendingTasks.add(((AnyTask) task).withTaskState( + TaskState.RUNNING, + RunnerTaskState.PENDING, + workItemIdMap.get(task.getTaskId()).getCreatedTime(), + workItemIdMap.get(task.getTaskId()).getQueueInsertionTime(), + workItemIdMap.get(task.getTaskId()).getLocation() + )); + } + } + return pendingTasks; + } + + if (RunnerTaskState.RUNNING.equals(state)) { + Collection knownRunningTasks = runner.getRunningTasks(); + Set runningTaskIds = knownRunningTasks + .stream() + .map(TaskRunnerWorkItem::getTaskId) + .collect(Collectors.toSet()); + Map workItemIdMap = knownRunningTasks + .stream() + .collect(Collectors.toMap( + TaskRunnerWorkItem::getTaskId, + java.util.function.Function.identity(), + (previousWorkItem, newWorkItem) -> newWorkItem + )); + final List runningTasks = new ArrayList<>(); + for (TaskRunnerWorkItem task : allTasks) { + if (runningTaskIds.contains(task.getTaskId())) { + runningTasks.add(((AnyTask) task).withTaskState( + TaskState.RUNNING, + RunnerTaskState.RUNNING, + workItemIdMap.get(task.getTaskId()).getCreatedTime(), + workItemIdMap.get(task.getTaskId()).getQueueInsertionTime(), + workItemIdMap.get(task.getTaskId()).getLocation() + )); + } + } + return runningTasks; + } + return allTasks; + } + private List securedTaskStatusPlus( List collectionToFilter, @Nullable String dataSource, @@ -1057,4 +943,98 @@ public class OverlordResource ) ); } + + private static class AnyTask extends TaskRunnerWorkItem + { + private final String taskType; + private final String dataSource; + private final TaskState taskState; + private final RunnerTaskState 
runnerTaskState; + private final DateTime createdTime; + private final DateTime queueInsertionTime; + private final TaskLocation taskLocation; + + AnyTask( + String taskId, + String taskType, + ListenableFuture result, + String dataSource, + TaskState state, + RunnerTaskState runnerState, + DateTime createdTime, + DateTime queueInsertionTime, + TaskLocation taskLocation + ) + { + super(taskId, result, DateTimes.EPOCH, DateTimes.EPOCH); + this.taskType = taskType; + this.dataSource = dataSource; + this.taskState = state; + this.runnerTaskState = runnerState; + this.createdTime = createdTime; + this.queueInsertionTime = queueInsertionTime; + this.taskLocation = taskLocation; + } + + @Override + public TaskLocation getLocation() + { + return taskLocation; + } + + @Override + public String getTaskType() + { + return taskType; + } + + @Override + public String getDataSource() + { + return dataSource; + } + + public TaskState getTaskState() + { + return taskState; + } + + public RunnerTaskState getRunnerTaskState() + { + return runnerTaskState; + } + + @Override + public DateTime getCreatedTime() + { + return createdTime; + } + + @Override + public DateTime getQueueInsertionTime() + { + return queueInsertionTime; + } + + public AnyTask withTaskState( + TaskState newTaskState, + RunnerTaskState runnerState, + DateTime createdTime, + DateTime queueInsertionTime, + TaskLocation taskLocation + ) + { + return new AnyTask( + getTaskId(), + getTaskType(), + getResult(), + getDataSource(), + newTaskState, + runnerState, + createdTime, + queueInsertionTime, + taskLocation + ); + } + } } From 2b04e6d0bcfd98b0e5169eadbd91a52d2bd22a76 Mon Sep 17 00:00:00 2001 From: Surekha Date: Tue, 19 Feb 2019 10:52:37 -0800 Subject: [PATCH 22/25] add note on consistency of results for sys.segments queries (#7034) * add doc * change docs * PR comments * few more changes --- docs/content/querying/sql.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/content/querying/sql.md b/docs/content/querying/sql.md index 3f274918569..883904ced99 100644 --- a/docs/content/querying/sql.md +++ b/docs/content/querying/sql.md @@ -571,6 +571,8 @@ The "sys" schema provides visibility into Druid segments, servers and tasks. ### SEGMENTS table Segments table provides details on all Druid segments, whether they are published yet or not. +#### CAVEAT +Note that a segment can be served by more than one stream ingestion tasks or Historical processes, in that case it would have multiple replicas. These replicas are weakly consistent with each other when served by multiple ingestion tasks, until a segment is eventually served by a Historical, at that point the segment is immutable. Broker prefers to query a segment from Historical over an ingestion task. But if a segment has multiple realtime replicas, for eg. kafka index tasks, and one task is slower than other, then the sys.segments query results can vary for the duration of the tasks because only one of the ingestion tasks is queried by the Broker and it is not gauranteed that the same task gets picked everytime. The `num_rows` column of segments table can have inconsistent values during this period. There is an open [issue](https://github.com/apache/incubator-druid/issues/5915) about this inconsistency with stream ingestion tasks. 
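To make the caveat concrete, such a query can be issued against `sys.segments` over Druid's SQL-over-HTTP endpoint `/druid/v2/sql`. The sketch below is illustrative only: it assumes a Broker at `localhost:8082` (the usual default port) and a hypothetical datasource named `wikipedia`, and reads the `num_rows` column called out above:

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class SysSegmentsQueryExample
{
  public static void main(String[] args) throws Exception
  {
    // Assumption: Broker on its usual default port 8082; the datasource name is hypothetical.
    String body = "{\"query\":\"SELECT segment_id, num_rows FROM sys.segments WHERE datasource = 'wikipedia'\"}";
    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("http://localhost:8082/druid/v2/sql"))
        .header("Content-Type", "application/json")
        .POST(HttpRequest.BodyPublishers.ofString(body))
        .build();

    // While a segment is still served only by realtime ingestion tasks, num_rows in the
    // result may differ between runs, as described in the caveat above.
    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    System.out.println(response.body());
  }
}
```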
|Column|Notes| |------|-----| From 258485a2fbde740f7efbd484e9d544767a137eff Mon Sep 17 00:00:00 2001 From: Jonathan Wei Date: Tue, 19 Feb 2019 12:38:52 -0800 Subject: [PATCH 23/25] Exclude github issue templates from license check (#7070) * Exclude github issue templates from license check * Adjust capitalization --- .github/ISSUE_TEMPLATE/proposal.md | 21 +-------------------- pom.xml | 1 + 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/proposal.md b/.github/ISSUE_TEMPLATE/proposal.md index 3a93808ff2d..3c043387059 100644 --- a/.github/ISSUE_TEMPLATE/proposal.md +++ b/.github/ISSUE_TEMPLATE/proposal.md @@ -7,25 +7,6 @@ assignees: '' --- - - # Motivation A description of the problem. @@ -52,7 +33,7 @@ This section should describe how the proposed changes will impact the operation - Is there a migration path that cluster operators need to be aware of? - Will there be any effect on the ability to do a rolling upgrade, or to do a rolling _downgrade_ if an operator wants to switch back to a previous version? -# Test Plan (optional) +# Test plan (optional) An optional discussion of how the proposed changes will be tested. This section should focus on higher level system test strategy and not unit tests (as UTs will be implementation dependent). diff --git a/pom.xml b/pom.xml index 95ca07b21e9..1ab8ad1b6c4 100644 --- a/pom.xml +++ b/pom.xml @@ -1482,6 +1482,7 @@ **/*.json **/*.parq **/*.parquet + .github/ISSUE_TEMPLATE/*.md From 7d1e8f353eb768a1170b0faf9405c48fbde69178 Mon Sep 17 00:00:00 2001 From: Fangyuan Deng <982092332@qq.com> Date: Wed, 20 Feb 2019 05:10:55 +0800 Subject: [PATCH 24/25] bugfix: when building materialized-view, if taskCount>1, may cause concurrentModificationException (#6690) * bugfix: when building materialized-view, if taskCount >1, may cause ConcurrentModificationException * remove entry after iteration instead of using ConcurrentMap, and add unit test * small change * modify unit test for coverage * remove unused method --- .../MaterializedViewSupervisor.java | 15 +++- .../MaterializedViewSupervisorTest.java | 83 +++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java index 1ccd8129246..105afdf8f23 100644 --- a/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java +++ b/extensions-contrib/materialized-view-maintenance/src/main/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisor.java @@ -270,13 +270,18 @@ public class MaterializedViewSupervisor implements Supervisor void checkSegmentsAndSubmitTasks() { synchronized (taskLock) { + List intervalsToRemove = new ArrayList<>(); for (Map.Entry entry : runningTasks.entrySet()) { Optional taskStatus = taskStorage.getStatus(entry.getValue().getId()); if (!taskStatus.isPresent() || !taskStatus.get().isRunnable()) { - runningTasks.remove(entry.getKey()); - runningVersion.remove(entry.getKey()); + intervalsToRemove.add(entry.getKey()); } } + for (Interval interval : intervalsToRemove) { + runningTasks.remove(interval); + runningVersion.remove(interval); + } + if (runningTasks.size() == maxTaskCount) { //if the number of running tasks reach the max task count, supervisor won't submit new 
tasks. return; @@ -288,6 +293,12 @@ public class MaterializedViewSupervisor implements Supervisor submitTasks(sortedToBuildVersion, baseSegments); } } + + @VisibleForTesting + Pair, Map> getRunningTasks() + { + return new Pair<>(runningTasks, runningVersion); + } /** * Find infomation about the intervals in which derived dataSource data should be rebuilt. diff --git a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java index 7b575f01dd7..1bf1c39709d 100644 --- a/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java +++ b/extensions-contrib/materialized-view-maintenance/src/test/java/org/apache/druid/indexing/materializedview/MaterializedViewSupervisorTest.java @@ -27,7 +27,11 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Sets; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.StringDimensionSchema; +import org.apache.druid.indexer.HadoopIOConfig; +import org.apache.druid.indexer.HadoopIngestionSpec; import org.apache.druid.indexer.HadoopTuningConfig; +import org.apache.druid.indexer.TaskStatus; +import org.apache.druid.indexing.common.task.HadoopIndexTask; import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator; import org.apache.druid.indexing.overlord.TaskMaster; import org.apache.druid.indexing.overlord.TaskQueue; @@ -41,7 +45,9 @@ import org.apache.druid.metadata.TestDerbyConnector; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.LongSumAggregatorFactory; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.indexing.DataSchema; import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider; +import org.apache.druid.segment.transform.TransformSpec; import org.apache.druid.server.security.AuthorizerMapper; import org.apache.druid.timeline.DataSegment; import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec; @@ -176,6 +182,83 @@ public class MaterializedViewSupervisorTest Assert.assertEquals(expectedSegments, toBuildInterval.rhs); } + @Test + public void testCheckSegmentsAndSubmitTasks() throws IOException + { + Set baseSegments = Sets.newHashSet( + new DataSegment( + "base", + Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), + "2015-01-03", + ImmutableMap.of(), + ImmutableList.of("dim1", "dim2"), + ImmutableList.of("m1"), + new HashBasedNumberedShardSpec(0, 1, null, null), + 9, + 1024 + ) + ); + indexerMetadataStorageCoordinator.announceHistoricalSegments(baseSegments); + expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes(); + expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes(); + expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes(); + expect(taskStorage.getStatus("test_task1")).andReturn(Optional.of(TaskStatus.failure("test_task1"))).anyTimes(); + expect(taskStorage.getStatus("test_task2")).andReturn(Optional.of(TaskStatus.running("test_task2"))).anyTimes(); + EasyMock.replay(taskStorage); + + Pair, Map> runningTasksPair = supervisor.getRunningTasks(); + Map runningTasks = runningTasksPair.lhs; + Map runningVersion = runningTasksPair.rhs; + + DataSchema dataSchema = new DataSchema( + 
"test_datasource", + null, + null, + null, + TransformSpec.NONE, + objectMapper + ); + HadoopIOConfig hadoopIOConfig = new HadoopIOConfig(new HashMap<>(), null, null); + HadoopIngestionSpec spec = new HadoopIngestionSpec(dataSchema, hadoopIOConfig, null); + HadoopIndexTask task1 = new HadoopIndexTask( + "test_task1", + spec, + null, + null, + null, + objectMapper, + null, + null, + null + ); + runningTasks.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), task1); + runningVersion.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "test_version1"); + + HadoopIndexTask task2 = new HadoopIndexTask( + "test_task2", + spec, + null, + null, + null, + objectMapper, + null, + null, + null + ); + runningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2); + runningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2"); + + supervisor.checkSegmentsAndSubmitTasks(); + + Map expectedRunningTasks = new HashMap<>(); + Map expectedRunningVersion = new HashMap<>(); + expectedRunningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2); + expectedRunningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2"); + + Assert.assertEquals(expectedRunningTasks, runningTasks); + Assert.assertEquals(expectedRunningVersion, runningVersion); + + } @Test public void testSuspendedDoesntRun() From 554b0142c3aa417c269ccd034686085b64fc22df Mon Sep 17 00:00:00 2001 From: Dylan Wylie Date: Tue, 19 Feb 2019 15:26:54 -0700 Subject: [PATCH 25/25] Autoclose old PRs using stale bot. (#7031) * Autoclose old PRs using stale bot. * add apache license * Excempt bug label --- .github/stale.yml | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/stale.yml diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 00000000000..da33fe2d94c --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Configuration for probot-stale - https://github.com/probot/stale + +# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable +exemptLabels: + - Security + - Bug + +exemptMilestones: true + +# Limit to only `issues` or `pulls` +only: pulls + +# Label applied when closing +staleLabel: stale + +# Configuration settings that are specific to just 'issues' or 'pulls': +pulls: + daysUntilStale: 60 + daysUntilClose: 7 + markComment: > + This pull request has been marked as stale due to 60 days of inactivity. + It will be closed in 1 week if no further activity occurs. If you think + that’s incorrect or this pull request requires a review, please simply + write any comment. If closed, you can revive the PR at any time and @mention + a reviewer or discuss it on the dev@druid.apache.org list. 
+ Thank you for your contributions. + unmarkComment: > + This pull request is no longer marked as stale. + closeComment: > + This pull request has been closed due to lack of activity. If you think that + is incorrect, or the pull request requires review, you can revive the PR at + any time. \ No newline at end of file
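
Looping back to the ConcurrentModificationException fix in the materialized-view patch above: the collect-then-remove pattern it introduces can be shown in isolation. This sketch is independent of the Druid classes and uses made-up map keys and values:

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class RemoveAfterIterationExample
{
  public static void main(String[] args)
  {
    Map<String, String> runningTasks = new HashMap<>();
    runningTasks.put("2015-01-01/2015-01-02", "task1");
    runningTasks.put("2015-01-02/2015-01-03", "task2");

    // Calling runningTasks.remove(...) inside this loop may throw
    // ConcurrentModificationException, which is what the fix avoids.
    List<String> intervalsToRemove = new ArrayList<>();
    for (Map.Entry<String, String> entry : runningTasks.entrySet()) {
      if ("task1".equals(entry.getValue())) {
        intervalsToRemove.add(entry.getKey());
      }
    }

    // Remove only after the iteration completes, so the map is never modified mid-iteration.
    for (String interval : intervalsToRemove) {
      runningTasks.remove(interval);
    }

    System.out.println(runningTasks); // prints {2015-01-02/2015-01-03=task2}
  }
}
```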