Merge branch 'master' into 6088-Time-Ordering-On-Scans-V2

This commit is contained in:
Justin Borromeo 2019-02-20 10:12:50 -08:00
commit c9142e721c
175 changed files with 5419 additions and 1429 deletions

37
.dockerignore Normal file
View File

@ -0,0 +1,37 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
.git
**/*.jar
**/*.class
dist
target
*.iml
*.ipr
*.iws
*.tar.gz
*.swp
*.swo
.classpath
.idea
.project
.settings/
*.log
*.DS_Store
_site
dependency-reduced-pom.xml

42
.github/ISSUE_TEMPLATE/proposal.md vendored Normal file
View File

@ -0,0 +1,42 @@
---
name: Proposal
about: A template for major Druid change proposals
title: "[PROPOSAL]"
labels: Proposal
assignees: ''
---
# Motivation
A description of the problem.
# Proposed changes
This section should provide a detailed description of the changes being proposed. This will usually be the longest section; please feel free to split this section or other sections into subsections if needed.
This section should include any changes made to user-facing interfaces, for example:
- Parameters
- JSON query/ingest specs
- SQL language
- Emitted metrics
# Rationale
A discussion of why this particular solution is the best one. One good way to approach this is to discuss alternative solutions that you considered and decided against. This should also include a discussion of any specific benefits or drawbacks you are aware of.
# Operational impact
This section should describe how the proposed changes will impact the operation of existing clusters. It should answer questions such as:
- Is anything going to be deprecated or removed by this change? How will we phase out old behavior?
- Is there a migration path that cluster operators need to be aware of?
- Will there be any effect on the ability to do a rolling upgrade, or to do a rolling _downgrade_ if an operator wants to switch back to a previous version?
# Test plan (optional)
An optional discussion of how the proposed changes will be tested. This section should focus on higher-level system test strategy and not unit tests (as unit tests will be implementation-dependent).
# Future work (optional)
An optional discussion of things that you believe are out of scope for the particular proposal but would be nice follow-ups. It helps show where a particular change could be leading us. There isn't any commitment that the proposal author will actually work on the items discussed in this section.

47
.github/stale.yml vendored Normal file
View File

@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Configuration for probot-stale - https://github.com/probot/stale
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- Security
- Bug
exemptMilestones: true
# Limit to only `issues` or `pulls`
only: pulls
# Label applied when closing
staleLabel: stale
# Configuration settings that are specific to just 'issues' or 'pulls':
pulls:
daysUntilStale: 60
daysUntilClose: 7
markComment: >
This pull request has been marked as stale due to 60 days of inactivity.
It will be closed in 1 week if no further activity occurs. If you think
that's incorrect or this pull request requires review, please simply
leave a comment. If closed, you can revive the PR at any time and @mention
a reviewer or discuss it on the dev@druid.apache.org list.
Thank you for your contributions.
unmarkComment: >
This pull request is no longer marked as stale.
closeComment: >
This pull request has been closed due to lack of activity. If you think that
is incorrect, or the pull request requires review, you can revive the PR at
any time.

View File

@ -28,7 +28,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
</parent>
<dependencies>

View File

@ -27,7 +27,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
</parent>
<prerequisites>

View File

@ -31,7 +31,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
</parent>
<properties>

View File

@ -23,7 +23,6 @@ import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.logger.Logger;
import org.joda.time.DateTime;
import javax.annotation.Nullable;
@ -31,8 +30,6 @@ import java.util.Objects;
public class TaskStatusPlus
{
private static final Logger log = new Logger(TaskStatusPlus.class);
private final String id;
private final String type;
private final DateTime createdTime;
@ -74,7 +71,6 @@ public class TaskStatusPlus
);
}
@JsonCreator
public TaskStatusPlus(
@JsonProperty("id") String id,

View File

@ -22,8 +22,26 @@ package org.apache.druid.timeline;
import org.apache.druid.guice.annotations.PublicApi;
import org.joda.time.Interval;
/**
* A logical segment can represent an entire segment or a part of a segment. As a result, it can have a different
* interval from its actual base segment. {@link #getInterval()} and {@link #getTrueInterval()} return the interval of
* this logical segment and the interval of the base segment, respectively.
*
* For example, suppose we have 2 segments as below:
*
* - Segment A has an interval of 2017/2018.
* - Segment B has an interval of 2017-08-01/2017-08-02.
*
* For these segments, {@link VersionedIntervalTimeline#lookup} returns 3 segments as below:
*
* - interval of 2017/2017-08-01 (trueInterval: 2017/2018)
* - interval of 2017-08-01/2017-08-02 (trueInterval: 2017-08-01/2017-08-02)
* - interval of 2017-08-02/2018 (trueInterval: 2017/2018)
*/
@PublicApi
public interface LogicalSegment
{
Interval getInterval();
Interval getTrueInterval();
}
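A minimal sketch (an editor's illustration, not part of this commit) of how the two intervals diverge after a lookup; it assumes the `VersionedIntervalTimeline`, `Intervals`, and `SingleElementPartitionChunk` APIs present in this tree:
import com.google.common.collect.Ordering;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.SingleElementPartitionChunk;
class TrueIntervalExample
{
  public static void main(String[] args)
  {
    // Two overlapping segments, matching the javadoc example above.
    final VersionedIntervalTimeline<String, String> timeline =
        new VersionedIntervalTimeline<>(Ordering.natural());
    timeline.add(Intervals.of("2017/2018"), "v1", new SingleElementPartitionChunk<>("segmentA"));
    timeline.add(Intervals.of("2017-08-01/2017-08-02"), "v2", new SingleElementPartitionChunk<>("segmentB"));
    // lookup() slices the overlap into three holders; getInterval() is the
    // sliced interval, getTrueInterval() the interval of the base segment.
    for (TimelineObjectHolder<String, String> holder : timeline.lookup(Intervals.of("2017/2018"))) {
      System.out.println(holder.getInterval() + " (true: " + holder.getTrueInterval() + ")");
    }
  }
}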

View File

@ -19,6 +19,7 @@
package org.apache.druid.timeline;
import com.google.common.annotations.VisibleForTesting;
import org.apache.druid.timeline.partition.PartitionHolder;
import org.joda.time.Interval;
@ -27,16 +28,25 @@ import org.joda.time.Interval;
public class TimelineObjectHolder<VersionType, ObjectType> implements LogicalSegment
{
private final Interval interval;
private final Interval trueInterval;
private final VersionType version;
private final PartitionHolder<ObjectType> object;
@VisibleForTesting
public TimelineObjectHolder(Interval interval, VersionType version, PartitionHolder<ObjectType> object)
{
this(interval, interval, version, object);
}
public TimelineObjectHolder(
Interval interval,
Interval trueInterval,
VersionType version,
PartitionHolder<ObjectType> object
)
{
this.interval = interval;
this.trueInterval = trueInterval;
this.version = version;
this.object = object;
}
@ -47,6 +57,12 @@ public class TimelineObjectHolder<VersionType, ObjectType> implements LogicalSeg
return interval;
}
@Override
public Interval getTrueInterval()
{
return trueInterval;
}
public VersionType getVersion()
{
return version;
@ -62,6 +78,7 @@ public class TimelineObjectHolder<VersionType, ObjectType> implements LogicalSeg
{
return "TimelineObjectHolder{" +
"interval=" + interval +
", trueInterval=" + trueInterval +
", version=" + version +
", object=" + object +
'}';

View File

@ -300,6 +300,7 @@ public class VersionedIntervalTimeline<VersionType, ObjectType> implements Timel
private TimelineObjectHolder<VersionType, ObjectType> timelineEntryToObjectHolder(TimelineEntry entry)
{
return new TimelineObjectHolder<>(
entry.getTrueInterval(),
entry.getTrueInterval(),
entry.getVersion(),
new PartitionHolder<>(entry.getPartitionHolder())
@ -586,10 +587,11 @@ public class VersionedIntervalTimeline<VersionType, ObjectType> implements Timel
if (timelineInterval.overlaps(interval)) {
retVal.add(
new TimelineObjectHolder<VersionType, ObjectType>(
new TimelineObjectHolder<>(
timelineInterval,
val.getTrueInterval(),
val.getVersion(),
new PartitionHolder<ObjectType>(val.getPartitionHolder())
new PartitionHolder<>(val.getPartitionHolder())
)
);
}
@ -604,8 +606,9 @@ public class VersionedIntervalTimeline<VersionType, ObjectType> implements Timel
.isAfter(firstEntry.getInterval().getStart())) {
retVal.set(
0,
new TimelineObjectHolder<VersionType, ObjectType>(
new TimelineObjectHolder<>(
new Interval(interval.getStart(), firstEntry.getInterval().getEnd()),
firstEntry.getTrueInterval(),
firstEntry.getVersion(),
firstEntry.getObject()
)
@ -616,8 +619,9 @@ public class VersionedIntervalTimeline<VersionType, ObjectType> implements Timel
if (interval.overlaps(lastEntry.getInterval()) && interval.getEnd().isBefore(lastEntry.getInterval().getEnd())) {
retVal.set(
retVal.size() - 1,
new TimelineObjectHolder<VersionType, ObjectType>(
new TimelineObjectHolder<>(
new Interval(lastEntry.getInterval().getStart(), interval.getEnd()),
lastEntry.getTrueInterval(),
lastEntry.getVersion(),
lastEntry.getObject()
)

View File

@ -0,0 +1,56 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
FROM maven:3-jdk-8 as builder
COPY . /src
WORKDIR /src
RUN mvn install -ff -DskipTests -Dforbiddenapis.skip=true -Pdist -Pbundle-contrib-exts
RUN \
VER=$(mvn -B org.apache.maven.plugins:maven-help-plugin:3.1.1:evaluate -Dexpression=project.version -q -DforceStdout=true -f pom.xml 2>/dev/null) \
&& tar -zxf ./distribution/target/apache-druid-${VER}-bin.tar.gz -C /opt \
&& ln -s /opt/apache-druid-${VER} /opt/druid
RUN wget -O /opt/druid/extensions/mysql-metadata-storage/mysql-connector-java-5.1.38.jar http://central.maven.org/maven2/mysql/mysql-connector-java/5.1.38/mysql-connector-java-5.1.38.jar \
&& sha256sum --ignore-missing -c /src/distribution/docker/sha256sums.txt \
&& ln -s /opt/druid/extensions/mysql-metadata-storage/mysql-connector-java-5.1.38.jar /opt/druid/lib
RUN addgroup --gid 1000 druid \
&& adduser --home /opt/druid --shell /bin/sh --no-create-home --uid 1000 --gecos '' --gid 1000 --disabled-password druid \
&& mkdir -p /opt/druid/var \
&& chown -R druid:druid /opt/druid \
&& chmod 775 /opt/druid/var
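# The runtime image below is distroless (no shell); busybox supplies a minimal
# /bin/sh so the druid.sh entrypoint can run.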
FROM amd64/busybox:1.30.0-glibc as busybox
FROM gcr.io/distroless/java
LABEL maintainer="Don Bowman <don@agilicus.com>"
COPY --from=busybox /bin/busybox /busybox/busybox
RUN ["/busybox/busybox", "--install", "/bin"]
COPY --from=builder /etc/passwd /etc/passwd
COPY --from=builder /etc/group /etc/group
COPY --from=builder --chown=druid /opt /opt
COPY distribution/docker/druid.sh /druid.sh
RUN chown -R druid:druid /opt/druid
USER druid
VOLUME /opt/druid/var
WORKDIR /opt/druid
ENTRYPOINT ["/druid.sh"]

View File

@ -0,0 +1,26 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
## Build
From the root of the repo, run `docker build -t druid:tag -f distribution/docker/Dockerfile .`
## Run
Edit `environment` to suit. Run `docker-compose -f distribution/docker/docker-compose.yml up`.

View File

@ -0,0 +1,124 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
version: "2.2"
volumes:
metadata_data: {}
middle_var: {}
historical_var: {}
broker_var: {}
coordinator_var: {}
overlord_var: {}
services:
postgres:
container_name: postgres
image: postgres:latest
volumes:
- metadata_data:/var/lib/postgresql/data
environment:
- POSTGRES_PASSWORD=FoolishPassword
- POSTGRES_USER=druid
- POSTGRES_DB=druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
environment:
- ZOO_MY_ID=1
coordinator:
image: druid
container_name: coordinator
volumes:
- coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "3001:8081"
command:
- coordinator
env_file:
- environment
broker:
image: druid
container_name: broker
volumes:
- broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "3002:8082"
command:
- broker
env_file:
- environment
historical:
image: druid
container_name: historical
volumes:
- historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "3003:8083"
command:
- historical
env_file:
- environment
overlord:
image: druid
container_name: overlord
volumes:
- overlord_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "4000:8090"
command:
- overlord
env_file:
- environment
middlemanager:
image: druid
container_name: middlemanager
volumes:
- middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "4001:8091"
command:
- middleManager
env_file:
- environment

137
distribution/docker/druid.sh Executable file
View File

@ -0,0 +1,137 @@
#!/bin/sh
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# NOTE: this is a 'run' script for the stock tarball
# It takes 1 required argument (the name of the service,
# e.g. 'broker', 'historical' etc). Any additional arguments
# are passed to that service.
#
# It accepts 'JAVA_OPTS' as an environment variable
#
# Additional env vars:
# - DRUID_LOG4J -- set the entire log4j.xml verbatim
# - DRUID_LOG_LEVEL -- override the default log level in default log4j
# - DRUID_XMX -- set Java Xmx
# - DRUID_XMS -- set Java Xms
# - DRUID_MAXNEWSIZE -- set Java max new size
# - DRUID_NEWSIZE -- set Java new size
# - DRUID_MAXDIRECTMEMORYSIZE -- set Java max direct memory size
#
# - DRUID_CONFIG -- full path to a file for druid 'common' properties
# - DRUID_CONFIG_${service} -- full path to a file for druid 'service' properties
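#
# Example (hypothetical values): start a broker with a 4g heap:
#   DRUID_XMX=4g DRUID_XMS=4g /druid.sh broker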
set -e
SERVICE="$1"
echo "$(date -Is) startup service $SERVICE"
# We put all the config in /tmp/conf to allow for a
# read-only root filesystem
cp -r /opt/druid/conf /tmp/conf
# Delete the old key (if existing) and append new key=value
setKey() {
service="$1"
key="$2"
value="$3"
case "$service" in
_common)
fname=common.runtime.properties ;;
*)
fname=runtime.properties ;;
esac
# Delete the key from the common file and (if present) the service file, then
# append the new value to the service file, falling back to the common file
# when the service file does not exist.
sed -ri "/$key=/d" /tmp/conf/druid/_common/common.runtime.properties
[ -f /tmp/conf/druid/$service/$fname ] && sed -ri "/$key=/d" /tmp/conf/druid/$service/$fname
[ -f /tmp/conf/druid/$service/$fname ] && echo "$key=$value" >> /tmp/conf/druid/$service/$fname
[ -f /tmp/conf/druid/$service/$fname ] || echo "$key=$value" >> /tmp/conf/druid/_common/$fname
}
setJavaKey() {
service="$1"
key=$2
value=$3
file=/tmp/conf/druid/$service/jvm.config
sed -ri "/$key/d" $file
echo $value >> $file
}
## Setup host names
if [ -n "${ZOOKEEPER}" ]
then
setKey _common druid.zk.service.host "${ZOOKEEPER}"
fi
# Derive druid.host from the container's routable IP
setKey $SERVICE druid.host $(ip r get 1 | awk '{print $7;exit}')
env |grep ^druid_ | while read evar
do
# Can't use IFS='=' to parse since var might have = in it (e.g. password)
val=$(echo "$evar" | sed -e 's?[^=]*=??')
var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?g')
setKey $SERVICE "$var" "$val"
done
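# Example: the environment variable druid_metadata_storage_type=postgresql
# (see distribution/docker/environment) becomes the Druid property
# druid.metadata.storage.type=postgresql in the service's runtime.properties.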
env |grep ^s3service | while read evar
do
val=$(echo "$evar" | sed -e 's?[^=]*=??')
var=$(echo "$evar" | sed -e 's?^\([^=]*\)=.*?\1?g' -e 's?_?.?' -e 's?_?-?g')
echo "$var=$val" >> /tmp/conf/druid/_common/jets3t.properties
done
# This is to allow configuration via a Kubernetes configMap without
# e.g. using subPath (you can also mount the configMap on /tmp/conf/druid)
if [ -n "$DRUID_CONFIG_COMMON" ]
then
cp -f "$DRUID_CONFIG_COMMON" /tmp/conf/druid/_common/common.runtime.properties
fi
# Indirect expansion: read the value of the DRUID_CONFIG_${SERVICE} variable
SCONFIG=$(printf "%s_%s" DRUID_CONFIG ${SERVICE})
SCONFIG=$(eval echo \$$(echo $SCONFIG))
if [ -n "${SCONFIG}" ]
then
cp -f "${SCONFIG}" /tmp/conf/druid/${SERVICE}/runtime.properties
fi
# Now do the java options
if [ -n "$DRUID_XMX" ]; then setJavaKey ${SERVICE} -Xmx -Xmx${DRUID_XMX}; fi
if [ -n "$DRUID_XMS" ]; then setJavaKey ${SERVICE} -Xms -Xms${DRUID_XMS}; fi
if [ -n "$DRUID_MAXNEWSIZE" ]; then setJavaKey ${SERVICE} -XX:MaxNewSize -XX:MaxNewSize=${DRUID_MAXNEWSIZE}; fi
if [ -n "$DRUID_NEWSIZE" ]; then setJavaKey ${SERVICE} -XX:NewSize -XX:MaxNewSize=${DRUID_NEWSIZE}; fi
if [ -n "$DRUID_MAXDIRECTMEMORYSIZE" ]; then setJavaKey ${SERVICE} -XX:MaxDirectMemorySize -XX:MaxDirectMemorySize=${DRUID_MAXDIRECTMEMORYSIZE}; fi
JAVA_OPTS="$JAVA_OPTS $(cat /tmp/conf/druid/${SERVICE}/jvm.config | xargs)"
if [ -n "$DRUID_LOG_LEVEL" ]
then
sed -ri 's/"info"/"'$DRUID_LOG_LEVEL'"/g' /tmp/conf/druid/_common/log4j2.xml
fi
if [ -n "$DRUID_LOG4J" ]
then
echo "$DRUID_LOG4J" > /tmp/conf/druid/_common/log4j2.xml
fi
mkdir -p var/tmp var/druid/segments var/druid/indexing-logs var/druid/task var/druid/hadoop-tmp var/druid/segment-cache
exec java ${JAVA_OPTS} -cp /tmp/conf/druid/_common:/tmp/conf/druid/${SERVICE}:lib/*: org.apache.druid.cli.Main server "$@"

View File

@ -0,0 +1,51 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Java tuning
DRUID_XMX=1g
DRUID_XMS=1g
DRUID_MAXNEWSIZE=250m
DRUID_NEWSIZE=250m
DRUID_MAXDIRECTMEMORYSIZE=6172m
druid_emitter_logging_logLevel=debug
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "druid-azure-extensions", "postgresql-metadata-storage"]
druid_zk_service_host=zookeeper
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=268435456
druid_storage_type=azure
druid_azure_account=YOURACCOUNT
druid_azure_key=YOURKEY
druid_azure_container=druid
druid_azure_protocol=https
druid_azure_maxTries=3
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>

View File

@ -0,0 +1,19 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
b95bf9fe25cb5428f378a62fc842e177ca004b4ae1f9054968b2a396dcc1ec22 /opt/druid/extensions/mysql-metadata-storage/mysql-connector-java-5.1.38.jar

View File

@ -31,7 +31,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
</parent>
<dependencies>

View File

@ -112,6 +112,13 @@
</includes>
<outputDirectory>quickstart/tutorial/conf/druid/middleManager</outputDirectory>
</fileSet>
<fileSet>
<directory>../examples/quickstart/tutorial/conf/druid/router</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>quickstart/tutorial/conf/druid/router</outputDirectory>
</fileSet>
<fileSet>
<directory>../examples/quickstart/tutorial/conf/tranquility</directory>
<includes>
@ -199,6 +206,13 @@
</includes>
<outputDirectory>conf/druid/middleManager</outputDirectory>
</fileSet>
<fileSet>
<directory>../examples/conf/druid/router</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>conf/druid/router</outputDirectory>
</fileSet>
<fileSet>
<directory>../examples/conf/tranquility</directory>
<includes>

View File

@ -45,6 +45,7 @@
<exclude>%regex[(?!((?!${project.build.directory}/)[^/]+/)*src/)(.*/)?release\.properties]</exclude>
<exclude>.gitignore</exclude>
<exclude>.dockerignore</exclude>
<exclude>.travis.yml</exclude>
<exclude>publications/**</exclude>
<exclude>upload.sh</exclude>

View File

@ -0,0 +1,120 @@
---
layout: doc_page
title: "Moment Sketches for Approximate Quantiles module"
---
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
# MomentSketch Quantiles Sketch module
This module provides Druid aggregators for approximate quantile queries using the [momentsketch](https://github.com/stanford-futuredata/momentsketch) library.
The momentsketch provides coarse quantile estimates with lower space and aggregation-time overhead than traditional sketches, approaching the performance of counts and sums by reconstructing distributions from computed statistics.
To use this aggregator, make sure you [include](../../operations/including-extensions.html) the extension in your config file:
```
druid.extensions.loadList=["druid-momentsketch"]
```
### Aggregator
The result of the aggregation is a momentsketch that is the union of all sketches either built from raw data or read from the segments.
The `momentSketch` aggregator operates over raw data while the `momentSketchMerge` aggregator should be used when aggregating pre-computed sketches.
```json
{
"type" : <aggregator_type>,
"name" : <output_name>,
"fieldName" : <input_name>,
"k" : <int>,
"compress" : <boolean>
}
```
|property|description|required?|
|--------|-----------|---------|
|type|Type of aggregator desired. Either "momentSketch" or "momentSketchMerge" |yes|
|name|A String for the output (result) name of the calculation.|yes|
|fieldName|A String for the name of the input field (can contain sketches or raw numeric values).|yes|
|k|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Usable range is generally [3,15].|no, defaults to 13|
|compress|Flag for whether the aggregator compresses numeric values using arcsinh. Can improve robustness to skewed and long-tailed distributions, but reduces accuracy slightly on more uniform distributions.|no, defaults to true|
### Post Aggregators
Users can query for a set of quantiles using the `momentSketchSolveQuantiles` post-aggregator on the sketches created by the `momentSketch` or `momentSketchMerge` aggregators.
```json
{
"type" : "momentSketchSolveQuantiles",
"name" : <output_name>,
"field" : <reference to moment sketch>,
"fractions" : <array of doubles in [0,1]>
}
```
Users can also query for the min/max of a distribution:
```json
{
"type" : "momentSketchMin" | "momentSketchMax",
"name" : <output_name>,
"field" : <reference to moment sketch>,
}
```
### Example
As an example of a query with sketches pre-aggregated at ingestion time, one could set up the following aggregator at ingest:
```json
{
"type": "momentSketch",
"name": "sketch",
"fieldName": "value",
"k": 10,
"compress": true,
}
```
and make queries using the following aggregator + post-aggregator:
```json
{
"aggregations": [{
"type": "momentSketchMerge",
"name": "sketch",
"fieldName": "sketch",
"k": 10,
"compress": true
}],
"postAggregations": [
{
"type": "momentSketchSolveQuantiles",
"name": "quantiles",
"fractions": [0.1, 0.5, 0.9],
"field": {
"type": "fieldAccess",
"fieldName": "sketch"
}
},
{
"type": "momentSketchMin",
"name": "min",
"field": {
"type": "fieldAccess",
"fieldName": "sketch"
}
}]
}
```

View File

@ -201,7 +201,6 @@ For Roaring bitmaps:
|`completionTimeout`|ISO8601 Period|The length of time to wait before declaring a publishing task as failed and terminating it. If this is set too low, your tasks may never publish. The publishing clock for a task begins roughly after `taskDuration` elapses.|no (default == PT30M)|
|`lateMessageRejectionPeriod`|ISO8601 Period|Configure tasks to reject messages with timestamps earlier than this period before the task was created; for example if this is set to `PT1H` and the supervisor creates a task at *2016-01-01T12:00Z*, messages with timestamps earlier than *2016-01-01T11:00Z* will be dropped. This may help prevent concurrency issues if your data stream has late messages and you have multiple pipelines that need to operate on the same segments (e.g. a realtime and a nightly batch ingestion pipeline).|no (default == none)|
|`earlyMessageRejectionPeriod`|ISO8601 Period|Configure tasks to reject messages with timestamps later than this period after the task reached its taskDuration; for example if this is set to `PT1H`, the taskDuration is set to `PT1H` and the supervisor creates a task at *2016-01-01T12:00Z*, messages with timestamps later than *2016-01-01T14:00Z* will be dropped. **Note:** Tasks sometimes run past their task duration, for example, in cases of supervisor failover. Setting earlyMessageRejectionPeriod too low may cause messages to be dropped unexpectedly whenever a task runs past its originally configured task duration.|no (default == none)|
|`skipOffsetGaps`|Boolean|Whether or not to allow gaps of missing offsets in the Kafka stream. This is required for compatibility with implementations such as MapR Streams which does not guarantee consecutive offsets. If this is false, an exception will be thrown if offsets are not consecutive.|no (default == false)|
## Operations

View File

@ -48,7 +48,7 @@ Core extensions are maintained by Druid committers.
|druid-datasketches|Support for approximate counts and set operations with [DataSketches](http://datasketches.github.io/).|[link](../development/extensions-core/datasketches-extension.html)|
|druid-hdfs-storage|HDFS deep storage.|[link](../development/extensions-core/hdfs.html)|
|druid-histogram|Approximate histograms and quantiles aggregator.|[link](../development/extensions-core/approximate-histograms.html)|
|druid-kafka-eight|Kafka ingest firehose (high level consumer) for realtime nodes.|[link](../development/extensions-core/kafka-eight-firehose.html)|
|druid-kafka-eight|Kafka ingest firehose (high level consumer) for realtime nodes (deprecated).|[link](../development/extensions-core/kafka-eight-firehose.html)|
|druid-kafka-extraction-namespace|Kafka-based namespaced lookup. Requires namespace lookup extension.|[link](../development/extensions-core/kafka-extraction-namespace.html)|
|druid-kafka-indexing-service|Supervised exactly-once Kafka ingestion for the indexing service.|[link](../development/extensions-core/kafka-ingestion.html)|
|druid-kinesis-indexing-service|Supervised exactly-once Kinesis ingestion for the indexing service.|[link](../development/extensions-core/kinesis-ingestion.html)|
@ -81,7 +81,7 @@ All of these community extensions can be downloaded using *pull-deps* with the c
|druid-cassandra-storage|Apache Cassandra deep storage.|[link](../development/extensions-contrib/cassandra.html)|
|druid-cloudfiles-extensions|Rackspace Cloudfiles deep storage and firehose.|[link](../development/extensions-contrib/cloudfiles.html)|
|druid-distinctcount|DistinctCount aggregator|[link](../development/extensions-contrib/distinctcount.html)|
|druid-kafka-eight-simpleConsumer|Kafka ingest firehose (low level consumer).|[link](../development/extensions-contrib/kafka-simple.html)|
|druid-kafka-eight-simpleConsumer|Kafka ingest firehose (low level consumer) (deprecated).|[link](../development/extensions-contrib/kafka-simple.html)|
|druid-orc-extensions|Support for data in Apache Orc data format.|[link](../development/extensions-contrib/orc.html)|
|druid-rabbitmq|RabbitMQ firehose.|[link](../development/extensions-contrib/rabbitmq.html)|
|druid-redis-cache|A cache implementation for Druid based on Redis.|[link](../development/extensions-contrib/redis-cache.html)|

View File

@ -110,7 +110,10 @@ A sample ingest firehose spec is shown below -
#### SqlFirehose
SqlFirehoseFactory can be used to ingest events residing in an RDBMS. The database connection information is provided as part of the ingestion spec. For each query, the results are fetched locally and indexed. If there are multiple queries from which data needs to be indexed, queries are prefetched in the background, up to `maxFetchCapacityBytes` bytes.
Requires one of the following extensions:
* [MySQL Metadata Store](../ingestion/mysql.html)
* [PostgreSQL Metadata Store](../ingestion/postgresql.html)
An example is shown below:
```json
{
@ -118,20 +121,19 @@ An example is shown below:
"database": {
"type": "mysql",
"connectorConfig" : {
"connectURI" : "jdbc:mysql://host:port/schema",
"user" : "user",
"password" : "password"
"connectURI" : "jdbc:mysql://host:port/schema",
"user" : "user",
"password" : "password"
}
},
"sqls" : ["SELECT * FROM table1", "SELECT * FROM table2"]
}
```
|property|description|default|required?|
|--------|-----------|-------|---------|
|type|This should be "sql".||Yes|
|database|Specifies the database connection details.`type` should specify the database type and `connectorConfig` should specify the database connection properties via `connectURI`, `user` and `password`||Yes|
|database|Specifies the database connection details.||Yes|
|maxCacheCapacityBytes|Maximum size of the cache space in bytes. 0 means disabling cache. Cached files are not removed until the ingestion task completes.|1073741824|No|
|maxFetchCapacityBytes|Maximum size of the fetch space in bytes. 0 means disabling prefetch. Prefetched files are removed immediately once they are read.|1073741824|No|
|prefetchTriggerBytes|Threshold to trigger prefetching SQL result objects.|maxFetchCapacityBytes / 2|No|
@ -139,6 +141,14 @@ An example is shown below:
|foldCase|Toggle case folding of database column names. This may be enabled in cases where the database returns case insensitive column names in query results.|false|No|
|sqls|List of SQL queries where each SQL query would retrieve the data to be indexed.||Yes|
#### Database
|property|description|default|required?|
|--------|-----------|-------|---------|
|type|The type of database to query. Valid values are `mysql` and `postgresql`.||Yes|
|connectorConfig|Specifies the database connection properties via `connectURI`, `user`, and `password`.||Yes|
### CombiningFirehose
This firehose can be used to combine and merge data from a list of different firehoses.

View File

@ -0,0 +1,43 @@
---
layout: doc_page
title: "Hadoop-based Batch Ingestion VS Native Batch Ingestion"
---
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
# Comparison of Batch Ingestion Methods
Druid supports three types of batch ingestion: Hadoop-based
batch ingestion, native parallel batch ingestion, and native local batch
ingestion. The table below shows which features each ingestion method
supports.
| |Hadoop-based ingestion|Native parallel ingestion|Native local ingestion|
|---|----------------------|-------------------------|----------------------|
| Parallel indexing | Always parallel | Parallel if firehose is splittable | Always sequential |
| Supported indexing modes | Replacing mode | Both appending and replacing modes | Both appending and replacing modes |
| External dependency | Hadoop (it internally submits Hadoop jobs) | No dependency | No dependency |
| Supported [rollup modes](http://druid.io/docs/latest/ingestion/index.html#roll-up-modes) | Perfect rollup | Best-effort rollup | Both perfect and best-effort rollup |
| Supported partitioning methods | [Both Hash-based and range partitioning](http://druid.io/docs/latest/ingestion/hadoop.html#partitioning-specification) | N/A | Hash-based partitioning (when `forceGuaranteedRollup` = true) |
| Supported input locations | All locations accessible via HDFS client or Druid dataSource | All implemented [firehoses](./firehose.html) | All implemented [firehoses](./firehose.html) |
| Supported file formats | All implemented Hadoop InputFormats | Currently only text file format (CSV, TSV, JSON) | Currently only text file format (CSV, TSV, JSON) |
| Saving parse exceptions in ingestion report | Currently not supported | Currently not supported | Supported |
| Custom segment version | Supported, but this is NOT recommended | N/A | N/A |

View File

@ -25,7 +25,9 @@ title: "Hadoop-based Batch Ingestion"
# Hadoop-based Batch Ingestion
Hadoop-based batch ingestion in Druid is supported via a Hadoop-ingestion task. These tasks can be posted to a running
instance of a Druid [Overlord](../design/overlord.html).
instance of a Druid [Overlord](../design/overlord.html).
Please check [Hadoop-based Batch Ingestion VS Native Batch Ingestion](./hadoop-vs-native-batch.html) for differences between native batch ingestion and Hadoop-based ingestion.
## Command Line Hadoop Indexer

View File

@ -178,7 +178,7 @@ the best one for your situation.
|Method|How it works|Can append and overwrite?|Can handle late data?|Exactly-once ingestion?|Real-time queries?|
|------|------------|-------------------------|---------------------|-----------------------|------------------|
|[Native batch](native_tasks.html)|Druid loads data directly from S3, HTTP, NFS, or other networked storage.|Append or overwrite|Yes|Yes|No|
|[Hadoop](hadoop.html)|Druid launches Hadoop Map/Reduce jobs to load data files.|Append or overwrite|Yes|Yes|No|
|[Hadoop](hadoop.html)|Druid launches Hadoop Map/Reduce jobs to load data files.|Overwrite|Yes|Yes|No|
|[Kafka indexing service](../development/extensions-core/kafka-ingestion.html)|Druid reads directly from Kafka.|Append only|Yes|Yes|Yes|
|[Tranquility](stream-push.html)|You use Tranquility, a client side library, to push individual records into Druid.|Append only|No - late data is dropped|No - may drop or duplicate data|Yes|
@ -191,7 +191,7 @@ a _time chunk_, and each time chunk contains one or more [segments](../design/se
particular time chunk may be partitioned further using options that vary based on the ingestion method you have chosen.
* With [Hadoop](hadoop.html) you can do hash- or range-based partitioning on one or more columns.
* With [Native batch](native_tasks.html) you can partition on a hash of all dimension columns. This is useful when
* With [Native batch](native_tasks.html) you can partition on a hash of dimension columns. This is useful when
rollup is enabled, since it maximizes your space savings.
* With [Kafka indexing](../development/extensions-core/kafka-ingestion.html), partitioning is based on Kafka
partitions, and is not configurable through Druid. You can configure it on the Kafka side by using the partitioning

View File

@ -28,6 +28,8 @@ Druid currently has two types of native batch indexing tasks, `index_parallel` w
in parallel on multiple MiddleManager nodes, and `index` which will run a single indexing task locally on a single
MiddleManager.
Please check [Hadoop-based Batch Ingestion VS Native Batch Ingestion](./hadoop-vs-native-batch.html) for differences between native batch ingestion and Hadoop-based ingestion.
Parallel Index Task
--------------------------------
@ -500,7 +502,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon
|indexSpec|defines segment storage format options to be used at indexing time, see [IndexSpec](#indexspec)|null|no|
|maxPendingPersists|Maximum number of persists that can be pending but not started. If this limit would be exceeded by a new intermediate persist, ingestion will block until the currently-running persist finishes. Maximum heap memory usage for indexing scales with maxRowsInMemory * (2 + maxPendingPersists).|0 (meaning one persist can be running concurrently with ingestion, and none can be queued up)|no|
|forceExtendableShardSpecs|Forces use of extendable shardSpecs. Experimental feature intended for use with the [Kafka indexing service extension](../development/extensions-core/kafka-ingestion.html).|false|no|
|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../ingestion/index.html#roll-up-modes). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. This flag cannot be used with either `appendToExisting` of IOConfig or `forceExtendableShardSpecs`. For more details, see the below __Segment pushing modes__ section.|false|no|
|forceGuaranteedRollup|Forces guaranteeing the [perfect rollup](../ingestion/index.html#roll-up-modes). The perfect rollup optimizes the total size of generated segments and querying time while indexing time will be increased. If this is set to true, the index task will read the entire input data twice: once to find the optimal number of partitions per time chunk and once to generate segments. Note that the resulting segments would be hash-partitioned. You can set `forceExtendableShardSpecs` if you plan to append more data to the same time range in the future. This flag cannot be used with `appendToExisting` of IOConfig. For more details, see the below __Segment pushing modes__ section.|false|no|
|reportParseExceptions|DEPRECATED. If true, exceptions encountered during parsing will be thrown and will halt ingestion; if false, unparseable rows and fields will be skipped. Setting `reportParseExceptions` to true will override existing configurations for `maxParseExceptions` and `maxSavedParseExceptions`, setting `maxParseExceptions` to 0 and limiting `maxSavedParseExceptions` to no more than 1.|false|no|
|pushTimeout|Milliseconds to wait for pushing segments. It must be >= 0, where 0 means to wait forever.|0|no|
|segmentWriteOutMediumFactory|Segment write-out medium to use when creating segments. See [SegmentWriteOutMediumFactory](#segmentWriteOutMediumFactory).|Not specified, the value from `druid.peon.defaultSegmentWriteOutMediumFactory.type` is used|no|

View File

@ -41,6 +41,10 @@ See [batch ingestion](../ingestion/hadoop.html).
Druid provides a native index task which doesn't need any dependencies on other systems.
See [native index tasks](./native_tasks.html) for more details.
<div class="note info">
Please check [Hadoop-based Batch Ingestion VS Native Batch Ingestion](./hadoop-vs-native-batch.html) for differences between native batch ingestion and Hadoop-based ingestion.
</div>
### Kafka Indexing Tasks
Kafka Indexing tasks are automatically created by a Kafka Supervisor and are responsible for pulling data from Kafka streams. These tasks are not meant to be created/submitted directly by users. See [Kafka Indexing Service](../development/extensions-core/kafka-ingestion.html) for more details.

View File

@ -74,17 +74,15 @@ Returns the current leader Coordinator of the cluster.
* `/druid/coordinator/v1/isLeader`
Returns true if the Coordinator receiving the request is the current leader.
#### Segment Loading
##### GET
Returns a JSON object with field "leader", either true or false, indicating if this server is the current leader
Coordinator of the cluster. In addition, returns HTTP 200 if the server is the current leader and HTTP 404 if not.
This is suitable for use as a load balancer status check if you only want the active leader to be considered in-service
at the load balancer.
#### Segment Loading
##### GET
* `/druid/coordinator/v1/loadstatus`
Returns the percentage of segments actually loaded in the cluster versus segments that should be loaded in the cluster.
@ -145,14 +143,17 @@ Returns full segment metadata for a specific segment as stored in the metadata s
* `/druid/coordinator/v1/metadata/datasources/{dataSourceName}/segments`
Returns a list of all segments, overlapping with any of given intervals, for a datasource as stored in the metadata store. Request body is array of string intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]
Returns a list of all segments, overlapping with any of given intervals, for a datasource as stored in the metadata store. Request body is array of string ISO 8601 intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]
* `/druid/coordinator/v1/metadata/datasources/{dataSourceName}/segments?full`
Returns a list of all segments, overlapping with any of given intervals, for a datasource with the full segment metadata as stored in the metadata store. Request body is array of string intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]
Returns a list of all segments, overlapping with any of given intervals, for a datasource with the full segment metadata as stored in the metadata store. Request body is array of string ISO 8601 intervals like [interval1, interval2,...] for example ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]
#### Datasources
Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/`
(e.g., 2016-06-27_2016-06-28).
##### GET
* `/druid/coordinator/v1/datasources`
@ -189,7 +190,7 @@ Returns a map of an interval to a map of segment metadata to a set of server nam
* `/druid/coordinator/v1/datasources/{dataSourceName}/intervals/{interval}`
Returns a set of segment ids for an ISO8601 interval. Note that {interval} parameters are delimited by a `_` instead of a `/` (e.g., 2016-06-27_2016-06-28).
Returns a set of segment ids for an interval.
* `/druid/coordinator/v1/datasources/{dataSourceName}/intervals/{interval}?simple`
@ -236,18 +237,19 @@ Enables a segment of a datasource.
Disables a datasource.
* `/druid/coordinator/v1/datasources/{dataSourceName}/intervals/{interval}`
* `@Deprecated. /druid/coordinator/v1/datasources/{dataSourceName}?kill=true&interval={myISO8601Interval}`
* `@Deprecated. /druid/coordinator/v1/datasources/{dataSourceName}?kill=true&interval={myInterval}`
Runs a [Kill task](../ingestion/tasks.html) for a given interval and datasource.
Note that {interval} parameters are delimited by a `_` instead of a `/` (e.g., 2016-06-27_2016-06-28).
* `/druid/coordinator/v1/datasources/{dataSourceName}/segments/{segmentId}`
Disables a segment.
#### Retention Rules
Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/`
(e.g., 2016-06-27_2016-06-28).
##### GET
* `/druid/coordinator/v1/rules`
@ -294,9 +296,10 @@ Optional Header Parameters for auditing the config change can also be specified.
#### Intervals
##### GET
Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/`
(e.g., 2016-06-27_2016-06-28).
Note that {interval} parameters are delimited by a `_` instead of a `/` (e.g., 2016-06-27_2016-06-28).
##### GET
* `/druid/coordinator/v1/intervals`
@ -340,6 +343,7 @@ will be set for them.
Creates or updates the compaction config for a dataSource. See [Compaction Configuration](../configuration/index.html#compaction-dynamic-configuration) for configuration details.
##### DELETE
* `/druid/coordinator/v1/config/compaction/{dataSource}`
@ -359,12 +363,12 @@ ports.
* `/druid/coordinator/v1/servers?simple`
Returns a list of server data objects in which each object has the following keys:
- `host`: host URL include (`{hostname}:{port}`)
- `type`: node type (`indexer-executor`, `historical`)
- `currSize`: storage size currently used
- `maxSize`: maximum storage size
- `priority`
- `tier`
* `host`: host URL (in the form `{hostname}:{port}`)
* `type`: node type (`indexer-executor`, `historical`)
* `currSize`: storage size currently used
* `maxSize`: maximum storage size
* `priority`
* `tier`
### Overlord
@ -384,8 +388,44 @@ only want the active leader to be considered in-service at the load balancer.
#### Tasks<a name="overlord-tasks"></a>
Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/`
(e.g., 2016-06-27_2016-06-28).
##### GET
* `/druid/indexer/v1/tasks`
Retrieve list of tasks. Accepts query string parameters `state`, `datasource`, `createdTimeInterval`, `max`, and `type`.
|Query Parameter |Description |
|---|---|
|`state`|filter list of tasks by task state; valid options are `running`, `complete`, `waiting`, and `pending`.|
| `datasource`| return tasks filtered by Druid datasource.|
| `createdTimeInterval`| return tasks created within the specified interval. |
| `max`| maximum number of `"complete"` tasks to return. Only applies when `state` is set to `"complete"`.|
| `type`| filter tasks by task type. See [task documentation](../ingestion/tasks.html) for more details.|
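For example, `/druid/indexer/v1/tasks?state=complete&max=10` returns at most ten completed tasks.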
* `/druid/indexer/v1/completeTasks`
Retrieve list of complete tasks. Equivalent to `/druid/indexer/v1/tasks?state=complete`.
* `/druid/indexer/v1/runningTasks`
Retrieve list of running tasks. Equivalent to `/druid/indexer/v1/tasks?state=running`.
* `/druid/indexer/v1/waitingTasks`
Retrieve list of waiting tasks. Equivalent to `/druid/indexer/v1/tasks?state=waiting`.
* `/druid/indexer/v1/pendingTasks`
Retrieve list of pending tasks. Equivalent to `/druid/indexer/v1/tasks?state=pending`.
* `/druid/indexer/v1/task/{taskId}`
Retrieve the 'payload' of a task.
* `/druid/indexer/v1/task/{taskId}/status`
Retrieve the status of a task.
@ -408,14 +448,27 @@ Retrieve a [task completion report](../ingestion/reports.html) for a task. Only
Endpoint for submitting tasks and supervisor specs to the Overlord. Returns the taskId of the submitted task.
* `druid/indexer/v1/task/{taskId}/shutdown`
* `/druid/indexer/v1/task/{taskId}/shutdown`
Shuts down a task.
* `druid/indexer/v1/datasources/{dataSource}/shutdownAllTasks`
* `/druid/indexer/v1/datasources/{dataSource}/shutdownAllTasks`
Shuts down all tasks for a dataSource.
* `/druid/indexer/v1/taskStatus`
Retrieve list of task status objects for list of task id strings in request body.
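The request body is a JSON array of task id strings, for example `["index_wikiticker_2019-02-11T02:20:15.316Z"]` (task id borrowed from the MiddleManager example below).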
##### DELETE
* `/druid/indexer/v1/pendingSegments/{dataSource}`
Manually clean up pending segments table in metadata storage for `datasource`. Returns a JSON object response with
a `numDeleted` field containing the number of rows deleted from the pending segments table. This API is used by the
`druid.coordinator.kill.pendingSegments.on` [coordinator setting](../configuration/index.html#coordinator-operation),
which automates this operation to run periodically.
#### Supervisors
##### GET
@ -492,13 +545,94 @@ This API is deprecated and will be removed in future releases.
Please use the equivalent 'terminate' instead.
</div>
#### Dynamic Configuration
See [Overlord Dynamic Configuration](../configuration/index.html#overlord-dynamic-configuration) for details.
Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/`
(e.g., 2016-06-27_2016-06-28).
##### GET
* `/druid/indexer/v1/worker`
Retrieves the current overlord dynamic configuration.
* `/druid/indexer/v1/worker/history?interval={interval}&count={count}`
Retrieves history of changes to overlord dynamic configuration. Accepts `interval` and `count` query string parameters
to filter by interval and limit the number of results respectively.
* `/druid/indexer/v1/scaling`
Retrieves overlord scaling events if auto-scaling runners are in use.
##### POST
* `/druid/indexer/v1/worker`
Update overlord dynamic worker configuration.
## Data Server
This section documents the API endpoints for the processes that reside on Data servers (MiddleManagers/Peons and Historicals) in the suggested [three-server configuration](../design/processes.html#server-types).
This section documents the API endpoints for the processes that reside on Data servers (MiddleManagers/Peons and Historicals)
in the suggested [three-server configuration](../design/processes.html#server-types).
### MiddleManager
The MiddleManager does not have any API endpoints beyond the [common endpoints](#common).
##### GET
* `/druid/worker/v1/enabled`
Check whether a MiddleManager is in an enabled or disabled state. Returns JSON object keyed by the combined `druid.host`
and `druid.port` with the boolean state as the value.
```json
{"localhost:8091":true}
```
* `/druid/worker/v1/tasks`
Retrieve a list of active tasks being run on MiddleManager. Returns JSON list of taskid strings. Normal usage should
prefer to use the `/druid/indexer/v1/tasks` [Overlord API](#overlord) or one of its task-state-specific variants instead.
```json
["index_wikiticker_2019-02-11T02:20:15.316Z"]
```
* `/druid/worker/v1/task/{taskid}/log`
Retrieve task log output stream by task id. Normal usage should prefer to use the `/druid/indexer/v1/task/{taskId}/log`
[Overlord API](#overlord) instead.
##### POST
* `/druid/worker/v1/disable`
'Disable' a MiddleManager, causing it to stop accepting new tasks but complete all existing tasks. Returns JSON object
keyed by the combined `druid.host` and `druid.port`:
```json
{"localhost:8091":"disabled"}
```
* `/druid/worker/v1/enable`
'Enable' a MiddleManager, allowing it to accept new tasks again if it was previously disabled. Returns JSON object
keyed by the combined `druid.host` and `druid.port`:
```json
{"localhost:8091":"enabled"}
```
* `/druid/worker/v1/task/{taskid}/shutdown`
Shut down a running task by `taskid`. Normal usage should prefer to use the `/druid/indexer/v1/task/{taskId}/shutdown`
[Overlord API](#overlord) instead. Returns JSON:
```json
{"task":"index_kafka_wikiticker_f7011f8ffba384b_fpeclode"}
```
### Peon
@ -538,6 +672,9 @@ This section documents the API endpoints for the processes that reside on Query
#### Datasource Information
Note that all _interval_ URL parameters are ISO 8601 strings delimited by a `_` instead of a `/`
(e.g., 2016-06-27_2016-06-28).
##### GET
* `/druid/v2/datasources`
@ -548,7 +685,7 @@ Returns a list of queryable datasources.
Returns the dimensions and metrics of the datasource. Optionally, you can provide request parameter "full" to get list of served intervals with dimensions and metrics being served for those intervals. You can also provide request param "interval" explicitly to refer to a particular interval.
If no interval is specified, a default interval spanning a configurable period before the current time will be used. The duration of this interval is specified in ISO8601 format via:
If no interval is specified, a default interval spanning a configurable period before the current time will be used. The default duration of this interval is specified in ISO 8601 duration format via:
druid.query.segmentMetadata.defaultHistory
@ -557,7 +694,7 @@ druid.query.segmentMetadata.defaultHistory
Returns the dimensions of the datasource.
<div class="note caution">
This API is deprecated and will be removed in future releases. Please use [SegmentMetadataQuery](../querying/segmentmetadataquery.html) instead
This API is deprecated and will be removed in future releases. Please use <a href="../querying/segmentmetadataquery.html">SegmentMetadataQuery</a> instead
which provides more comprehensive information and supports all dataSource types including streaming dataSources. It's also encouraged to use [INFORMATION_SCHEMA tables](../querying/sql.html#retrieving-metadata)
if you're using SQL.
</div>
@ -567,12 +704,12 @@ if you're using SQL.
Returns the metrics of the datasource.
<div class="note caution">
This API is deprecated and will be removed in future releases. Please use [SegmentMetadataQuery](../querying/segmentmetadataquery.html) instead
This API is deprecated and will be removed in future releases. Please use <a href="../querying/segmentmetadataquery.html">SegmentMetadataQuery</a> instead
which provides more comprehensive information and supports all dataSource types including streaming dataSources. It's also encouraged to use [INFORMATION_SCHEMA tables](../querying/sql.html#retrieving-metadata)
if you're using SQL.
</div>
* `/druid/v2/datasources/{dataSourceName}/candidates?intervals={comma-separated-intervals-in-ISO8601-format}&numCandidates={numCandidates}`
* `/druid/v2/datasources/{dataSourceName}/candidates?intervals={comma-separated-intervals}&numCandidates={numCandidates}`
Returns segment information lists including server locations for the given datasource and intervals. If "numCandidates" is not specified, it will return all servers for each interval.
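For example, `/druid/v2/datasources/wikipedia/candidates?intervals=2016-06-27_2016-06-28&numCandidates=2` (using a hypothetical `wikipedia` datasource) would return segment information with at most two candidate server locations for that interval.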

View File

@ -260,8 +260,11 @@ For example, a post to `/druid/coordinator/v1/lookups/config/realtime_customer1/
This will replace the `site_id_customer1` lookup in the `realtime_customer1` with the definition above.
## Get All Lookups
A `GET` to `/druid/coordinator/v1/lookups/config/all` will return all known lookup specs for all tiers.
## Get Lookup
A `GET` to a particular lookup extractor factory is accomplished via `/druid/coordinator/v1/lookups/{tier}/{id}`
A `GET` to a particular lookup extractor factory is accomplished via `/druid/coordinator/v1/lookups/config/{tier}/{id}`
Using the prior example, a `GET` to `/druid/coordinator/v1/lookups/config/realtime_customer2/site_id_customer2` should return

View File

@ -571,6 +571,8 @@ The "sys" schema provides visibility into Druid segments, servers and tasks.
### SEGMENTS table
The SEGMENTS table provides details on all Druid segments, whether or not they are published yet.
#### CAVEAT
Note that a segment can be served by more than one stream ingestion task or Historical process, in which case it has multiple replicas. These replicas are weakly consistent with each other while served by multiple ingestion tasks; once a segment is eventually served by a Historical, it is immutable. The Broker prefers to query a segment from a Historical over an ingestion task. But if a segment has multiple realtime replicas (e.g., Kafka index tasks) and one task is slower than another, the sys.segments query results can vary for the duration of the tasks, because only one of the ingestion tasks is queried by the Broker and it is not guaranteed that the same task gets picked every time. The `num_rows` column of the segments table can have inconsistent values during this period. There is an open [issue](https://github.com/apache/incubator-druid/issues/5915) about this inconsistency with stream ingestion tasks.
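A query along the following lines (the datasource name is hypothetical) can be used to spot-check the affected columns:
```sql
SELECT segment_id, num_rows, is_published, is_available
FROM sys.segments
WHERE datasource = 'wikipedia'
```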
|Column|Notes|
|------|-----|

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
</parent>
<dependencies>

View File

@ -32,7 +32,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
</parent>
<dependencies>

View File

@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -24,7 +24,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -28,6 +28,7 @@ import org.apache.druid.initialization.DruidModule;
import java.util.List;
@Deprecated
public class KafkaEightSimpleConsumerDruidModule implements DruidModule
{
@Override

View File

@ -46,6 +46,7 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
@Deprecated
public class KafkaEightSimpleConsumerFirehoseFactory implements
FirehoseFactoryV2<ByteBufferInputRowParser>
{

View File

@ -56,6 +56,7 @@ import java.util.concurrent.TimeUnit;
* This class is not thread-safe; the caller must ensure that all methods are
* called from a single thread.
*/
@Deprecated
public class KafkaSimpleConsumer
{

View File

@ -25,7 +25,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -24,7 +24,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -270,13 +270,18 @@ public class MaterializedViewSupervisor implements Supervisor
void checkSegmentsAndSubmitTasks()
{
synchronized (taskLock) {
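// Collect completed intervals first and remove them after the loop; removing entries from
// runningTasks while iterating over its entry set would risk a ConcurrentModificationException.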
List<Interval> intervalsToRemove = new ArrayList<>();
for (Map.Entry<Interval, HadoopIndexTask> entry : runningTasks.entrySet()) {
Optional<TaskStatus> taskStatus = taskStorage.getStatus(entry.getValue().getId());
if (!taskStatus.isPresent() || !taskStatus.get().isRunnable()) {
runningTasks.remove(entry.getKey());
runningVersion.remove(entry.getKey());
intervalsToRemove.add(entry.getKey());
}
}
for (Interval interval : intervalsToRemove) {
runningTasks.remove(interval);
runningVersion.remove(interval);
}
if (runningTasks.size() == maxTaskCount) {
// If the number of running tasks has reached the max task count, the supervisor won't submit new tasks.
return;
@ -288,6 +293,12 @@ public class MaterializedViewSupervisor implements Supervisor
submitTasks(sortedToBuildVersion, baseSegments);
}
}
@VisibleForTesting
Pair<Map<Interval, HadoopIndexTask>, Map<Interval, String>> getRunningTasks()
{
return new Pair<>(runningTasks, runningVersion);
}
/**
* Find information about the intervals in which derived dataSource data should be rebuilt.

View File

@ -27,7 +27,11 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.indexer.HadoopIOConfig;
import org.apache.druid.indexer.HadoopIngestionSpec;
import org.apache.druid.indexer.HadoopTuningConfig;
import org.apache.druid.indexer.TaskStatus;
import org.apache.druid.indexing.common.task.HadoopIndexTask;
import org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator;
import org.apache.druid.indexing.overlord.TaskMaster;
import org.apache.druid.indexing.overlord.TaskQueue;
@ -41,7 +45,9 @@ import org.apache.druid.metadata.TestDerbyConnector;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.realtime.firehose.ChatHandlerProvider;
import org.apache.druid.segment.transform.TransformSpec;
import org.apache.druid.server.security.AuthorizerMapper;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.HashBasedNumberedShardSpec;
@ -176,6 +182,83 @@ public class MaterializedViewSupervisorTest
Assert.assertEquals(expectedSegments, toBuildInterval.rhs);
}
@Test
public void testCheckSegmentsAndSubmitTasks() throws IOException
{
Set<DataSegment> baseSegments = Sets.newHashSet(
new DataSegment(
"base",
Intervals.of("2015-01-02T00Z/2015-01-03T00Z"),
"2015-01-03",
ImmutableMap.of(),
ImmutableList.of("dim1", "dim2"),
ImmutableList.of("m1"),
new HashBasedNumberedShardSpec(0, 1, null, null),
9,
1024
)
);
indexerMetadataStorageCoordinator.announceHistoricalSegments(baseSegments);
expect(taskMaster.getTaskQueue()).andReturn(Optional.of(taskQueue)).anyTimes();
expect(taskMaster.getTaskRunner()).andReturn(Optional.absent()).anyTimes();
expect(taskStorage.getActiveTasks()).andReturn(ImmutableList.of()).anyTimes();
expect(taskStorage.getStatus("test_task1")).andReturn(Optional.of(TaskStatus.failure("test_task1"))).anyTimes();
expect(taskStorage.getStatus("test_task2")).andReturn(Optional.of(TaskStatus.running("test_task2"))).anyTimes();
EasyMock.replay(taskStorage);
Pair<Map<Interval, HadoopIndexTask>, Map<Interval, String>> runningTasksPair = supervisor.getRunningTasks();
Map<Interval, HadoopIndexTask> runningTasks = runningTasksPair.lhs;
Map<Interval, String> runningVersion = runningTasksPair.rhs;
DataSchema dataSchema = new DataSchema(
"test_datasource",
null,
null,
null,
TransformSpec.NONE,
objectMapper
);
HadoopIOConfig hadoopIOConfig = new HadoopIOConfig(new HashMap<>(), null, null);
HadoopIngestionSpec spec = new HadoopIngestionSpec(dataSchema, hadoopIOConfig, null);
HadoopIndexTask task1 = new HadoopIndexTask(
"test_task1",
spec,
null,
null,
null,
objectMapper,
null,
null,
null
);
runningTasks.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), task1);
runningVersion.put(Intervals.of("2015-01-01T00Z/2015-01-02T00Z"), "test_version1");
HadoopIndexTask task2 = new HadoopIndexTask(
"test_task2",
spec,
null,
null,
null,
objectMapper,
null,
null,
null
);
runningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2);
runningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2");
supervisor.checkSegmentsAndSubmitTasks();
Map<Interval, HadoopIndexTask> expectedRunningTasks = new HashMap<>();
Map<Interval, String> expectedRunningVersion = new HashMap<>();
expectedRunningTasks.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), task2);
expectedRunningVersion.put(Intervals.of("2015-01-02T00Z/2015-01-03T00Z"), "test_version2");
Assert.assertEquals(expectedRunningTasks, runningTasks);
Assert.assertEquals(expectedRunningVersion, runningVersion);
}
@Test
public void testSuspendedDoesntRun()

View File

@ -24,7 +24,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -0,0 +1,94 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.druid.extensions.contrib</groupId>
<artifactId>druid-momentsketch</artifactId>
<name>druid-momentsketch</name>
<description>Aggregators for the approximate quantile moment sketch</description>
<dependencies>
<dependency>
<groupId>com.github.stanford-futuredata.momentsketch</groupId>
<artifactId>momentsketch-solver</artifactId>
<version>0.1.1</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-core</artifactId>
<version>${project.parent.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-processing</artifactId>
<version>${project.parent.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymock</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-core</artifactId>
<version>${project.parent.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-processing</artifactId>
<version>${project.parent.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.druid</groupId>
<artifactId>druid-server</artifactId>
<version>${project.parent.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory;
import org.apache.druid.segment.GenericColumnSerializer;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.ObjectStrategy;
import org.apache.druid.segment.serde.ComplexColumnPartSupplier;
import org.apache.druid.segment.serde.ComplexMetricExtractor;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import org.apache.druid.segment.serde.LargeColumnSupportedComplexColumnSerializer;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
import java.nio.ByteBuffer;
public class MomentSketchComplexMetricSerde extends ComplexMetricSerde
{
private static final MomentSketchObjectStrategy STRATEGY = new MomentSketchObjectStrategy();
@Override
public String getTypeName()
{
return MomentSketchAggregatorFactory.TYPE_NAME;
}
@Override
public ComplexMetricExtractor getExtractor()
{
return new ComplexMetricExtractor()
{
@Override
public Class<?> extractedClass()
{
return MomentSketchWrapper.class;
}
@Override
public Object extractValue(final InputRow inputRow, final String metricName)
{
return (MomentSketchWrapper) inputRow.getRaw(metricName);
}
};
}
@Override
public void deserializeColumn(ByteBuffer buffer, ColumnBuilder builder)
{
final GenericIndexed<MomentSketchWrapper> column = GenericIndexed.read(
buffer,
STRATEGY,
builder.getFileMapper()
);
builder.setComplexColumnSupplier(new ComplexColumnPartSupplier(getTypeName(), column));
}
@Override
public ObjectStrategy<MomentSketchWrapper> getObjectStrategy()
{
return STRATEGY;
}
@Override
public GenericColumnSerializer getSerializer(SegmentWriteOutMedium segmentWriteOutMedium, String column)
{
return LargeColumnSupportedComplexColumnSerializer.create(
segmentWriteOutMedium,
column,
this.getObjectStrategy()
);
}
}

View File

@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.SerializerProvider;
import java.io.IOException;
public class MomentSketchJsonSerializer extends JsonSerializer<MomentSketchWrapper>
{
@Override
public void serialize(
MomentSketchWrapper momentsSketch,
JsonGenerator jsonGenerator,
SerializerProvider serializerProvider
) throws IOException
{
jsonGenerator.writeBinary(momentsSketch.toByteArray());
}
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch;
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.jsontype.NamedType;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.inject.Binder;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchMaxPostAggregator;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchMergeAggregatorFactory;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchMinPostAggregator;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchQuantilePostAggregator;
import org.apache.druid.segment.serde.ComplexMetrics;
import java.util.List;
/**
* Module defining aggregators for the moment-based approximate quantile sketch
* @see MomentSketchAggregatorFactory
*/
public class MomentSketchModule implements DruidModule
{
@Override
public List<? extends Module> getJacksonModules()
{
return ImmutableList.of(
new SimpleModule(
getClass().getSimpleName()
).registerSubtypes(
new NamedType(
MomentSketchAggregatorFactory.class,
MomentSketchAggregatorFactory.TYPE_NAME
),
new NamedType(
MomentSketchMergeAggregatorFactory.class,
MomentSketchMergeAggregatorFactory.TYPE_NAME
),
new NamedType(
MomentSketchQuantilePostAggregator.class,
MomentSketchQuantilePostAggregator.TYPE_NAME
),
new NamedType(
MomentSketchMinPostAggregator.class,
MomentSketchMinPostAggregator.TYPE_NAME
),
new NamedType(
MomentSketchMaxPostAggregator.class,
MomentSketchMaxPostAggregator.TYPE_NAME
)
).addSerializer(MomentSketchWrapper.class, new MomentSketchJsonSerializer())
);
}
@Override
public void configure(Binder binder)
{
registerSerde();
}
@VisibleForTesting
public static void registerSerde()
{
if (ComplexMetrics.getSerdeForType(MomentSketchAggregatorFactory.TYPE_NAME) == null) {
ComplexMetrics.registerSerde(MomentSketchAggregatorFactory.TYPE_NAME, new MomentSketchComplexMetricSerde());
}
}
}

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory;
import org.apache.druid.segment.data.ObjectStrategy;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
public class MomentSketchObjectStrategy implements ObjectStrategy<MomentSketchWrapper>
{
private static final byte[] EMPTY_BYTES = new byte[0];
@Override
public Class<? extends MomentSketchWrapper> getClazz()
{
return MomentSketchWrapper.class;
}
@Override
public MomentSketchWrapper fromByteBuffer(ByteBuffer buffer, int numBytes)
{
if (numBytes == 0) {
return null;
}
buffer.limit(buffer.position() + numBytes);
return MomentSketchWrapper.fromBytes(buffer);
}
@Override
public byte[] toBytes(@Nullable MomentSketchWrapper val)
{
if (val == null) {
return EMPTY_BYTES;
}
return val.toByteArray();
}
@Override
public int compare(MomentSketchWrapper o1, MomentSketchWrapper o2)
{
return MomentSketchAggregatorFactory.COMPARATOR.compare(o1, o2);
}
}

View File

@ -0,0 +1,189 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch;
import com.github.stanfordfuturedata.momentsketch.MomentSolver;
import com.github.stanfordfuturedata.momentsketch.MomentStruct;
import java.nio.ByteBuffer;
/**
* Class for wrapping the operations of the moments sketch for use in
* the moment sketch aggregator
* {@link org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchAggregatorFactory}.
*
* k controls the size and accuracy provided by the sketch.
* The arcsinh function is used to compress the range of the data, yielding more robust results
* on skewed and long-tailed metrics at the cost of slightly reduced accuracy on metrics with more
* uniform distributions.
*/
public class MomentSketchWrapper
{
// The MomentStruct object stores the relevant statistics about a metric distribution.
protected MomentStruct data;
// Whether we use arcsinh to compress the range
protected boolean useArcSinh = true;
public MomentSketchWrapper(int k)
{
data = new MomentStruct(k);
}
public MomentSketchWrapper(MomentStruct data)
{
this.data = data;
}
public void setCompressed(boolean flag)
{
useArcSinh = flag;
}
public boolean getCompressed()
{
return useArcSinh;
}
public int getK()
{
return data.power_sums.length;
}
public double[] getPowerSums()
{
return data.power_sums;
}
public double getMin()
{
if (useArcSinh) {
return Math.sinh(data.min);
} else {
return data.min;
}
}
public double getMax()
{
if (useArcSinh) {
return Math.sinh(data.max);
} else {
return data.max;
}
}
public void add(double rawX)
{
double x = rawX;
if (useArcSinh) {
// Since Java does not have a native arcsinh implementation we
// compute it manually using the following formula.
// This is the inverse operation of Math.sinh
x = Math.log(rawX + Math.sqrt(1 + rawX * rawX));
}
data.add(x);
}
public void merge(MomentSketchWrapper other)
{
data.merge(other.data);
}
public byte[] toByteArray()
{
ByteBuffer bb = ByteBuffer.allocate(2 * Integer.BYTES + (data.power_sums.length + 2) * Double.BYTES);
return toBytes(bb).array();
}
public MomentSolver getSolver()
{
MomentSolver ms = new MomentSolver(data);
return ms;
}
/**
* Estimates quantiles given the statistics in a moments sketch.
* @param fractions real values in [0, 1] for which we want to estimate quantiles
*
* @return estimated quantiles.
*/
public double[] getQuantiles(double[] fractions)
{
// The solver attempts to construct a distribution estimate which matches the
// statistics tracked by the moments sketch. We can then read off quantile estimates
// from the reconstructed distribution.
// This operation can be relatively expensive (~1 ms), so we set the parameters for distribution
// reconstruction to conservative values.
MomentSolver ms = new MomentSolver(data);
// Constants here are chosen to yield maximum precision while keeping solve times around 1 ms on a 2 GHz CPU.
// The grid size can be increased if longer solve times are acceptable.
ms.setGridSize(1024);
ms.setMaxIter(15);
ms.solve();
double[] rawQuantiles = ms.getQuantiles(fractions);
for (int i = 0; i < fractions.length; i++) {
if (useArcSinh) {
rawQuantiles[i] = Math.sinh(rawQuantiles[i]);
}
}
return rawQuantiles;
}
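/**
* Serialized layout: an int holding k, an int compression flag, doubles for the min and max,
* followed by the k power sums.
*/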
public ByteBuffer toBytes(ByteBuffer bb)
{
int compressedInt = getCompressed() ? 1 : 0;
bb.putInt(data.power_sums.length);
bb.putInt(compressedInt);
bb.putDouble(data.min);
bb.putDouble(data.max);
for (double x : data.power_sums) {
bb.putDouble(x);
}
return bb;
}
public static MomentSketchWrapper fromBytes(ByteBuffer bb)
{
int k = bb.getInt();
int compressedInt = bb.getInt();
boolean compressed = (compressedInt > 0);
MomentStruct m = new MomentStruct(k);
m.min = bb.getDouble();
m.max = bb.getDouble();
for (int i = 0; i < k; i++) {
m.power_sums[i] = bb.getDouble();
}
MomentSketchWrapper mw = new MomentSketchWrapper(m);
mw.setCompressed(compressed);
return mw;
}
public static MomentSketchWrapper fromByteArray(byte[] input)
{
ByteBuffer bb = ByteBuffer.wrap(input);
return fromBytes(bb);
}
@Override
public String toString()
{
return data.toString();
}
}

View File

@ -0,0 +1,294 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException;
import org.apache.druid.query.aggregation.AggregatorUtil;
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ValueType;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
/**
* Aggregation operations over the moment-based quantile sketch
* available on <a href="https://github.com/stanford-futuredata/momentsketch">github</a> and described
* in the paper <a href="https://arxiv.org/abs/1803.01969">Moment-based quantile sketches</a>.
*
* This sketch stores a set of (k) statistics about univariate metrics that can be used to
* solve for approximate quantiles of the original distribution at query time after aggregating
* the statistics.
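*
* A minimal ingestion-time aggregator spec might look like the following (field values are
* illustrative):
* <pre>
* {"type": "momentSketch", "name": "sketch", "fieldName": "value", "k": 13, "compress": true}
* </pre>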
*/
public class MomentSketchAggregatorFactory extends AggregatorFactory
{
// Default number of moments (k) chosen for ~1% quantile error.
public static final int DEFAULT_K = 13;
// Safer to compress data with unknown ranges by default, but reduces accuracy on uniform data
public static final boolean DEFAULT_COMPRESS = true;
private final String name;
private final String fieldName;
// Number of moments tracked. Larger k allows for better estimates but greater resource usage
private final int k;
// Controls whether or not data is compressed onto a smaller range using arcsinh
private final boolean compress;
private final byte cacheTypeId;
public static final String TYPE_NAME = "momentSketch";
@JsonCreator
public MomentSketchAggregatorFactory(
@JsonProperty("name") final String name,
@JsonProperty("fieldName") final String fieldName,
@Nullable @JsonProperty("k") final Integer k,
@Nullable @JsonProperty("compress") final Boolean compress
)
{
this(name, fieldName, k, compress, AggregatorUtil.MOMENTS_SKETCH_BUILD_CACHE_TYPE_ID);
}
MomentSketchAggregatorFactory(
final String name,
final String fieldName,
@Nullable final Integer k,
@Nullable final Boolean compress,
final byte cacheTypeId
)
{
Objects.requireNonNull(name, "Must have a valid, non-null aggregator name");
this.name = name;
Objects.requireNonNull(fieldName, "Parameter fieldName must be specified");
this.fieldName = fieldName;
this.k = k == null ? DEFAULT_K : k;
this.compress = compress == null ? DEFAULT_COMPRESS : compress;
this.cacheTypeId = cacheTypeId;
}
@Override
public byte[] getCacheKey()
{
return new CacheKeyBuilder(
cacheTypeId
).appendString(fieldName).appendInt(k).appendBoolean(compress).build();
}
@Override
public Aggregator factorize(ColumnSelectorFactory metricFactory)
{
ColumnCapabilities cap = metricFactory.getColumnCapabilities(fieldName);
if (cap == null || ValueType.isNumeric(cap.getType())) {
final ColumnValueSelector<Double> selector = metricFactory.makeColumnValueSelector(fieldName);
return new MomentSketchBuildAggregator(selector, k, getCompress());
} else {
final ColumnValueSelector<MomentSketchWrapper> selector = metricFactory.makeColumnValueSelector(fieldName);
return new MomentSketchMergeAggregator(selector, k, getCompress());
}
}
@Override
public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory)
{
ColumnCapabilities cap = metricFactory.getColumnCapabilities(fieldName);
if (cap == null || ValueType.isNumeric(cap.getType())) {
final ColumnValueSelector<Double> selector = metricFactory.makeColumnValueSelector(fieldName);
return new MomentSketchBuildBufferAggregator(selector, k, getCompress());
} else {
final ColumnValueSelector<MomentSketchWrapper> selector = metricFactory.makeColumnValueSelector(fieldName);
return new MomentSketchMergeBufferAggregator(selector, k, getCompress());
}
}
public static final Comparator<MomentSketchWrapper> COMPARATOR = Comparator.nullsFirst(
Comparator.comparingDouble(a -> a.getPowerSums()[0])
);
@Override
public Comparator getComparator()
{
return COMPARATOR;
}
@Override
public Object combine(@Nullable Object lhs, @Nullable Object rhs)
{
if (lhs == null) {
return rhs;
}
if (rhs == null) {
return lhs;
}
MomentSketchWrapper union = (MomentSketchWrapper) lhs;
union.merge((MomentSketchWrapper) rhs);
return union;
}
@Override
public AggregatorFactory getCombiningFactory()
{
return new MomentSketchMergeAggregatorFactory(name, k, compress);
}
@Override
public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException
{
if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) {
return getCombiningFactory();
} else {
throw new AggregatorFactoryNotMergeableException(this, other);
}
}
@Override
public List<AggregatorFactory> getRequiredColumns()
{
return Collections.singletonList(
new MomentSketchAggregatorFactory(
fieldName,
fieldName,
k,
compress
)
);
}
private MomentSketchWrapper deserializeFromByteArray(byte[] bytes)
{
return MomentSketchWrapper.fromByteArray(bytes);
}
@Override
public Object deserialize(Object serializedSketch)
{
if (serializedSketch instanceof String) {
String str = (String) serializedSketch;
return deserializeFromByteArray(StringUtils.decodeBase64(StringUtils.toUtf8(str)));
} else if (serializedSketch instanceof byte[]) {
return deserializeFromByteArray((byte[]) serializedSketch);
} else if (serializedSketch instanceof MomentSketchWrapper) {
return serializedSketch;
}
throw new ISE(
"Object cannot be deserialized to a Moments Sketch: "
+ serializedSketch.getClass()
);
}
@Override
public Object finalizeComputation(Object object)
{
return object;
}
@Override
@JsonProperty
public String getName()
{
return name;
}
@JsonProperty
public String getFieldName()
{
return fieldName;
}
@JsonProperty
public int getK()
{
return k;
}
@JsonProperty
public boolean getCompress()
{
return compress;
}
@Override
public List<String> requiredFields()
{
return Collections.singletonList(fieldName);
}
@Override
public String getTypeName()
{
return TYPE_NAME;
}
@Override
public int getMaxIntermediateSize()
{
// k double precision moments, 2 doubles for the min and max
// one integer to specify the number of moments
// one integer to specify whether data range is compressed
return (k + 2) * Double.BYTES + 2 * Integer.BYTES;
}
@Override
public boolean equals(final Object o)
{
if (this == o) {
return true;
}
if (o == null || !getClass().equals(o.getClass())) {
return false;
}
final MomentSketchAggregatorFactory that = (MomentSketchAggregatorFactory) o;
return Objects.equals(name, that.name) &&
Objects.equals(fieldName, that.fieldName) &&
k == that.k &&
compress == that.compress;
}
@Override
public int hashCode()
{
return Objects.hash(name, fieldName, k, compress);
}
@Override
public String toString()
{
return getClass().getSimpleName() + "{"
+ "name=" + name
+ ", fieldName=" + fieldName
+ ", k=" + k
+ ", compress=" + compress
+ "}";
}
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.segment.BaseDoubleColumnValueSelector;
public class MomentSketchBuildAggregator implements Aggregator
{
private final BaseDoubleColumnValueSelector valueSelector;
private final int k;
private final boolean compress;
private MomentSketchWrapper momentsSketch;
public MomentSketchBuildAggregator(
final BaseDoubleColumnValueSelector valueSelector,
final int k,
final boolean compress
)
{
this.valueSelector = valueSelector;
this.k = k;
this.compress = compress;
momentsSketch = new MomentSketchWrapper(k);
momentsSketch.setCompressed(compress);
}
@Override
public void aggregate()
{
momentsSketch.add(valueSelector.getDouble());
}
@Override
public Object get()
{
return momentsSketch;
}
@Override
public float getFloat()
{
throw new UnsupportedOperationException("not implemented");
}
@Override
public long getLong()
{
throw new UnsupportedOperationException("not implemented");
}
@Override
public Aggregator clone()
{
return new MomentSketchBuildAggregator(valueSelector, k, compress);
}
@Override
public void close()
{
momentsSketch = null;
}
}

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.segment.BaseDoubleColumnValueSelector;
import java.nio.ByteBuffer;
public class MomentSketchBuildBufferAggregator implements BufferAggregator
{
private final BaseDoubleColumnValueSelector selector;
private final int k;
private final boolean compress;
public MomentSketchBuildBufferAggregator(
final BaseDoubleColumnValueSelector valueSelector,
final int k,
final boolean compress
)
{
this.selector = valueSelector;
this.k = k;
this.compress = compress;
}
@Override
public synchronized void init(final ByteBuffer buffer, final int position)
{
ByteBuffer mutationBuffer = buffer.duplicate();
mutationBuffer.position(position);
MomentSketchWrapper emptyStruct = new MomentSketchWrapper(k);
emptyStruct.setCompressed(compress);
emptyStruct.toBytes(mutationBuffer);
}
@Override
public synchronized void aggregate(final ByteBuffer buffer, final int position)
{
ByteBuffer mutationBuffer = buffer.duplicate();
mutationBuffer.position(position);
MomentSketchWrapper ms0 = MomentSketchWrapper.fromBytes(mutationBuffer);
double x = selector.getDouble();
ms0.add(x);
mutationBuffer.position(position);
ms0.toBytes(mutationBuffer);
}
@Override
public synchronized Object get(final ByteBuffer buffer, final int position)
{
ByteBuffer mutationBuffer = buffer.duplicate();
mutationBuffer.position(position);
return MomentSketchWrapper.fromBytes(mutationBuffer);
}
@Override
public float getFloat(final ByteBuffer buffer, final int position)
{
throw new UnsupportedOperationException("Not implemented");
}
@Override
public long getLong(final ByteBuffer buffer, final int position)
{
throw new UnsupportedOperationException("Not implemented");
}
@Override
public void close()
{
}
}

View File

@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.query.aggregation.post.PostAggregatorIds;
import org.apache.druid.query.cache.CacheKeyBuilder;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;
public class MomentSketchMaxPostAggregator implements PostAggregator
{
private final String name;
private final PostAggregator field;
public static final String TYPE_NAME = "momentSketchMax";
@JsonCreator
public MomentSketchMaxPostAggregator(
@JsonProperty("name") final String name,
@JsonProperty("field") final PostAggregator field
)
{
this.name = Preconditions.checkNotNull(name, "name is null");
this.field = Preconditions.checkNotNull(field, "field is null");
}
@Override
@JsonProperty
public String getName()
{
return name;
}
@JsonProperty
public PostAggregator getField()
{
return field;
}
@Override
public Object compute(final Map<String, Object> combinedAggregators)
{
final MomentSketchWrapper sketch = (MomentSketchWrapper) field.compute(combinedAggregators);
return sketch.getMax();
}
@Override
public Comparator<double[]> getComparator()
{
throw new IAE("Comparing arrays of quantiles is not supported");
}
@Override
public Set<String> getDependentFields()
{
return field.getDependentFields();
}
@Override
public String toString()
{
return getClass().getSimpleName() + "{" +
"name='" + name + '\'' +
", field=" + field +
"}";
}
@Override
public boolean equals(final Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final MomentSketchMaxPostAggregator that = (MomentSketchMaxPostAggregator) o;
if (!name.equals(that.name)) {
return false;
}
return field.equals(that.field);
}
@Override
public int hashCode()
{
return (name.hashCode() * 31 + field.hashCode());
}
@Override
public byte[] getCacheKey()
{
final CacheKeyBuilder builder = new CacheKeyBuilder(
PostAggregatorIds.MOMENTS_SKETCH_TO_MAX_CACHE_TYPE_ID
).appendCacheable(field);
return builder.build();
}
@Override
public PostAggregator decorate(final Map<String, AggregatorFactory> map)
{
return this;
}
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.segment.ColumnValueSelector;
public class MomentSketchMergeAggregator implements Aggregator
{
private final ColumnValueSelector<MomentSketchWrapper> selector;
private MomentSketchWrapper momentsSketch;
public MomentSketchMergeAggregator(
ColumnValueSelector<MomentSketchWrapper> selector,
final int k,
final boolean compress
)
{
this.selector = selector;
this.momentsSketch = new MomentSketchWrapper(k);
momentsSketch.setCompressed(compress);
}
@Override
public void aggregate()
{
final MomentSketchWrapper sketch = selector.getObject();
if (sketch == null) {
return;
}
this.momentsSketch.merge(sketch);
}
@Override
public Object get()
{
return momentsSketch;
}
@Override
public float getFloat()
{
throw new UnsupportedOperationException("Not implemented");
}
@Override
public long getLong()
{
throw new UnsupportedOperationException("Not implemented");
}
@Override
public void close()
{
momentsSketch = null;
}
}

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.apache.druid.query.aggregation.Aggregator;
import org.apache.druid.query.aggregation.AggregatorUtil;
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
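/**
* Aggregator factory for merging pre-built moment sketches at query time.
* A minimal spec might look like the following (field values are illustrative):
* <pre>
* {"type": "momentSketchMerge", "name": "sketch", "k": 13, "compress": true}
* </pre>
*/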
public class MomentSketchMergeAggregatorFactory extends MomentSketchAggregatorFactory
{
public static final String TYPE_NAME = "momentSketchMerge";
@JsonCreator
public MomentSketchMergeAggregatorFactory(
@JsonProperty("name") final String name,
@JsonProperty("k") final Integer k,
@JsonProperty("compress") final Boolean compress
)
{
super(name, name, k, compress, AggregatorUtil.MOMENTS_SKETCH_MERGE_CACHE_TYPE_ID);
}
@Override
public Aggregator factorize(final ColumnSelectorFactory metricFactory)
{
final ColumnValueSelector<MomentSketchWrapper> selector = metricFactory.makeColumnValueSelector(
getFieldName());
return new MomentSketchMergeAggregator(selector, getK(), getCompress());
}
@Override
public BufferAggregator factorizeBuffered(final ColumnSelectorFactory metricFactory)
{
final ColumnValueSelector<MomentSketchWrapper> selector = metricFactory.makeColumnValueSelector(
getFieldName()
);
return new MomentSketchMergeBufferAggregator(selector, getK(), getCompress());
}
}

View File

@ -0,0 +1,110 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import org.apache.druid.query.aggregation.BufferAggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.ColumnValueSelector;
import java.nio.ByteBuffer;
public class MomentSketchMergeBufferAggregator implements BufferAggregator
{
private final ColumnValueSelector<MomentSketchWrapper> selector;
private final int size;
private final boolean compress;
public MomentSketchMergeBufferAggregator(
ColumnValueSelector<MomentSketchWrapper> selector,
int size,
boolean compress
)
{
this.selector = selector;
this.size = size;
this.compress = compress;
}
@Override
public void init(ByteBuffer buf, int position)
{
MomentSketchWrapper h = new MomentSketchWrapper(size);
h.setCompressed(compress);
ByteBuffer mutationBuffer = buf.duplicate();
mutationBuffer.position(position);
h.toBytes(mutationBuffer);
}
@Override
public void aggregate(ByteBuffer buf, int position)
{
MomentSketchWrapper msNext = selector.getObject();
if (msNext == null) {
return;
}
ByteBuffer mutationBuffer = buf.duplicate();
mutationBuffer.position(position);
MomentSketchWrapper ms0 = MomentSketchWrapper.fromBytes(mutationBuffer);
ms0.merge(msNext);
mutationBuffer.position(position);
ms0.toBytes(mutationBuffer);
}
@Override
public Object get(ByteBuffer buf, int position)
{
ByteBuffer mutationBuffer = buf.asReadOnlyBuffer();
mutationBuffer.position(position);
return MomentSketchWrapper.fromBytes(mutationBuffer);
}
@Override
public float getFloat(ByteBuffer buf, int position)
{
throw new UnsupportedOperationException("Not implemented");
}
@Override
public long getLong(ByteBuffer buf, int position)
{
throw new UnsupportedOperationException("Not implemented");
}
@Override
public double getDouble(ByteBuffer buf, int position)
{
throw new UnsupportedOperationException("Not implemented");
}
@Override
public void close()
{
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
inspector.visit("selector", selector);
}
}

View File

@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.query.aggregation.post.PostAggregatorIds;
import org.apache.druid.query.cache.CacheKeyBuilder;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;
public class MomentSketchMinPostAggregator implements PostAggregator
{
private final String name;
private final PostAggregator field;
public static final String TYPE_NAME = "momentSketchMin";
@JsonCreator
public MomentSketchMinPostAggregator(
@JsonProperty("name") final String name,
@JsonProperty("field") final PostAggregator field
)
{
this.name = Preconditions.checkNotNull(name, "name is null");
this.field = Preconditions.checkNotNull(field, "field is null");
}
@Override
@JsonProperty
public String getName()
{
return name;
}
@JsonProperty
public PostAggregator getField()
{
return field;
}
@Override
public Object compute(final Map<String, Object> combinedAggregators)
{
final MomentSketchWrapper sketch = (MomentSketchWrapper) field.compute(combinedAggregators);
return sketch.getMin();
}
@Override
public Comparator<double[]> getComparator()
{
throw new IAE("Comparing arrays of quantiles is not supported");
}
@Override
public Set<String> getDependentFields()
{
return field.getDependentFields();
}
@Override
public String toString()
{
return getClass().getSimpleName() + "{" +
"name='" + name + '\'' +
", field=" + field +
"}";
}
@Override
public boolean equals(final Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final MomentSketchMinPostAggregator that = (MomentSketchMinPostAggregator) o;
if (!name.equals(that.name)) {
return false;
}
return field.equals(that.field);
}
@Override
public int hashCode()
{
return (name.hashCode() * 31 + field.hashCode());
}
@Override
public byte[] getCacheKey()
{
final CacheKeyBuilder builder = new CacheKeyBuilder(
PostAggregatorIds.MOMENTS_SKETCH_TO_MIN_CACHE_TYPE_ID
).appendCacheable(field);
return builder.build();
}
@Override
public PostAggregator decorate(final Map<String, AggregatorFactory> map)
{
return this;
}
}

View File

@ -0,0 +1,148 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.query.aggregation.post.PostAggregatorIds;
import org.apache.druid.query.cache.CacheKeyBuilder;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;
public class MomentSketchQuantilePostAggregator implements PostAggregator
{
private final String name;
private final PostAggregator field;
private final double[] fractions;
public static final String TYPE_NAME = "momentSketchSolveQuantiles";
@JsonCreator
public MomentSketchQuantilePostAggregator(
@JsonProperty("name") final String name,
@JsonProperty("field") final PostAggregator field,
@JsonProperty("fractions") final double[] fractions
)
{
this.name = Preconditions.checkNotNull(name, "name is null");
this.field = Preconditions.checkNotNull(field, "field is null");
this.fractions = Preconditions.checkNotNull(fractions, "array of fractions is null");
}
@Override
@JsonProperty
public String getName()
{
return name;
}
@JsonProperty
public PostAggregator getField()
{
return field;
}
@JsonProperty
public double[] getFractions()
{
return fractions;
}
@Override
public Object compute(final Map<String, Object> combinedAggregators)
{
final MomentSketchWrapper sketch = (MomentSketchWrapper) field.compute(combinedAggregators);
double[] quantiles = sketch.getQuantiles(fractions);
return quantiles;
}
@Override
public Comparator<double[]> getComparator()
{
throw new IAE("Comparing arrays of quantiles is not supported");
}
@Override
public Set<String> getDependentFields()
{
return field.getDependentFields();
}
@Override
public String toString()
{
return getClass().getSimpleName() + "{" +
"name='" + name + '\'' +
", field=" + field +
", fractions=" + Arrays.toString(fractions) +
"}";
}
@Override
public boolean equals(final Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final MomentSketchQuantilePostAggregator that = (MomentSketchQuantilePostAggregator) o;
if (!name.equals(that.name)) {
return false;
}
if (!Arrays.equals(fractions, that.fractions)) {
return false;
}
return field.equals(that.field);
}
@Override
public int hashCode()
{
return (name.hashCode() * 31 + field.hashCode()) * 31 + Arrays.hashCode(fractions);
}
@Override
public byte[] getCacheKey()
{
final CacheKeyBuilder builder = new CacheKeyBuilder(
PostAggregatorIds.MOMENTS_SKETCH_TO_QUANTILES_CACHE_TYPE_ID
)
.appendCacheable(field)
.appendDoubleArray(fractions);
return builder.build();
}
@Override
public PostAggregator decorate(final Map<String, AggregatorFactory> map)
{
return this;
}
}
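
Editor's note: unlike the min/max post-aggregators, this one solves quantiles from the stored moments, so results are approximations. A hypothetical usage sketch (names and values are illustrative, patterned on the testSimpleSolve unit test below; not code from the commit):

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.query.aggregation.momentsketch.aggregator.MomentSketchQuantilePostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;

public class MomentSketchQuantileExample
{
  public static void main(String[] args)
  {
    MomentSketchWrapper sketch = new MomentSketchWrapper(13);
    sketch.setCompressed(true);
    for (int x = 0; x <= 100; x++) {
      sketch.add((double) x);
    }

    Map<String, Object> combined = new HashMap<>();
    combined.put("sketch", sketch);

    MomentSketchQuantilePostAggregator quantiles = new MomentSketchQuantilePostAggregator(
        "quantiles",
        new FieldAccessPostAggregator("quantiles", "sketch"),
        new double[]{0.25, 0.5, 0.75}
    );
    // Solved from moments, so expect approximations: roughly [25, 50, 75]
    // for this uniform input.
    System.out.println(Arrays.toString((double[]) quantiles.compute(combined)));
  }
}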

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.druid.query.aggregation.momentsketch.MomentSketchModule
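
Editor's note: this one-line file under META-INF/services is what makes the extension discoverable; Druid finds DruidModule implementations through the standard JDK ServiceLoader mechanism. A minimal illustration of that mechanism (not Druid's actual bootstrap code):

import java.util.ServiceLoader;
import org.apache.druid.initialization.DruidModule;

public class ModuleDiscoveryExample
{
  public static void main(String[] args)
  {
    // ServiceLoader scans META-INF/services/org.apache.druid.initialization.DruidModule
    // files on the classpath and instantiates each implementation listed there.
    for (DruidModule module : ServiceLoader.load(DruidModule.class)) {
      System.out.println(module.getClass().getName());
    }
  }
}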

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class MomentSketchWrapperTest
{
@Test
public void testDeserialize()
{
MomentSketchWrapper mw = new MomentSketchWrapper(10);
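// compress=false keeps raw power sums of the values; the aggregator defaults
// to compress=true, which arcsinh-transforms values for robustness on skewed
// distributions at a small accuracy cost on uniform data.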
mw.setCompressed(false);
mw.add(10);
byte[] bs = mw.toByteArray();
MomentSketchWrapper mw2 = MomentSketchWrapper.fromByteArray(bs);
assertEquals(10, mw2.getPowerSums()[1], 1e-10);
}
@Test
public void testSimpleSolve()
{
MomentSketchWrapper mw = new MomentSketchWrapper(13);
mw.setCompressed(true);
for (int x = 0; x < 101; x++) {
mw.add((double) x);
}
double[] ps = {0.0, 0.5, 1.0};
double[] qs = mw.getQuantiles(ps);
assertEquals(0, qs[0], 1.0);
assertEquals(50, qs[1], 1.0);
}
}

View File

@ -0,0 +1,204 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.momentsketch.aggregator;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.data.input.Row;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.query.aggregation.AggregationTestHelper;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchModule;
import org.apache.druid.query.aggregation.momentsketch.MomentSketchWrapper;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryRunnerTest;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import static org.junit.Assert.assertEquals;
@RunWith(Parameterized.class)
public class MomentsSketchAggregatorTest
{
private final AggregationTestHelper helper;
@Rule
public final TemporaryFolder tempFolder = new TemporaryFolder();
public MomentsSketchAggregatorTest(final GroupByQueryConfig config)
{
MomentSketchModule.registerSerde();
DruidModule module = new MomentSketchModule();
helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
module.getJacksonModules(), config, tempFolder);
}
@Parameterized.Parameters(name = "{0}")
public static Collection<?> constructorFeeder()
{
final List<Object[]> constructors = new ArrayList<>();
for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) {
constructors.add(new Object[]{config});
}
return constructors;
}
// Verifies JSON round-trip of the factory's properties and its equals() implementation.
@Test
public void serializeDeserializeFactoryWithFieldName() throws Exception
{
ObjectMapper objectMapper = new DefaultObjectMapper();
MomentSketchAggregatorFactory factory = new MomentSketchAggregatorFactory(
"name", "fieldName", 128, true
);
MomentSketchAggregatorFactory other = objectMapper.readValue(
objectMapper.writeValueAsString(factory),
MomentSketchAggregatorFactory.class
);
assertEquals(factory, other);
}
@Test
public void buildingSketchesAtIngestionTime() throws Exception
{
Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(
new File(this.getClass().getClassLoader().getResource("doubles_build_data.tsv").getFile()),
String.join(
"\n",
"{",
" \"type\": \"string\",",
" \"parseSpec\": {",
" \"format\": \"tsv\",",
" \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
" \"dimensionsSpec\": {",
" \"dimensions\": [\"product\"],",
" \"dimensionExclusions\": [ \"sequenceNumber\"],",
" \"spatialDimensions\": []",
" },",
" \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]",
" }",
"}"
),
"[{\"type\": \"momentSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 10, \"compress\": true}]",
0, // minTimestamp
Granularities.NONE,
10, // maxRowCount
String.join(
"\n",
"{",
" \"queryType\": \"groupBy\",",
" \"dataSource\": \"test_datasource\",",
" \"granularity\": \"ALL\",",
" \"dimensions\": [],",
" \"aggregations\": [",
" {\"type\": \"momentSketchMerge\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 10, \"compress\": true}",
" ],",
" \"postAggregations\": [",
" {\"type\": \"momentSketchSolveQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
" {\"type\": \"momentSketchMin\", \"name\": \"min\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
" {\"type\": \"momentSketchMax\", \"name\": \"max\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}",
" ],",
" \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
"}"
)
);
List<Row> results = seq.toList();
assertEquals(1, results.size());
Row row = results.get(0);
double[] quantilesArray = (double[]) row.getRaw("quantiles");
assertEquals(0, quantilesArray[0], 0.05);
assertEquals(.5, quantilesArray[1], 0.05);
assertEquals(1.0, quantilesArray[2], 0.05);
Double minValue = (Double) row.getRaw("min");
assertEquals(0.0011, minValue, 0.0001);
Double maxValue = (Double) row.getRaw("max");
assertEquals(0.9969, maxValue, 0.0001);
MomentSketchWrapper sketchObject = (MomentSketchWrapper) row.getRaw("sketch");
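// powerSums[0] is the count of values added, so all 400 TSV rows are present.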
assertEquals(400.0, sketchObject.getPowerSums()[0], 1e-10);
}
@Test
public void buildingSketchesAtQueryTime() throws Exception
{
Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(
new File(this.getClass().getClassLoader().getResource("doubles_build_data.tsv").getFile()),
String.join(
"\n",
"{",
" \"type\": \"string\",",
" \"parseSpec\": {",
" \"format\": \"tsv\",",
" \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
" \"dimensionsSpec\": {",
" \"dimensions\": [ \"product\"],",
" \"dimensionExclusions\": [\"sequenceNumber\"],",
" \"spatialDimensions\": []",
" },",
" \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]",
" }",
"}"
),
"[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]",
0, // minTimestamp
Granularities.NONE,
10, // maxRowCount
String.join(
"\n",
"{",
" \"queryType\": \"groupBy\",",
" \"dataSource\": \"test_datasource\",",
" \"granularity\": \"ALL\",",
" \"dimensions\": [],",
" \"aggregations\": [",
" {\"type\": \"momentSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 10}",
" ],",
" \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
"}"
)
);
List<Row> results = seq.toList();
assertEquals(1, results.size());
Row row = results.get(0);
MomentSketchWrapper sketchObject = (MomentSketchWrapper) row.getRaw("sketch");
// Rollup pre-sums values at ingestion, leaving one row per distinct product,
// so the query-time sketch sees only 9 values.
assertEquals(9.0, sketchObject.getPowerSums()[0], 1e-10);
}
}

View File

@ -0,0 +1,400 @@
2016010101 0 0 0.6529403005319299
2016010101 1 0 0.9270214958987323
2016010101 2 0 0.6383273609981486
2016010101 3 0 0.8088289215633632
2016010101 4 0 0.8163864917598281
2016010101 5 0 0.38484848588530784
2016010101 6 0 0.7690020468986823
2016010101 7 0 0.6212078833139824
2016010101 8 0 0.4915825094949512
2016010101 9 0 0.688004059332008
2016010101 10 0 0.2536908275250508
2016010101 11 0 0.6618435914290263
2016010101 12 0 0.7892773595797635
2016010101 13 0 0.08857624134076048
2016010101 14 0 0.11992633801904151
2016010101 15 0 0.4959192800105586
2016010101 16 0 0.5564893557708243
2016010101 17 0 0.7755547456799993
2016010101 18 0 0.06420706406984311
2016010101 19 0 0.23085639094262378
2016010101 20 7 0.012013916725163498
2016010101 21 7 0.34077219818209503
2016010101 22 7 0.8445966884204918
2016010101 23 7 0.6466142718287953
2016010101 24 7 0.43959032391415487
2016010101 25 7 0.7768829233737787
2016010101 26 7 0.5899544206136442
2016010101 27 7 0.017782361911801825
2016010101 28 7 0.5431916165782864
2016010101 29 7 0.8218253174439416
2016010101 30 7 0.6372788284951859
2016010101 31 7 0.41403671834680933
2016010101 32 7 0.042508330730374855
2016010101 33 7 0.7416290691530969
2016010101 34 7 0.6990557213726277
2016010101 35 7 0.6302154208823348
2016010101 36 7 0.021053567154993402
2016010101 37 7 0.770280353784988
2016010101 38 7 0.08205576978448703
2016010101 39 7 0.2049660800682488
2016010101 40 5 0.08129304678049831
2016010101 41 5 0.17754747271638005
2016010101 42 5 0.8441702357096768
2016010101 43 5 0.9060464737257796
2016010101 44 5 0.5970595512785409
2016010101 45 5 0.843859346312315
2016010101 46 5 0.1649847892987305
2016010101 47 5 0.5279903496999094
2016010101 48 5 0.08758749830556767
2016010101 49 5 0.6088480522002063
2016010101 50 5 0.31079133043670004
2016010101 51 5 0.43062105356651226
2016010101 52 5 0.8542989852099488
2016010101 53 5 0.42443162807834045
2016010101 54 5 0.5020327054358468
2016010101 55 5 0.36453920012074237
2016010101 56 5 0.9884597580348689
2016010101 57 5 0.3770559586575706
2016010101 58 5 0.5989237303385875
2016010101 59 5 0.9926342802399872
2016010101 60 4 0.7813961047849703
2016010101 61 4 0.062171533805525425
2016010101 62 4 0.5284977503473608
2016010101 63 4 0.5924687065581794
2016010101 64 4 0.06305234223879275
2016010101 65 4 0.4959562731747129
2016010101 66 4 0.6336733165353365
2016010101 67 4 0.48860263540869875
2016010101 68 4 0.9387610528974851
2016010101 69 4 0.3391271652731308
2016010101 70 4 0.5962837638971421
2016010101 71 4 0.9190447294921896
2016010101 72 4 0.33082943548872534
2016010101 73 4 0.6236359023672029
2016010101 74 4 0.27134427542016615
2016010101 75 4 0.11665530238761901
2016010101 76 4 0.10469260335277608
2016010101 77 4 0.6824658847771211
2016010101 78 4 0.6131047630496756
2016010101 79 4 0.9838171536972515
2016010101 80 4 0.7484669110852756
2016010101 81 4 0.797620888697219
2016010101 82 4 0.7166673353657907
2016010101 83 4 0.46968710353176557
2016010101 84 4 0.3998491199643106
2016010101 85 4 0.6314883585976869
2016010101 86 4 0.8305617875577815
2016010101 87 4 0.6867651870284084
2016010101 88 4 0.9961677044887979
2016010101 89 4 0.19745766301180412
2016010101 90 4 0.2737652043079263
2016010101 91 4 0.2954503444695358
2016010101 92 4 0.6191902196833489
2016010101 93 4 0.6828058006233482
2016010101 94 4 0.7967115641510757
2016010101 95 4 0.5485460823820962
2016010101 96 4 0.4278132830938558
2016010101 97 4 0.32194908458166194
2016010101 98 4 0.07094920295725238
2016010101 99 4 0.4351839393889565
2016010101 100 1 0.6160833396611648
2016010101 101 1 0.4652667787803648
2016010101 102 1 0.5026953463132913
2016010101 103 1 0.4103237191034753
2016010101 104 1 0.3298554666697301
2016010101 105 1 0.16907537273919138
2016010101 106 1 0.6945260598989513
2016010101 107 1 0.917138530496438
2016010101 108 1 0.8810129148605083
2016010101 109 1 0.11845626048380542
2016010101 110 1 0.8848971155827816
2016010101 111 1 0.9969103769603667
2016010101 112 1 0.06274198529295416
2016010101 113 1 0.2923616769686519
2016010101 114 1 0.12621083638328634
2016010101 115 1 0.9655188575577313
2016010101 116 1 0.6074995164352884
2016010101 117 1 0.5501887988201414
2016010101 118 1 0.9406914128003497
2016010101 119 1 0.03264873659277656
2016010101 120 6 0.004852543443656487
2016010101 121 6 0.11161194329252788
2016010101 122 6 0.9403527002796559
2016010101 123 6 0.8951866979503953
2016010101 124 6 0.07629846897033454
2016010101 125 6 0.9898485014275873
2016010101 126 6 0.42827377712188075
2016010101 127 6 0.4274796777951825
2016010101 128 6 0.5569522946332676
2016010101 129 6 0.028195121559112635
2016010101 130 6 0.8599127909482382
2016010101 131 6 0.3516112293128607
2016010101 132 6 0.3888868189342449
2016010101 133 6 0.644589126160206
2016010101 134 6 0.7398741071492928
2016010101 135 6 0.1998479248216123
2016010101 136 6 0.8803215884594476
2016010101 137 6 0.7079531966558515
2016010101 138 6 0.7904290564015343
2016010101 139 6 0.475671788742007
2016010101 140 3 0.034708334899357096
2016010101 141 3 0.4134637419532796
2016010101 142 3 0.9757934592902832
2016010101 143 3 0.37422347371609666
2016010101 144 3 0.5904996168737154
2016010101 145 3 0.5883259679727514
2016010101 146 3 0.3380286015499171
2016010101 147 3 0.42174393035143043
2016010101 148 3 0.4764900074141757
2016010101 149 3 0.01864239537224921
2016010101 150 3 0.9124007087743986
2016010101 151 3 0.8951275235699193
2016010101 152 3 0.7037272142266654
2016010101 153 3 0.5685506209266902
2016010101 154 3 0.4104883958833594
2016010101 155 3 0.7794005551450208
2016010101 156 3 0.2879354697088996
2016010101 157 3 0.5243215707259823
2016010101 158 3 0.22238840286136063
2016010101 159 3 0.11336472553284738
2016010101 160 4 0.9800770037725316
2016010101 161 4 0.7628237317889158
2016010101 162 4 0.5355335935170453
2016010101 163 4 0.9676939330565402
2016010101 164 4 0.657825753108034
2016010101 165 4 0.9175328548944673
2016010101 166 4 0.6834666043257283
2016010101 167 4 0.08580759367942314
2016010101 168 4 0.3134740602060899
2016010101 169 4 0.3218818254752742
2016010101 170 4 0.6119297354994999
2016010101 171 4 0.07086832750773142
2016010101 172 4 0.2700864307032772
2016010101 173 4 0.7497315076673637
2016010101 174 4 0.4959921300968493
2016010101 175 4 0.09294825796093753
2016010101 176 4 0.4954515904444161
2016010101 177 4 0.8820366880191506
2016010101 178 4 0.17978298283728522
2016010101 179 4 0.05259679741524781
2016010101 180 5 0.4711892966981096
2016010101 181 5 0.5965662941715105
2016010101 182 5 0.4775201668966973
2016010101 183 5 0.05084576687030873
2016010101 184 5 0.16680660677593928
2016010101 185 5 0.9342287333653685
2016010101 186 5 0.8153161893769392
2016010101 187 5 0.9362517669519288
2016010101 188 5 0.10865218471840699
2016010101 189 5 0.44665378915111065
2016010101 190 5 0.8804454791937898
2016010101 191 5 0.20666928346935398
2016010101 192 5 0.7052479677101612
2016010101 193 5 0.5006205470200923
2016010101 194 5 0.23220501028575968
2016010101 195 5 0.11776507130391467
2016010101 196 5 0.592011744069295
2016010101 197 5 0.7089191450076786
2016010101 198 5 0.7269340552231702
2016010101 199 5 0.7049554871226075
2016010101 200 1 0.44078367400761076
2016010101 201 1 0.7715264806037321
2016010101 202 1 0.10151701902103971
2016010101 203 1 0.661891806135609
2016010101 204 1 0.23095745116331567
2016010101 205 1 0.46625278601359255
2016010101 206 1 0.5912486124707177
2016010101 207 1 0.963946871892115
2016010101 208 1 0.8172596270687692
2016010101 209 1 0.05745699928199144
2016010101 210 1 0.40612684342877337
2016010101 211 1 0.6330844777969608
2016010101 212 1 0.3148973406065705
2016010101 213 1 0.23230462811318175
2016010101 214 1 0.9960772952945196
2016010101 215 1 0.4581376339786414
2016010101 216 1 0.7181494575770677
2016010101 217 1 0.04277917580280799
2016010101 218 1 0.11137419446625674
2016010101 219 1 0.014716278313423037
2016010101 220 2 0.8988603727313186
2016010101 221 2 0.8192124226306603
2016010101 222 2 0.9304683598956597
2016010101 223 2 0.4375546733938238
2016010101 224 2 0.7676359685332207
2016010101 225 2 0.30977859822027964
2016010101 226 2 0.008595955287459267
2016010101 227 2 0.6790605343724216
2016010101 228 2 0.36949588946147993
2016010101 229 2 0.3826798435706562
2016010101 230 2 0.13836513167087128
2016010101 231 2 0.4451570472364902
2016010101 232 2 0.8944067771338549
2016010101 233 2 0.6068095655362902
2016010101 234 2 0.7084870042917992
2016010101 235 2 0.5867363290655241
2016010101 236 2 0.6903863088381504
2016010101 237 2 0.30984947936089124
2016010101 238 2 0.31561088279452665
2016010101 239 2 0.006286479849849758
2016010101 240 5 0.34397466439693725
2016010101 241 5 0.052476003295899964
2016010101 242 5 0.726106045184451
2016010101 243 5 0.01559115401009159
2016010101 244 5 0.9219270739836661
2016010101 245 5 0.5147917330760431
2016010101 246 5 0.41919804470784205
2016010101 247 5 0.4145101775865617
2016010101 248 5 0.34153038022995796
2016010101 249 5 0.9503817180587767
2016010101 250 5 0.6958354849389804
2016010101 251 5 0.46000811480536297
2016010101 252 5 0.18379911670616378
2016010101 253 5 0.20973108758556713
2016010101 254 5 0.5979201603287885
2016010101 255 5 0.5552419362393491
2016010101 256 5 0.10996555307297629
2016010101 257 5 0.3591453585622102
2016010101 258 5 0.06098055111386691
2016010101 259 5 0.5227270267924988
2016010101 260 0 0.8492702312836989
2016010101 261 0 0.5941242001151825
2016010101 262 0 0.6840733026822607
2016010101 263 0 0.8109777000249937
2016010101 264 0 0.8599286045013937
2016010101 265 0 0.7828806670746145
2016010101 266 0 0.8102260971867188
2016010101 267 0 0.38306094770114385
2016010101 268 0 0.7093609268723879
2016010101 269 0 0.4806583187577358
2016010101 270 0 0.5766489331365172
2016010101 271 0 0.7565067278238041
2016010101 272 0 0.8262768908267573
2016010101 273 0 0.7951015619138146
2016010101 274 0 0.1938448910588796
2016010101 275 0 0.8884608583839426
2016010101 276 0 0.7046203516594505
2016010101 277 0 0.5951074760704175
2016010101 278 0 0.38207409719784036
2016010101 279 0 0.2445271560830221
2016010101 280 7 0.6032919624054952
2016010101 281 7 0.1473220747987144
2016010101 282 7 0.38396643099307604
2016010101 283 7 0.4431561135554619
2016010101 284 7 0.896578318093225
2016010101 285 7 0.6729206122043515
2016010101 286 7 0.8498821349478478
2016010101 287 7 0.48231924024179784
2016010101 288 7 0.005379480238994816
2016010101 289 7 0.8017936717647264
2016010101 290 7 0.08193232952990348
2016010101 291 7 0.3422943366454193
2016010101 292 7 0.6081556855207957
2016010101 293 7 0.641193222941943
2016010101 294 7 0.3716858024654186
2016010101 295 7 0.0011169303830090849
2016010101 296 7 0.4698784438339285
2016010101 297 7 0.958198841287214
2016010101 298 7 0.730945048929339
2016010101 299 7 0.1858601884405512
2016010101 300 5 0.1020825694779407
2016010101 301 5 0.5742385074938443
2016010101 302 5 0.9846817584978909
2016010101 303 5 0.3858694391491331
2016010101 304 5 0.9822246873202894
2016010101 305 5 0.39822015482143314
2016010101 306 5 0.6575924137957005
2016010101 307 5 0.02359557062746842
2016010101 308 5 0.42059510563039115
2016010101 309 5 0.5970764856116284
2016010101 310 5 0.2817399870096221
2016010101 311 5 0.5334091165258412
2016010101 312 5 0.31199853410796585
2016010101 313 5 0.3156991306990594
2016010101 314 5 0.9560285139855889
2016010101 315 5 0.7846951771498516
2016010101 316 5 0.009731486767097897
2016010101 317 5 0.22625857375026215
2016010101 318 5 0.8580955944724618
2016010101 319 5 0.9622008926137687
2016010101 320 5 0.023872302930851297
2016010101 321 5 0.3580981601151092
2016010101 322 5 0.9120442264954038
2016010101 323 5 0.5968491989965334
2016010101 324 5 0.5028516120506729
2016010101 325 5 0.30590552314314
2016010101 326 5 0.5566430714368423
2016010101 327 5 0.6441099124064397
2016010101 328 5 0.8765287851559298
2016010101 329 5 0.38405928947408385
2016010101 330 5 0.29654203975364
2016010101 331 5 0.3606921959261904
2016010101 332 5 0.9617038824842609
2016010101 333 5 0.3103700669261584
2016010101 334 5 0.4935170174690311
2016010101 335 5 0.34757561267296444
2016010101 336 5 0.1236918485545484
2016010101 337 5 0.24925258973306597
2016010101 338 5 0.4104821367672965
2016010101 339 5 0.3621850216936935
2016010101 340 6 0.3816099229918041
2016010101 341 6 0.9496667754823915
2016010101 342 6 0.5594605720642025
2016010101 343 6 0.8537860901562698
2016010101 344 6 0.74787202967909
2016010101 345 6 0.29699361421249604
2016010101 346 6 0.035943527086235605
2016010101 347 6 0.20106098029261277
2016010101 348 6 0.6589994525818863
2016010101 349 6 0.3851541727199762
2016010101 350 6 0.12262059605539744
2016010101 351 6 0.33383436408012057
2016010101 352 6 0.5087733967157267
2016010101 353 6 0.34978350071897446
2016010101 354 6 0.9171509423859847
2016010101 355 6 0.6395164525815664
2016010101 356 6 0.659637993918835
2016010101 357 6 0.5689746534857604
2016010101 358 6 0.03266513163571427
2016010101 359 6 0.5863675010868861
2016010101 360 9 0.8665167898047901
2016010101 361 9 0.7933960420424948
2016010101 362 9 0.8409667771425247
2016010101 363 9 0.9544310598825743
2016010101 364 9 0.36206869840549716
2016010101 365 9 0.253957983880155
2016010101 366 9 0.08496022679431525
2016010101 367 9 0.5483782518766319
2016010101 368 9 0.41440902281408365
2016010101 369 9 0.2947889064970717
2016010101 370 9 0.659477180019486
2016010101 371 9 0.9016744422830162
2016010101 372 9 0.4692828259677926
2016010101 373 9 0.4221974527778145
2016010101 374 9 0.26318360778150285
2016010101 375 9 0.10064081807071767
2016010101 376 9 0.7781802619858804
2016010101 377 9 0.529215767115243
2016010101 378 9 0.21094147073619007
2016010101 379 9 0.18894985078463877
2016010101 380 5 0.20683422198832369
2016010101 381 5 0.9506923735546904
2016010101 382 5 0.25734447316063913
2016010101 383 5 0.6439025323539892
2016010101 384 5 0.9099080819805052
2016010101 385 5 0.9331714165375404
2016010101 386 5 0.24979840404324272
2016010101 387 5 0.40270120064812764
2016010101 388 5 0.35895113537427137
2016010101 389 5 0.44814114645480074
2016010101 390 5 0.437368419580639
2016010101 391 5 0.2777496228001308
2016010101 392 5 0.09350862521048608
2016010101 393 5 0.10366624548706516
2016010101 394 5 0.8715309310993357
2016010101 395 5 0.8953111125914557
2016010101 396 5 0.9410866942183567
2016010101 397 5 0.16367286942347592
2016010101 398 5 0.6995415361957786
2016010101 399 5 0.7170527361072194

View File

@ -31,7 +31,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -28,7 +28,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -28,7 +28,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -23,7 +23,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -30,7 +30,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -23,7 +23,7 @@
<parent>
<artifactId>druid</artifactId>
<groupId>org.apache.druid</groupId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -32,7 +32,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,7 +30,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
@ -71,6 +71,7 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.compile.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>commons-cli</groupId>

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
@ -151,6 +151,130 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.compile.version}</version>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</exclusion>
<exclusion>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</exclusion>
<exclusion>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>javax.ws.rs</groupId>
<artifactId>jsr311-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</exclusion>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-util</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</exclusion>
<exclusion>
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
<exclusion>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
</exclusion>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
@ -164,6 +288,13 @@
</dependency>
<!-- Tests -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.compile.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
@ -189,13 +320,6 @@
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.compile.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>

View File

@ -28,7 +28,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -82,7 +82,7 @@ public class BucketsPostAggregator extends ApproximateHistogramPostAggregator
@JsonProperty
public float getOffset()
{
return bucketSize;
return offset;
}
@Override

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.aggregation.histogram;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
public class BucketsPostAggregatorTest
{
@Test
public void testSerde() throws Exception
{
BucketsPostAggregator aggregator1 =
new BucketsPostAggregator("buckets_post_aggregator", "test_field", 2f, 4f);
DefaultObjectMapper mapper = new DefaultObjectMapper();
BucketsPostAggregator aggregator2 = mapper.readValue(
mapper.writeValueAsString(aggregator1),
BucketsPostAggregator.class
);
Assert.assertEquals(aggregator1.getBucketSize(), aggregator2.getBucketSize(), 0.0001);
Assert.assertEquals(aggregator1.getOffset(), aggregator2.getOffset(), 0.0001);
Assert.assertArrayEquals(aggregator1.getCacheKey(), aggregator2.getCacheKey());
Assert.assertEquals(aggregator1.getDependentFields(), aggregator2.getDependentFields());
}
}

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -30,6 +30,7 @@ import java.util.List;
/**
*/
@Deprecated
public class KafkaEightDruidModule implements DruidModule
{
@Override

View File

@ -47,7 +47,9 @@ import java.util.Properties;
import java.util.Set;
/**
* This class is deprecated and the kafka-eight module should be removed completely.
*/
@Deprecated
public class KafkaEightFirehoseFactory implements FirehoseFactory<InputRowParser<ByteBuffer>>
{
private static final Logger log = new Logger(KafkaEightFirehoseFactory.class);

View File

@ -29,7 +29,7 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>

View File

@ -29,12 +29,12 @@
<parent>
<groupId>org.apache.druid</groupId>
<artifactId>druid</artifactId>
<version>0.14.0-incubating-SNAPSHOT</version>
<version>0.15.0-incubating-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<properties>
<apache.kafka.version>0.10.2.2</apache.kafka.version>
<apache.kafka.version>2.1.0</apache.kafka.version>
</properties>
<dependencies>

View File

@ -108,6 +108,7 @@ public class KafkaIndexTask extends SeekableStreamIndexTask<Integer, Long>
props.setProperty("auto.offset.reset", "none");
props.setProperty("key.deserializer", ByteArrayDeserializer.class.getName());
props.setProperty("value.deserializer", ByteArrayDeserializer.class.getName());
props.setProperty("isolation.level", "read_committed");
return new KafkaConsumer<>(props);
}
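
Editor's note: the new isolation.level property pairs with Kafka transactions; a read_committed consumer only sees messages from committed transactions and never reads past the first open transaction. A standalone sketch of the same consumer construction (the broker address is a placeholder, not from the diff):

import java.util.Properties;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;

public class ReadCommittedConsumerExample
{
  public static void main(String[] args)
  {
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder
    props.setProperty("key.deserializer", ByteArrayDeserializer.class.getName());
    props.setProperty("value.deserializer", ByteArrayDeserializer.class.getName());
    // Only deliver records from committed transactions; aborted data is skipped.
    props.setProperty("isolation.level", "read_committed");
    KafkaConsumer<byte[], byte[]> consumer = new KafkaConsumer<>(props);
    consumer.close();
  }
}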

View File

@ -45,8 +45,7 @@ public class KafkaIndexTaskIOConfig extends SeekableStreamIndexTaskIOConfig<Inte
@JsonProperty("pollTimeout") Long pollTimeout,
@JsonProperty("useTransaction") Boolean useTransaction,
@JsonProperty("minimumMessageTime") DateTime minimumMessageTime,
@JsonProperty("maximumMessageTime") DateTime maximumMessageTime,
@JsonProperty("skipOffsetGaps") Boolean skipOffsetGaps
@JsonProperty("maximumMessageTime") DateTime maximumMessageTime
)
{
super(
@ -57,7 +56,6 @@ public class KafkaIndexTaskIOConfig extends SeekableStreamIndexTaskIOConfig<Inte
useTransaction,
minimumMessageTime,
maximumMessageTime,
skipOffsetGaps,
null
);
@ -100,7 +98,6 @@ public class KafkaIndexTaskIOConfig extends SeekableStreamIndexTaskIOConfig<Inte
", useTransaction=" + isUseTransaction() +
", minimumMessageTime=" + getMinimumMessageTime() +
", maximumMessageTime=" + getMaximumMessageTime() +
", skipOffsetGaps=" + isSkipOffsetGaps() +
'}';
}
}

View File

@ -36,6 +36,7 @@ import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import javax.annotation.Nonnull;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@ -110,7 +111,7 @@ public class KafkaRecordSupplier implements RecordSupplier<Integer, Long>
public List<OrderedPartitionableRecord<Integer, Long>> poll(long timeout)
{
List<OrderedPartitionableRecord<Integer, Long>> polledRecords = new ArrayList<>();
for (ConsumerRecord<byte[], byte[]> record : consumer.poll(timeout)) {
for (ConsumerRecord<byte[], byte[]> record : consumer.poll(Duration.ofMillis(timeout))) {
polledRecords.add(new OrderedPartitionableRecord<>(
record.topic(),
record.partition(),

View File

@ -413,23 +413,6 @@ public class LegacyKafkaIndexTaskRunner extends SeekableStreamIndexTaskRunner<In
}
if (record.offset() < endOffsets.get(record.partition())) {
-if (record.offset() != nextOffsets.get(record.partition())) {
-if (ioConfig.isSkipOffsetGaps()) {
-log.warn(
-"Skipped to offset[%,d] after offset[%,d] in partition[%d].",
-record.offset(),
-nextOffsets.get(record.partition()),
-record.partition()
-);
-} else {
-throw new ISE(
-"WTF?! Got offset[%,d] after offset[%,d] in partition[%d].",
-record.offset(),
-nextOffsets.get(record.partition()),
-record.partition()
-);
-}
-}
try {
final byte[] valueBytes = record.value();
@ -489,7 +472,7 @@ public class LegacyKafkaIndexTaskRunner extends SeekableStreamIndexTaskRunner<In
nextOffsets.put(record.partition(), record.offset() + 1);
}
-if (nextOffsets.get(record.partition()).equals(endOffsets.get(record.partition()))
+if (nextOffsets.get(record.partition()) >= (endOffsets.get(record.partition()))
&& assignment.remove(record.partition())) {
log.info("Finished reading topic[%s], partition[%,d].", record.topic(), record.partition());
KafkaIndexTask.assignPartitions(consumer, topic, assignment);

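The two hunks above are the behavioral core of removing skipOffsetGaps. Under read_committed isolation, transaction control markers occupy offsets that are never delivered to the application, so gaps between consecutive record offsets are expected rather than an error, and a partition can finish past its recorded end offset; hence the end check loosens from an exact match to >=. A minimal sketch of the resulting bookkeeping, with hypothetical names standing in for the runner's per-partition maps:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class OffsetBookkeepingSketch
{
  private final Map<Integer, Long> nextOffsets = new HashMap<>();
  private final Map<Integer, Long> endOffsets = new HashMap<>();
  private final Set<Integer> assignment = new HashSet<>();

  public OffsetBookkeepingSketch(int partition, long start, long end)
  {
    nextOffsets.put(partition, start);
    endOffsets.put(partition, end);
    assignment.add(partition);
  }

  /**
   * Records consumption of (partition, offset). Offsets may jump past
   * the expected next offset when transaction markers are skipped, so
   * there is no gap check; ">=" rather than equals detects the end,
   * since the last visible record can land beyond the recorded end.
   * Returns true when the partition has just finished.
   */
  public boolean advance(int partition, long offset)
  {
    nextOffsets.put(partition, offset + 1);
    return nextOffsets.get(partition) >= endOffsets.get(partition)
           && assignment.remove(partition);
  }
}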

@ -212,8 +212,7 @@ public class KafkaSupervisor extends SeekableStreamSupervisor<Integer, Long>
kafkaIoConfig.getPollTimeout(),
true,
minimumMessageTime,
-maximumMessageTime,
-kafkaIoConfig.isSkipOffsetGaps()
+maximumMessageTime
);
}


@ -38,7 +38,7 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig
private final Map<String, Object> consumerProperties;
private final long pollTimeout;
-private final boolean skipOffsetGaps;
@JsonCreator
public KafkaSupervisorIOConfig(
@ -53,8 +53,7 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig
@JsonProperty("useEarliestOffset") Boolean useEarliestOffset,
@JsonProperty("completionTimeout") Period completionTimeout,
@JsonProperty("lateMessageRejectionPeriod") Period lateMessageRejectionPeriod,
-@JsonProperty("earlyMessageRejectionPeriod") Period earlyMessageRejectionPeriod,
-@JsonProperty("skipOffsetGaps") Boolean skipOffsetGaps
+@JsonProperty("earlyMessageRejectionPeriod") Period earlyMessageRejectionPeriod
)
{
super(
@ -76,7 +75,6 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig
StringUtils.format("consumerProperties must contain entry for [%s]", BOOTSTRAP_SERVERS_KEY)
);
this.pollTimeout = pollTimeout != null ? pollTimeout : DEFAULT_POLL_TIMEOUT_MILLIS;
-this.skipOffsetGaps = skipOffsetGaps != null ? skipOffsetGaps : false;
}
@JsonProperty
@ -103,12 +101,6 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig
return isUseEarliestSequenceNumber();
}
-@JsonProperty
-public boolean isSkipOffsetGaps()
-{
-return skipOffsetGaps;
-}
@Override
public String toString()
{
@ -125,7 +117,6 @@ public class KafkaSupervisorIOConfig extends SeekableStreamSupervisorIOConfig
", completionTimeout=" + getCompletionTimeout() +
", earlyMessageRejectionPeriod=" + getEarlyMessageRejectionPeriod() +
", lateMessageRejectionPeriod=" + getLateMessageRejectionPeriod() +
-", skipOffsetGaps=" + skipOffsetGaps +
'}';
}


@ -77,7 +77,6 @@ public class KafkaIOConfigTest
Assert.assertTrue(config.isUseTransaction());
Assert.assertFalse("minimumMessageTime", config.getMinimumMessageTime().isPresent());
Assert.assertFalse("maximumMessageTime", config.getMaximumMessageTime().isPresent());
-Assert.assertFalse("skipOffsetGaps", config.isSkipOffsetGaps());
Assert.assertEquals(Collections.EMPTY_SET, config.getExclusiveStartSequenceNumberPartitions());
}
@ -93,8 +92,7 @@ public class KafkaIOConfigTest
+ " \"consumerProperties\": {\"bootstrap.servers\":\"localhost:9092\"},\n"
+ " \"useTransaction\": false,\n"
+ " \"minimumMessageTime\": \"2016-05-31T12:00Z\",\n"
-+ " \"maximumMessageTime\": \"2016-05-31T14:00Z\",\n"
-+ " \"skipOffsetGaps\": true\n"
++ " \"maximumMessageTime\": \"2016-05-31T14:00Z\"\n"
+ "}";
KafkaIndexTaskIOConfig config = (KafkaIndexTaskIOConfig) mapper.readValue(
@ -115,9 +113,7 @@ public class KafkaIOConfigTest
Assert.assertFalse(config.isUseTransaction());
Assert.assertEquals(DateTimes.of("2016-05-31T12:00Z"), config.getMinimumMessageTime().get());
Assert.assertEquals(DateTimes.of("2016-05-31T14:00Z"), config.getMaximumMessageTime().get());
-Assert.assertTrue("skipOffsetGaps", config.isSkipOffsetGaps());
Assert.assertEquals(Collections.EMPTY_SET, config.getExclusiveStartSequenceNumberPartitions());
}
@Test

Some files were not shown because too many files have changed in this diff.