mirror of https://github.com/apache/druid.git
Merge pull request #699 from metamx/postprocessing-operators
query post-processing operators
This commit is contained in:
commit
28e3191503
|
@ -0,0 +1,32 @@
|
||||||
|
/*
|
||||||
|
* Druid - a distributed column store.
|
||||||
|
* Copyright (C) 2014 Metamarkets Group Inc.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonSubTypes;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonTypeInfo;
|
||||||
|
|
||||||
|
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
|
||||||
|
@JsonSubTypes(value = {
|
||||||
|
@JsonSubTypes.Type(name = "timewarp", value = TimewarpOperator.class)
|
||||||
|
})
|
||||||
|
public interface PostProcessingOperator<T>
|
||||||
|
{
|
||||||
|
public QueryRunner<T> postProcess(QueryRunner<T> baseQueryRunner);
|
||||||
|
}
|
|
@ -0,0 +1,152 @@
|
||||||
|
/*
|
||||||
|
* Druid - a distributed column store.
|
||||||
|
* Copyright (C) 2014 Metamarkets Group Inc.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
import com.google.common.base.Function;
|
||||||
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
import com.metamx.common.guava.Sequence;
|
||||||
|
import com.metamx.common.guava.Sequences;
|
||||||
|
import io.druid.data.input.MapBasedRow;
|
||||||
|
import io.druid.query.spec.MultipleIntervalSegmentSpec;
|
||||||
|
import io.druid.query.timeboundary.TimeBoundaryQuery;
|
||||||
|
import io.druid.query.timeboundary.TimeBoundaryResultValue;
|
||||||
|
import org.joda.time.DateTime;
|
||||||
|
import org.joda.time.Interval;
|
||||||
|
import org.joda.time.Period;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* TimewarpOperator is an example post-processing operator that maps current time
|
||||||
|
* to the latest period ending withing the specified data interval and truncates
|
||||||
|
* the query interval to discard data that would be mapped to the future.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class TimewarpOperator<T> implements PostProcessingOperator<T>
|
||||||
|
{
|
||||||
|
private final Interval dataInterval;
|
||||||
|
private final long periodMillis;
|
||||||
|
private final long originMillis;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param dataInterval interval containing the actual data
|
||||||
|
* @param period time will be offset by a multiple of the given period
|
||||||
|
* until there is at least a full period ending within the data interval
|
||||||
|
* @param origin origin to be used to align time periods
|
||||||
|
* (e.g. to determine on what day of the week a weekly period starts)
|
||||||
|
*/
|
||||||
|
@JsonCreator
|
||||||
|
public TimewarpOperator(
|
||||||
|
@JsonProperty("dataInterval") Interval dataInterval,
|
||||||
|
@JsonProperty("period") Period period,
|
||||||
|
@JsonProperty("origin") DateTime origin
|
||||||
|
)
|
||||||
|
{
|
||||||
|
this.originMillis = origin.getMillis();
|
||||||
|
this.dataInterval = dataInterval;
|
||||||
|
// this will fail for periods that do not map to millis (e.g. P1M)
|
||||||
|
this.periodMillis = period.toStandardDuration().getMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QueryRunner<T> postProcess(QueryRunner<T> baseQueryRunner)
|
||||||
|
{
|
||||||
|
return postProcess(baseQueryRunner, DateTime.now().getMillis());
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryRunner<T> postProcess(final QueryRunner<T> baseRunner, final long now)
|
||||||
|
{
|
||||||
|
return new QueryRunner<T>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Sequence<T> run(Query<T> query)
|
||||||
|
{
|
||||||
|
final long offset = computeOffset(now);
|
||||||
|
|
||||||
|
final Interval interval = query.getIntervals().get(0);
|
||||||
|
final Interval modifiedInterval = new Interval(
|
||||||
|
interval.getStartMillis() + offset,
|
||||||
|
Math.min(interval.getEndMillis() + offset, now + offset)
|
||||||
|
);
|
||||||
|
return Sequences.map(
|
||||||
|
baseRunner.run(
|
||||||
|
query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Arrays.asList(modifiedInterval)))
|
||||||
|
),
|
||||||
|
new Function<T, T>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public T apply(T input)
|
||||||
|
{
|
||||||
|
if (input instanceof Result) {
|
||||||
|
Result res = (Result) input;
|
||||||
|
Object value = res.getValue();
|
||||||
|
if (value instanceof TimeBoundaryResultValue) {
|
||||||
|
TimeBoundaryResultValue boundary = (TimeBoundaryResultValue) value;
|
||||||
|
value = new TimeBoundaryResultValue(
|
||||||
|
ImmutableMap.of(
|
||||||
|
TimeBoundaryQuery.MIN_TIME, boundary.getMinTime().minus(offset),
|
||||||
|
TimeBoundaryQuery.MAX_TIME, new DateTime(Math.min(boundary.getMaxTime().getMillis() - offset, now))
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return (T) new Result(res.getTimestamp().minus(offset), value);
|
||||||
|
} else if (input instanceof MapBasedRow) {
|
||||||
|
MapBasedRow row = (MapBasedRow) input;
|
||||||
|
return (T) new MapBasedRow(row.getTimestamp().minus(offset), row.getEvent());
|
||||||
|
}
|
||||||
|
|
||||||
|
// default to noop for unknown result types
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map time t into the last `period` ending within `dataInterval`
|
||||||
|
*
|
||||||
|
* @param t
|
||||||
|
* @return the offset between the mapped time and time t
|
||||||
|
*/
|
||||||
|
protected long computeOffset(final long t)
|
||||||
|
{
|
||||||
|
// start is the beginning of the last period ending within dataInterval
|
||||||
|
long start = dataInterval.getEndMillis() - periodMillis;
|
||||||
|
long startOffset = start % periodMillis - originMillis % periodMillis;
|
||||||
|
if(startOffset < 0) {
|
||||||
|
startOffset += periodMillis;
|
||||||
|
};
|
||||||
|
start -= startOffset;
|
||||||
|
|
||||||
|
// tOffset is the offset time t within the last period
|
||||||
|
long tOffset = t % periodMillis - originMillis % periodMillis;
|
||||||
|
if(tOffset < 0) {
|
||||||
|
tOffset += periodMillis;
|
||||||
|
}
|
||||||
|
tOffset += start;
|
||||||
|
return tOffset - t;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,126 @@
|
||||||
|
/*
|
||||||
|
* Druid - a distributed column store.
|
||||||
|
* Copyright (C) 2012, 2013, 2014 Metamarkets Group Inc.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.druid.query;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.metamx.common.guava.Sequence;
|
||||||
|
import com.metamx.common.guava.Sequences;
|
||||||
|
import io.druid.query.aggregation.AggregatorFactory;
|
||||||
|
import io.druid.query.aggregation.CountAggregatorFactory;
|
||||||
|
import io.druid.query.timeseries.TimeseriesResultValue;
|
||||||
|
import org.joda.time.DateTime;
|
||||||
|
import org.joda.time.Interval;
|
||||||
|
import org.joda.time.Period;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class TimewarpOperatorTest
|
||||||
|
{
|
||||||
|
TimewarpOperator<Result<TimeseriesResultValue>> testOperator = new TimewarpOperator<>(
|
||||||
|
new Interval(new DateTime("2014-01-01"), new DateTime("2014-01-15")),
|
||||||
|
new Period("P1W"),
|
||||||
|
new DateTime("2014-01-06") // align on Monday
|
||||||
|
);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testComputeOffset() throws Exception
|
||||||
|
{
|
||||||
|
{
|
||||||
|
final DateTime t = new DateTime("2014-01-23");
|
||||||
|
final DateTime tOffset = new DateTime("2014-01-09");
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
new DateTime(tOffset),
|
||||||
|
t.plus(testOperator.computeOffset(t.getMillis()))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
final DateTime t = new DateTime("2014-08-02");
|
||||||
|
final DateTime tOffset = new DateTime("2014-01-11");
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
new DateTime(tOffset),
|
||||||
|
t.plus(testOperator.computeOffset(t.getMillis()))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPostProcess() throws Exception
|
||||||
|
{
|
||||||
|
QueryRunner<Result<TimeseriesResultValue>> queryRunner = testOperator.postProcess(
|
||||||
|
new QueryRunner<Result<TimeseriesResultValue>>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public Sequence<Result<TimeseriesResultValue>> run(Query<Result<TimeseriesResultValue>> query)
|
||||||
|
{
|
||||||
|
return Sequences.simple(
|
||||||
|
ImmutableList.of(
|
||||||
|
new Result<>(
|
||||||
|
new DateTime(new DateTime("2014-01-09")),
|
||||||
|
new TimeseriesResultValue(ImmutableMap.<String, Object>of("metric", 2))
|
||||||
|
),
|
||||||
|
new Result<>(
|
||||||
|
new DateTime(new DateTime("2014-01-11")),
|
||||||
|
new TimeseriesResultValue(ImmutableMap.<String, Object>of("metric", 3))
|
||||||
|
),
|
||||||
|
new Result<>(
|
||||||
|
query.getIntervals().get(0).getEnd(),
|
||||||
|
new TimeseriesResultValue(ImmutableMap.<String, Object>of("metric", 5))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
new DateTime("2014-08-02").getMillis()
|
||||||
|
);
|
||||||
|
|
||||||
|
final Query<Result<TimeseriesResultValue>> query =
|
||||||
|
Druids.newTimeseriesQueryBuilder()
|
||||||
|
.dataSource("dummy")
|
||||||
|
.intervals("2014-07-31/2014-08-05")
|
||||||
|
.aggregators(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("count")))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
Assert.assertEquals(
|
||||||
|
Lists.newArrayList(
|
||||||
|
new Result<>(
|
||||||
|
new DateTime("2014-07-31"),
|
||||||
|
new TimeseriesResultValue(ImmutableMap.<String, Object>of("metric", 2))
|
||||||
|
),
|
||||||
|
new Result<>(
|
||||||
|
new DateTime("2014-08-02"),
|
||||||
|
new TimeseriesResultValue(ImmutableMap.<String, Object>of("metric", 3))
|
||||||
|
),
|
||||||
|
new Result<>(
|
||||||
|
new DateTime("2014-08-02"),
|
||||||
|
new TimeseriesResultValue(ImmutableMap.<String, Object>of("metric", 5))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
Sequences.toList(queryRunner.run(query), Lists.<Result<TimeseriesResultValue>>newArrayList())
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -19,6 +19,8 @@
|
||||||
|
|
||||||
package io.druid.server;
|
package io.druid.server;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.base.Function;
|
import com.google.common.base.Function;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.metamx.emitter.service.ServiceEmitter;
|
import com.metamx.emitter.service.ServiceEmitter;
|
||||||
|
@ -26,6 +28,7 @@ import com.metamx.emitter.service.ServiceMetricEvent;
|
||||||
import io.druid.client.CachingClusteredClient;
|
import io.druid.client.CachingClusteredClient;
|
||||||
import io.druid.query.FinalizeResultsQueryRunner;
|
import io.druid.query.FinalizeResultsQueryRunner;
|
||||||
import io.druid.query.MetricsEmittingQueryRunner;
|
import io.druid.query.MetricsEmittingQueryRunner;
|
||||||
|
import io.druid.query.PostProcessingOperator;
|
||||||
import io.druid.query.Query;
|
import io.druid.query.Query;
|
||||||
import io.druid.query.QueryRunner;
|
import io.druid.query.QueryRunner;
|
||||||
import io.druid.query.QuerySegmentWalker;
|
import io.druid.query.QuerySegmentWalker;
|
||||||
|
@ -44,17 +47,20 @@ public class ClientQuerySegmentWalker implements QuerySegmentWalker
|
||||||
private final ServiceEmitter emitter;
|
private final ServiceEmitter emitter;
|
||||||
private final CachingClusteredClient baseClient;
|
private final CachingClusteredClient baseClient;
|
||||||
private final QueryToolChestWarehouse warehouse;
|
private final QueryToolChestWarehouse warehouse;
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
|
||||||
@Inject
public ClientQuerySegmentWalker(
    ServiceEmitter emitter,
    CachingClusteredClient baseClient,
    QueryToolChestWarehouse warehouse,
    ObjectMapper objectMapper
)
{
  this.emitter = emitter;
  this.baseClient = baseClient;
  this.warehouse = warehouse;
  // used (per the diff below) to deserialize the optional "postProcessing"
  // operator out of the query context in makeRunner
  this.objectMapper = objectMapper;
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -69,10 +75,10 @@ public class ClientQuerySegmentWalker implements QuerySegmentWalker
|
||||||
return makeRunner(query);
|
return makeRunner(query);
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> FinalizeResultsQueryRunner<T> makeRunner(final Query<T> query)
|
private <T> QueryRunner<T> makeRunner(final Query<T> query)
|
||||||
{
|
{
|
||||||
final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
|
final QueryToolChest<T, Query<T>> toolChest = warehouse.getToolChest(query);
|
||||||
return new FinalizeResultsQueryRunner<T>(
|
final FinalizeResultsQueryRunner<T> baseRunner = new FinalizeResultsQueryRunner<T>(
|
||||||
toolChest.postMergeQueryDecoration(
|
toolChest.postMergeQueryDecoration(
|
||||||
toolChest.mergeResults(
|
toolChest.mergeResults(
|
||||||
new UnionQueryRunner<T>(
|
new UnionQueryRunner<T>(
|
||||||
|
@ -94,5 +100,15 @@ public class ClientQuerySegmentWalker implements QuerySegmentWalker
|
||||||
),
|
),
|
||||||
toolChest
|
toolChest
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
final PostProcessingOperator<T> postProcessing = objectMapper.convertValue(
|
||||||
|
query.getContext().get("postProcessing"),
|
||||||
|
new TypeReference<PostProcessingOperator<T>>() {
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
return postProcessing != null ?
|
||||||
|
postProcessing.postProcess(baseRunner) : baseRunner;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue