Add caching support to join queries (#10366)

* Proposed changes for making joins cacheable

* Add unit tests

* Fix tests

* simplify logic

* Pull empty byte array logic out of CachingQueryRunner

* remove useless null check

* Minor refactor

* Fix tests

* Fix segment caching on Broker

* Move join cache key computation in Broker

Move join cache key computation in Broker from ResultLevelCachingQueryRunner to CachingClusteredClient

* Fix compilation

* Review comments

* Add more tests

* Fix inspection errors

* Pushed condition analysis to JoinableFactory

* review comments

* Disable join caching for broker and add prefix key to BroadcastSegmentIndexedTable

* Remove commented lines

* Fix populateCache

* Disable caching for selective datasources

Refactored the code so that we can decide, at the data source level, whether to enable caching on the Broker or on data nodes
Abhishek Agarwal 2020-10-10 06:12:30 +05:30 committed by GitHub
parent 4c78b514c9
commit 4d2a92f46a
58 changed files with 1685 additions and 496 deletions


@ -25,6 +25,7 @@ import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import org.apache.druid.client.CachingClusteredClient;
@ -103,6 +104,7 @@ import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.join.MapJoinableFactory;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.timeline.DataSegment;
@ -339,7 +341,8 @@ public class CachingClusteredClientBenchmark
new DruidHttpClientConfig(),
processingConfig,
forkJoinPool,
QueryStackTests.DEFAULT_NOOP_SCHEDULER
QueryStackTests.DEFAULT_NOOP_SCHEDULER,
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of())
);
}


@ -366,7 +366,8 @@ public class MovingAverageQueryTest extends InitializedNullHandlingTest
}
},
ForkJoinPool.commonPool(),
QueryStackTests.DEFAULT_NOOP_SCHEDULER
QueryStackTests.DEFAULT_NOOP_SCHEDULER,
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of())
);
ClientQuerySegmentWalker walker = new ClientQuerySegmentWalker(


@ -44,6 +44,7 @@ import org.apache.druid.server.initialization.ServerConfig;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
@ -104,7 +105,8 @@ public class ServerManagerForQueryRetryTest extends ServerManager
QueryToolChest<T, Query<T>> toolChest,
VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline,
Function<SegmentReference, SegmentReference> segmentMapFn,
AtomicLong cpuTimeAccumulator
AtomicLong cpuTimeAccumulator,
Optional<byte[]> cacheKeyPrefix
)
{
if (query.getContextBoolean(QUERY_RETRY_TEST_CONTEXT_KEY, false)) {
@ -135,7 +137,8 @@ public class ServerManagerForQueryRetryTest extends ServerManager
toolChest,
timeline,
segmentMapFn,
cpuTimeAccumulator
cpuTimeAccumulator,
cacheKeyPrefix
);
}
}


@ -59,14 +59,10 @@ public interface DataSource
/**
* Returns true if queries on this dataSource are cacheable at both the result level and per-segment level.
* Currently, dataSources that modify the behavior of per-segment processing are not cacheable (like 'join').
* Nor are dataSources that do not actually reference segments (like 'inline'), since cache keys are always based
* on segment identifiers.
*
* Note: Ideally, queries on 'join' datasources _would_ be cacheable, but we cannot currently do this due to lacking
* the code necessary to compute cache keys properly.
* Currently, dataSources that do not actually reference segments (like 'inline') are not cacheable, since cache
* keys are always based on segment identifiers.
*/
boolean isCacheable();
boolean isCacheable(boolean isBroker);
/**
* Returns true if all servers have a full copy of this datasource. True for things like inline, lookup, etc, or


@ -48,6 +48,16 @@ public class GlobalTableDataSource extends TableDataSource
return true;
}
/**
* Query results from broadcast datasources should not be cached on the Broker; see
* https://github.com/apache/druid/issues/10444
*/
@Override
public boolean isCacheable(boolean isBroker)
{
return !isBroker;
}
@Override
public String toString()
{

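A short illustration of the resulting matrix, as a hedged sketch based only on the overrides in this commit (the wrapper class and datasource names are illustrative):

import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.query.TableDataSource;

class BroadcastCacheabilityExample
{
  static void example()
  {
    TableDataSource table = new TableDataSource("wiki");
    GlobalTableDataSource broadcast = new GlobalTableDataSource("countries");

    table.isCacheable(true);      // true:  the Broker may cache plain tables
    table.isCacheable(false);     // true:  data nodes may cache them too
    broadcast.isCacheable(true);  // false: the Broker must not cache broadcast results (issue 10444)
    broadcast.isCacheable(false); // true:  data nodes may still cache them
  }
}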

@ -168,7 +168,7 @@ public class InlineDataSource implements DataSource
}
@Override
public boolean isCacheable()
public boolean isCacheable(boolean isBroker)
{
return false;
}


@ -28,8 +28,8 @@ import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.JoinPrefixUtils;
import org.apache.druid.segment.join.JoinType;
import org.apache.druid.segment.join.Joinables;
import java.util.HashSet;
import java.util.List;
@ -69,7 +69,7 @@ public class JoinDataSource implements DataSource
{
this.left = Preconditions.checkNotNull(left, "left");
this.right = Preconditions.checkNotNull(right, "right");
this.rightPrefix = Joinables.validatePrefix(rightPrefix);
this.rightPrefix = JoinPrefixUtils.validatePrefix(rightPrefix);
this.conditionAnalysis = Preconditions.checkNotNull(conditionAnalysis, "conditionAnalysis");
this.joinType = Preconditions.checkNotNull(joinType, "joinType");
}
@ -175,9 +175,9 @@ public class JoinDataSource implements DataSource
}
@Override
public boolean isCacheable()
public boolean isCacheable(boolean isBroker)
{
return false;
return left.isCacheable(isBroker) && right.isCacheable(isBroker);
}
@Override

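In other words, a join is now cacheable exactly when every input is cacheable on the node type at hand; a minimal sketch of the rule this override encodes, where the helper name is hypothetical:

import org.apache.druid.query.DataSource;

class JoinCacheabilityRule
{
  // Hypothetical helper: the check recurses through nested joins, so a
  // non-cacheable datasource (e.g. a lookup) anywhere in the join tree
  // disables caching for the whole query.
  static boolean bothSidesCacheable(DataSource left, DataSource right, boolean isBroker)
  {
    return left.isCacheable(isBroker) && right.isCacheable(isBroker);
  }
}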

@ -79,7 +79,7 @@ public class LookupDataSource implements DataSource
}
@Override
public boolean isCacheable()
public boolean isCacheable(boolean isBroker)
{
return false;
}


@ -71,7 +71,7 @@ public class QueryDataSource implements DataSource
}
@Override
public boolean isCacheable()
public boolean isCacheable(boolean isBroker)
{
return false;
}


@ -76,7 +76,7 @@ public class TableDataSource implements DataSource
}
@Override
public boolean isCacheable()
public boolean isCacheable(boolean isBroker)
{
return true;
}


@ -83,7 +83,7 @@ public class UnionDataSource implements DataSource
}
@Override
public boolean isCacheable()
public boolean isCacheable(boolean isBroker)
{
// Disables result-level caching for 'union' datasources, which doesn't work currently.
// See https://github.com/apache/druid/issues/8713 for reference.


@ -23,6 +23,7 @@ import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import com.google.common.primitives.UnsignedBytes;
import org.apache.druid.guice.annotations.PublicApi;
import org.apache.druid.java.util.common.Cacheable;
@ -65,6 +66,7 @@ public class CacheKeyBuilder
static final byte CACHEABLE_KEY = 9;
static final byte CACHEABLE_LIST_KEY = 10;
static final byte DOUBLE_ARRAY_KEY = 11;
static final byte LONG_KEY = 12;
static final byte[] STRING_SEPARATOR = new byte[]{(byte) 0xFF};
static final byte[] EMPTY_BYTES = StringUtils.EMPTY_BYTES;
@ -250,6 +252,12 @@ public class CacheKeyBuilder
return this;
}
public CacheKeyBuilder appendLong(long input)
{
appendItem(LONG_KEY, Longs.toByteArray(input));
return this;
}
public CacheKeyBuilder appendFloat(float input)
{
appendItem(FLOAT_KEY, ByteBuffer.allocate(Float.BYTES).putFloat(input).array());

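A minimal sketch of the new method, assuming an arbitrary key id byte; appendLong is what lets segment interval endpoints participate in cache keys (see BroadcastSegmentIndexedTable further down):

import org.apache.druid.query.cache.CacheKeyBuilder;

class LongKeyExample
{
  private static final byte EXAMPLE_KEY_ID = 0x7F; // illustrative id byte

  static byte[] example()
  {
    // Each appended long contributes one LONG_KEY type byte plus
    // Long.BYTES of payload, via Longs.toByteArray.
    return new CacheKeyBuilder(EXAMPLE_KEY_ID)
        .appendLong(1577836800000L) // e.g. an interval start in millis
        .appendLong(1577923200000L) // and its end
        .build();
  }
}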

@ -280,6 +280,14 @@ public class DataSourceAnalysis
return dataSource instanceof QueryDataSource;
}
/**
* Returns true if this datasource involves a join operation
*/
public boolean isJoin()
{
return !preJoinableClauses.isEmpty();
}
@Override
public boolean equals(Object o)
{

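A brief sketch, assuming the usual DataSourceAnalysis.forDataSource entry point: a plain table yields no pre-joinable clauses, so isJoin() is false, while analyzing a JoinDataSource flattens the join tree into clauses and yields true.

import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.planning.DataSourceAnalysis;

class IsJoinExample
{
  static boolean example()
  {
    DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(new TableDataSource("wiki"));
    return analysis.isJoin(); // false: no pre-joinable clauses for a plain table
  }
}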

@ -22,8 +22,8 @@ package org.apache.druid.query.planning;
import com.google.common.base.Preconditions;
import org.apache.druid.query.DataSource;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.JoinPrefixUtils;
import org.apache.druid.segment.join.JoinType;
import org.apache.druid.segment.join.Joinables;
import java.util.Objects;
@ -46,7 +46,7 @@ public class PreJoinableClause
final JoinConditionAnalysis condition
)
{
this.prefix = Joinables.validatePrefix(prefix);
this.prefix = JoinPrefixUtils.validatePrefix(prefix);
this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource");
this.joinType = Preconditions.checkNotNull(joinType, "joinType");
this.condition = Preconditions.checkNotNull(condition, "condition");


@ -131,9 +131,9 @@ public class JoinConditionAnalysis
private static boolean isLeftExprAndRightColumn(final Expr a, final Expr b, final String rightPrefix)
{
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> Joinables.isPrefixedBy(c, rightPrefix))
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> JoinPrefixUtils.isPrefixedBy(c, rightPrefix))
&& b.getBindingIfIdentifier() != null
&& Joinables.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
&& JoinPrefixUtils.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
}
/**


@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.join;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.segment.column.ColumnHolder;
import javax.annotation.Nullable;
import java.util.Comparator;
import java.util.List;
/**
* Utility class for working with prefixes in join operations
*/
public class JoinPrefixUtils
{
private static final Comparator<String> DESCENDING_LENGTH_STRING_COMPARATOR = (s1, s2) ->
Integer.compare(s2.length(), s1.length());
/**
* Checks that "prefix" is a valid prefix for a join clause (see {@link JoinableClause#getPrefix()}) and, if so,
* returns it. Otherwise, throws an exception.
*/
public static String validatePrefix(@Nullable final String prefix)
{
if (prefix == null || prefix.isEmpty()) {
throw new IAE("Join clause cannot have null or empty prefix");
} else if (isPrefixedBy(ColumnHolder.TIME_COLUMN_NAME, prefix) || ColumnHolder.TIME_COLUMN_NAME.equals(prefix)) {
throw new IAE(
"Join clause cannot have prefix[%s], since it would shadow %s",
prefix,
ColumnHolder.TIME_COLUMN_NAME
);
} else {
return prefix;
}
}
public static boolean isPrefixedBy(final String columnName, final String prefix)
{
return columnName.length() > prefix.length() && columnName.startsWith(prefix);
}
/**
* Check if any prefixes in the provided list duplicate or shadow each other.
*
* @param prefixes A mutable list containing the prefixes to check. This list will be sorted by descending
* string length.
*/
public static void checkPrefixesForDuplicatesAndShadowing(
final List<String> prefixes
)
{
// this is a naive approach that assumes we'll typically handle only a small number of prefixes
prefixes.sort(DESCENDING_LENGTH_STRING_COMPARATOR);
for (int i = 0; i < prefixes.size(); i++) {
String prefix = prefixes.get(i);
for (int k = i + 1; k < prefixes.size(); k++) {
String otherPrefix = prefixes.get(k);
if (prefix.equals(otherPrefix)) {
throw new IAE("Detected duplicate prefix in join clauses: [%s]", prefix);
}
if (isPrefixedBy(prefix, otherPrefix)) {
throw new IAE("Detected conflicting prefixes in join clauses: [%s, %s]", prefix, otherPrefix);
}
}
}
}
}

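Behavior is unchanged from the old Joinables helpers, only the home class moved; a quick sketch of the contracts:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.druid.segment.join.JoinPrefixUtils;

class PrefixExample
{
  static void example()
  {
    JoinPrefixUtils.validatePrefix("j.");      // ok, returns "j."
    JoinPrefixUtils.isPrefixedBy("j.x", "j."); // true
    JoinPrefixUtils.isPrefixedBy("j.", "j.");  // false: equal strings do not count

    // Throws IAE ("Detected conflicting prefixes..."): "ABC." shadows "ABC.DEF".
    // The list is sorted in place by descending length, so it must be sortable.
    List<String> prefixes = new ArrayList<>(Arrays.asList("ABC.", "ABC.DEF"));
    JoinPrefixUtils.checkPrefixesForDuplicatesAndShadowing(prefixes);
  }
}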

@ -33,8 +33,8 @@ import java.util.stream.Collectors;
* Represents everything about a join clause except for the left-hand datasource. In other words, if the full join
* clause is "t1 JOIN t2 ON t1.x = t2.x" then this class represents "JOIN t2 ON x = t2.x" -- it does not include
* references to the left-hand "t1".
*
* Created from {@link org.apache.druid.query.planning.PreJoinableClause} by {@link Joinables#createSegmentMapFn}.
* <p>
* Created from {@link org.apache.druid.query.planning.PreJoinableClause} by {@link JoinableFactoryWrapper#createSegmentMapFn}.
*/
public class JoinableClause implements ReferenceCountedObject
{
@ -45,7 +45,7 @@ public class JoinableClause implements ReferenceCountedObject
public JoinableClause(String prefix, Joinable joinable, JoinType joinType, JoinConditionAnalysis condition)
{
this.prefix = Joinables.validatePrefix(prefix);
this.prefix = JoinPrefixUtils.validatePrefix(prefix);
this.joinable = Preconditions.checkNotNull(joinable, "joinable");
this.joinType = Preconditions.checkNotNull(joinType, "joinType");
this.condition = Preconditions.checkNotNull(condition, "condition");
@ -106,7 +106,7 @@ public class JoinableClause implements ReferenceCountedObject
*/
public boolean includesColumn(final String columnName)
{
return Joinables.isPrefixedBy(columnName, prefix);
return JoinPrefixUtils.isPrefixedBy(columnName, prefix);
}
/**


@ -43,8 +43,19 @@ public interface JoinableFactory
*
* @param dataSource the datasource to join on
* @param condition the condition to join on
*
* @return a Joinable if this datasource + condition combo is joinable; empty if not
*/
Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition);
/**
* Computes the cache key for a data source participating in a join operation. This is done separately from
* {@link #build(DataSource, JoinConditionAnalysis)}, which can be an expensive operation and can potentially be
* avoided if cached results can be used.
*
* @param dataSource the datasource to join on
* @param condition the condition to join on
*/
default Optional<byte[]> computeJoinCacheKey(DataSource dataSource, JoinConditionAnalysis condition)
{
return Optional.empty();
}
}

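The default implementation returns Optional.empty(), which keeps caching disabled for any factory that has not opted in. A hedged sketch of an opt-in override, modeled on the JoinableFactoryWithCacheKey test fixture that appears later in this commit (the class name here is illustrative):

import java.util.Optional;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.DataSource;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.Joinable;
import org.apache.druid.segment.join.JoinableFactory;

class ExampleJoinableFactory implements JoinableFactory
{
  @Override
  public boolean isDirectlyJoinable(DataSource dataSource)
  {
    return false;
  }

  @Override
  public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
  {
    return Optional.empty(); // building a Joinable is the expensive path we avoid
  }

  @Override
  public Optional<byte[]> computeJoinCacheKey(DataSource dataSource, JoinConditionAnalysis condition)
  {
    // Key the cache on the (single) table name, but only when the datasource
    // is itself cacheable and the condition supports a hash join.
    if (dataSource.isCacheable(false) && condition.canHashJoin()) {
      String tableName = dataSource.getTableNames().iterator().next();
      return Optional.of(StringUtils.toUtf8(tableName));
    }
    return Optional.empty();
  }
}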

@ -19,11 +19,14 @@
package org.apache.druid.segment.join;
import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.Query;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.planning.PreJoinableClause;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.segment.join.filter.JoinFilterAnalyzer;
import org.apache.druid.segment.join.filter.JoinFilterPreAnalysis;
@ -32,42 +35,25 @@ import org.apache.druid.segment.join.filter.JoinableClauses;
import org.apache.druid.segment.join.filter.rewrite.JoinFilterRewriteConfig;
import org.apache.druid.utils.JvmUtils;
import javax.annotation.Nullable;
import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
/**
* Utility methods for working with {@link Joinable} related classes.
* A wrapper class over {@link JoinableFactory} for working with {@link Joinable} related classes.
*/
public class Joinables
public class JoinableFactoryWrapper
{
private static final Comparator<String> DESCENDING_LENGTH_STRING_COMPARATOR = (s1, s2) ->
Integer.compare(s2.length(), s1.length());
/**
* Checks that "prefix" is a valid prefix for a join clause (see {@link JoinableClause#getPrefix()}) and, if so,
* returns it. Otherwise, throws an exception.
*/
public static String validatePrefix(@Nullable final String prefix)
{
if (prefix == null || prefix.isEmpty()) {
throw new IAE("Join clause cannot have null or empty prefix");
} else if (isPrefixedBy(ColumnHolder.TIME_COLUMN_NAME, prefix) || ColumnHolder.TIME_COLUMN_NAME.equals(prefix)) {
throw new IAE(
"Join clause cannot have prefix[%s], since it would shadow %s",
prefix,
ColumnHolder.TIME_COLUMN_NAME
);
} else {
return prefix;
}
}
private static final byte JOIN_OPERATION = 0x1;
private static final Logger log = new Logger(JoinableFactoryWrapper.class);
public static boolean isPrefixedBy(final String columnName, final String prefix)
private final JoinableFactory joinableFactory;
public JoinableFactoryWrapper(final JoinableFactory joinableFactory)
{
return columnName.length() > prefix.length() && columnName.startsWith(prefix);
this.joinableFactory = Preconditions.checkNotNull(joinableFactory, "joinableFactory");
}
/**
@ -75,17 +61,15 @@ public class Joinables
* clauses is > 0). If mapping is not needed, this method will return {@link Function#identity()}.
*
* @param clauses Pre-joinable clauses
* @param joinableFactory Factory for joinables
* @param cpuTimeAccumulator An accumulator that we will add CPU nanos to; this is part of the function to encourage
* callers to remember to track metrics on CPU time required for creation of Joinables
* @param query The query that will be run on the mapped segments. Usually this should be
* {@code analysis.getBaseQuery().orElse(query)}, where "analysis" is a
* {@link org.apache.druid.query.planning.DataSourceAnalysis} and "query" is the original
* {@link DataSourceAnalysis} and "query" is the original
* query from the end user.
*/
public static Function<SegmentReference, SegmentReference> createSegmentMapFn(
public Function<SegmentReference, SegmentReference> createSegmentMapFn(
final List<PreJoinableClause> clauses,
final JoinableFactory joinableFactory,
final AtomicLong cpuTimeAccumulator,
final Query<?> query
)
@ -119,28 +103,42 @@ public class Joinables
}
/**
* Check if any prefixes in the provided list duplicate or shadow each other.
* Compute a cache key prefix for data sources that participate in the RHS of a join. This key prefix
* can be used in the segment-level cache or the result-level cache. The function returns, wrapped in an
* Optional:
* - A non-empty byte array, if a join datasource is involved and caching is possible. The result includes the
* join condition expression, the join type, and the cache key returned by the joinable factory for each {@link PreJoinableClause}.
* - An empty Optional, if there is a join but caching is not possible. This may happen if one of the datasources
* participating in the JOIN is not cacheable.
*
* @param prefixes A mutable list containing the prefixes to check. This list will be sorted by descending
* string length.
* @param dataSourceAnalysis for the join datasource
* @return the optional cache key to be used as part of query cache key
* @throws IAE if this operation is called on a non-join data source
*/
public static void checkPrefixesForDuplicatesAndShadowing(
final List<String> prefixes
public Optional<byte[]> computeJoinDataSourceCacheKey(
final DataSourceAnalysis dataSourceAnalysis
)
{
// this is a naive approach that assumes we'll typically handle only a small number of prefixes
prefixes.sort(DESCENDING_LENGTH_STRING_COMPARATOR);
for (int i = 0; i < prefixes.size(); i++) {
String prefix = prefixes.get(i);
for (int k = i + 1; k < prefixes.size(); k++) {
String otherPrefix = prefixes.get(k);
if (prefix.equals(otherPrefix)) {
throw new IAE("Detected duplicate prefix in join clauses: [%s]", prefix);
}
if (isPrefixedBy(prefix, otherPrefix)) {
throw new IAE("Detected conflicting prefixes in join clauses: [%s, %s]", prefix, otherPrefix);
}
}
final List<PreJoinableClause> clauses = dataSourceAnalysis.getPreJoinableClauses();
if (clauses.isEmpty()) {
throw new IAE("No join clauses to build the cache key for data source [%s]", dataSourceAnalysis.getDataSource());
}
final CacheKeyBuilder keyBuilder;
keyBuilder = new CacheKeyBuilder(JOIN_OPERATION);
for (PreJoinableClause clause : clauses) {
Optional<byte[]> bytes = joinableFactory.computeJoinCacheKey(clause.getDataSource(), clause.getCondition());
if (!bytes.isPresent()) {
// Encountered a data source that does not support caching yet
log.debug("skipping caching for join since [%s] does not support caching", clause.getDataSource());
return Optional.empty();
}
keyBuilder.appendByteArray(bytes.get());
keyBuilder.appendString(clause.getCondition().getOriginalExpression());
keyBuilder.appendString(clause.getPrefix());
keyBuilder.appendString(clause.getJoinType().name());
}
return Optional.of(keyBuilder.build());
}
}

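Putting it together, a hedged sketch of how a caller such as CachingClusteredClient (changed elsewhere in this PR) might derive the join key prefix; the helper name and the empty-array convention for the no-join case are assumptions for illustration:

import java.util.Optional;
import org.apache.druid.query.Query;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.JoinableFactoryWrapper;

class JoinCacheKeyExample
{
  static Optional<byte[]> joinKeyPrefix(Query<?> query, JoinableFactory joinableFactory)
  {
    DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());
    if (!analysis.isJoin()) {
      return Optional.of(new byte[0]); // no join: nothing to prepend (assumed convention)
    }
    // Present -> prepend to segment/result cache keys; empty -> skip the cache.
    return new JoinableFactoryWrapper(joinableFactory).computeJoinDataSourceCacheKey(analysis);
  }
}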

@ -28,6 +28,7 @@ import org.apache.druid.query.DataSource;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
/**
* A {@link JoinableFactory} that delegates to the appropriate factory based on the datasource.
@ -66,18 +67,34 @@ public class MapJoinableFactory implements JoinableFactory
@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
{
return getSingleResult(dataSource, factory -> factory.build(dataSource, condition));
}
@Override
public Optional<byte[]> computeJoinCacheKey(DataSource dataSource, JoinConditionAnalysis condition)
{
return getSingleResult(dataSource, factory -> factory.computeJoinCacheKey(dataSource, condition));
}
/**
* Applies the given function to each {@link JoinableFactory} registered for the datasource's class, expecting at
* most one to return a non-empty result; if two factories return results, an exception is thrown.
*
*/
private <T> Optional<T> getSingleResult(DataSource dataSource, Function<JoinableFactory, Optional<T>> function)
{
Set<JoinableFactory> factories = joinableFactories.get(dataSource.getClass());
Optional<Joinable> maybeJoinable = Optional.empty();
for (JoinableFactory factory : factories) {
Optional<Joinable> candidate = factory.build(dataSource, condition);
Optional<T> mayBeFinalResult = Optional.empty();
for (JoinableFactory joinableFactory : factories) {
Optional<T> candidate = function.apply(joinableFactory);
if (candidate.isPresent() && mayBeFinalResult.isPresent()) {
throw new ISE("Multiple joinable factories are valid for table[%s]", dataSource);
}
if (candidate.isPresent()) {
if (maybeJoinable.isPresent()) {
throw new ISE("Multiple joinable factories are valid for table[%s]", dataSource);
}
maybeJoinable = candidate;
mayBeFinalResult = candidate;
}
}
return maybeJoinable;
return mayBeFinalResult;
}
}

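A small sketch of the delegation, reusing the empty registration seen in the test wiring above; with no factory registered for a datasource class, both delegating methods come back empty, while two matching factories would raise the ISE:

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.LookupDataSource;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.MapJoinableFactory;

class DelegationExample
{
  static void example()
  {
    MapJoinableFactory factory = new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of());
    JoinConditionAnalysis condition =
        JoinConditionAnalysis.forExpression("x == \"j.x\"", "j.", ExprMacroTable.nil());

    factory.build(new LookupDataSource("lookyloo"), condition);               // Optional.empty()
    factory.computeJoinCacheKey(new LookupDataSource("lookyloo"), condition); // Optional.empty()
  }
}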

@ -23,10 +23,10 @@ import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.planning.PreJoinableClause;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.join.JoinPrefixUtils;
import org.apache.druid.segment.join.Joinable;
import org.apache.druid.segment.join.JoinableClause;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.Joinables;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
@ -138,6 +138,6 @@ public class JoinableClauses
prefixes.add(clause.getPrefix());
}
Joinables.checkPrefixesForDuplicatesAndShadowing(prefixes);
JoinPrefixUtils.checkPrefixesForDuplicatesAndShadowing(prefixes);
}
}


@ -26,6 +26,7 @@ import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.Cursor;
@ -42,6 +43,7 @@ import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.ReadableOffset;
import org.apache.druid.segment.filter.Filters;
import org.apache.druid.timeline.SegmentId;
import org.joda.time.chrono.ISOChronology;
import javax.annotation.Nullable;
@ -57,6 +59,7 @@ import java.util.stream.Collectors;
public class BroadcastSegmentIndexedTable implements IndexedTable
{
private static final Logger LOG = new Logger(BroadcastSegmentIndexedTable.class);
private static final byte CACHE_PREFIX = 0x01;
private final QueryableIndexSegment segment;
private final QueryableIndexStorageAdapter adapter;
@ -246,6 +249,26 @@ public class BroadcastSegmentIndexedTable implements IndexedTable
);
}
@Override
public byte[] computeCacheKey()
{
SegmentId segmentId = segment.getId();
CacheKeyBuilder keyBuilder = new CacheKeyBuilder(CACHE_PREFIX);
return keyBuilder
.appendLong(segmentId.getInterval().getStartMillis())
.appendLong(segmentId.getInterval().getEndMillis())
.appendString(segmentId.getVersion())
.appendString(segmentId.getDataSource())
.appendInt(segmentId.getPartitionNum())
.build();
}
@Override
public boolean isCacheable()
{
return true;
}
@Override
public void close()
{

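Since the key above is a pure function of the SegmentId, two indexed tables over the same broadcast segment agree on it, while a re-published segment (new version, interval, or partition) produces a different key and bypasses stale entries; a hypothetical comparison helper:

import java.util.Arrays;
import org.apache.druid.segment.join.table.IndexedTable;

class BroadcastKeyExample
{
  // Hypothetical helper: both tables are assumed cacheable (always true for
  // BroadcastSegmentIndexedTable).
  static boolean sameUnderlyingSegment(IndexedTable a, IndexedTable b)
  {
    return Arrays.equals(a.computeCacheKey(), b.computeCacheKey());
  }
}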

@ -20,6 +20,7 @@
package org.apache.druid.segment.join.table;
import it.unimi.dsi.fastutil.ints.IntList;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ReferenceCountedObject;
@ -88,6 +89,26 @@ public interface IndexedTable extends ReferenceCountedObject, Closeable
return null;
}
/**
* Computes a {@code byte[]} key for the table that can be used for computing cache keys for join operations.
* See {@link org.apache.druid.segment.join.JoinableFactory#computeJoinCacheKey}.
*
* @return the byte array to use as a cache key
* @throws IAE if caching is not supported
*/
default byte[] computeCacheKey()
{
throw new IAE("Caching is not supported. Check `isCacheable` before calling computeCacheKey");
}
/**
* Returns whether this indexed table can be cached for join operations
*/
default boolean isCacheable()
{
return false;
}
/**
* Indexes support fast lookups on key columns.
*/

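Callers must gate on isCacheable() before asking for a key, since the default computeCacheKey() throws; a minimal sketch of the safe access pattern:

import java.util.Optional;
import org.apache.druid.segment.join.table.IndexedTable;

class IndexedTableCacheKeyExample
{
  static Optional<byte[]> cacheKeyOf(IndexedTable table)
  {
    // isCacheable() defaults to false; computeCacheKey() throws IAE unless overridden.
    return table.isCacheable() ? Optional.of(table.computeCacheKey()) : Optional.empty();
  }
}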

@ -75,4 +75,16 @@ public class ReferenceCountingIndexedTable extends ReferenceCountingCloseableObj
{
return incrementReferenceAndDecrementOnceCloseable();
}
@Override
public byte[] computeCacheKey()
{
return baseObject.computeCacheKey();
}
@Override
public boolean isCacheable()
{
return baseObject.isCacheable();
}
}


@ -19,6 +19,7 @@
package org.apache.druid.segment.join.table;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
@ -26,6 +27,7 @@ import org.apache.druid.segment.RowAdapter;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.column.ValueType;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.util.ArrayList;
import java.util.List;
@ -37,6 +39,13 @@ import java.util.stream.Collectors;
/**
* An IndexedTable composed of a List-based table and Map-based indexes. The implementation is agnostic to the
* specific row type; it uses a {@link RowAdapter} to work with any sort of object.
* The class allows passing in a cache key. If the key is non-null, results of any join on this table can be cached.
* That cache becomes invalidated if this key changes. Creators of this class can pass in a non-null cache key if it is
* possible to construct a small identifier
* - that must change when the contents of this indexed table change
* - that may remain unchanged when the contents of this indexed table do not change
*
* How the cache key is constructed depends on how the RowBasedIndexedTable is being built.
*/
public class RowBasedIndexedTable<RowType> implements IndexedTable
{
@ -46,6 +55,8 @@ public class RowBasedIndexedTable<RowType> implements IndexedTable
private final List<Function<RowType, Object>> columnFunctions;
private final Set<String> keyColumns;
private final String version;
@Nullable
private final byte[] cacheKey;
public RowBasedIndexedTable(
final List<RowType> table,
@ -54,6 +65,19 @@ public class RowBasedIndexedTable<RowType> implements IndexedTable
final Set<String> keyColumns,
final String version
)
{
this(table, rowAdapter, rowSignature, keyColumns, version, null);
}
public RowBasedIndexedTable(
final List<RowType> table,
final RowAdapter<RowType> rowAdapter,
final RowSignature rowSignature,
final Set<String> keyColumns,
final String version,
@Nullable
final byte[] cacheKey
)
{
this.table = table;
this.rowSignature = rowSignature;
@ -61,6 +85,7 @@ public class RowBasedIndexedTable<RowType> implements IndexedTable
rowSignature.getColumnNames().stream().map(rowAdapter::columnFunction).collect(Collectors.toList());
this.keyColumns = keyColumns;
this.version = version;
this.cacheKey = cacheKey;
if (!ImmutableSet.copyOf(rowSignature.getColumnNames()).containsAll(keyColumns)) {
throw new ISE(
@ -132,6 +157,18 @@ public class RowBasedIndexedTable<RowType> implements IndexedTable
return row -> columnFn.apply(table.get(row));
}
@Override
public byte[] computeCacheKey()
{
return Preconditions.checkNotNull(cacheKey, "Cache key can't be null");
}
@Override
public boolean isCacheable()
{
return (null != cacheKey);
}
@Override
public int numRows()
{

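Construction with the new optional key, a sketch mirroring the createCountriesIndexedTableWithCacheKey helper added to JoinTestHelper later in this commit; rows, adapter, and signature are assumed to be prepared elsewhere, and passing null instead of a key preserves the old non-cacheable behavior:

import com.google.common.collect.ImmutableSet;
import java.util.List;
import java.util.Map;
import org.apache.druid.segment.RowAdapter;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.join.table.RowBasedIndexedTable;

class RowBasedTableExample
{
  static RowBasedIndexedTable<Map<String, Object>> build(
      List<Map<String, Object>> rows,
      RowAdapter<Map<String, Object>> adapter,
      RowSignature signature
  )
  {
    byte[] cacheKey = new byte[]{1, 2, 3}; // must change whenever the rows change
    // The resulting table reports isCacheable() == true and returns cacheKey.
    return new RowBasedIndexedTable<>(
        rows,
        adapter,
        signature,
        ImmutableSet.of("countryNumber", "countryIsoCode"), // key columns
        "v1",                                               // version
        cacheKey                                            // null would disable caching
    );
  }
}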

@ -139,7 +139,8 @@ public class InlineDataSourceTest
@Test
public void test_isCacheable()
{
Assert.assertFalse(listDataSource.isCacheable());
Assert.assertFalse(listDataSource.isCacheable(true));
Assert.assertFalse(listDataSource.isCacheable(false));
}
@Test


@ -87,13 +87,15 @@ public class JoinDataSourceTest
@Test
public void test_isCacheable_tableToTable()
{
Assert.assertFalse(joinTableToTable.isCacheable());
Assert.assertTrue(joinTableToTable.isCacheable(true));
Assert.assertTrue(joinTableToTable.isCacheable(false));
}
@Test
public void test_isCacheable_lookup()
{
Assert.assertFalse(joinTableToLookup.isCacheable());
Assert.assertFalse(joinTableToLookup.isCacheable(true));
Assert.assertFalse(joinTableToLookup.isCacheable(false));
}
@Test


@ -52,7 +52,8 @@ public class LookupDataSourceTest
@Test
public void test_isCacheable()
{
Assert.assertFalse(lookylooDataSource.isCacheable());
Assert.assertFalse(lookylooDataSource.isCacheable(true));
Assert.assertFalse(lookylooDataSource.isCacheable(false));
}
@Test


@ -83,13 +83,15 @@ public class QueryDataSourceTest
@Test
public void test_isCacheable_table()
{
Assert.assertFalse(queryOnTableDataSource.isCacheable());
Assert.assertFalse(queryOnTableDataSource.isCacheable(true));
Assert.assertFalse(queryOnTableDataSource.isCacheable(false));
}
@Test
public void test_isCacheable_lookup()
{
Assert.assertFalse(queryOnLookupDataSource.isCacheable());
Assert.assertFalse(queryOnLookupDataSource.isCacheable(true));
Assert.assertFalse(queryOnLookupDataSource.isCacheable(false));
}
@Test


@ -53,7 +53,8 @@ public class TableDataSourceTest
@Test
public void test_isCacheable()
{
Assert.assertTrue(fooDataSource.isCacheable());
Assert.assertTrue(fooDataSource.isCacheable(true));
Assert.assertTrue(fooDataSource.isCacheable(false));
}
@Test


@ -95,7 +95,8 @@ public class UnionDataSourceTest
@Test
public void test_isCacheable()
{
Assert.assertFalse(unionDataSource.isCacheable());
Assert.assertFalse(unionDataSource.isCacheable(true));
Assert.assertFalse(unionDataSource.isCacheable(false));
}
@Test


@ -43,6 +43,7 @@ public class CacheKeyBuilderTest
.appendBoolean(false)
.appendString("test")
.appendInt(10)
.appendLong(Long.MAX_VALUE)
.appendFloat(0.1f)
.appendDouble(2.3)
.appendByteArray(CacheKeyBuilder.STRING_SEPARATOR) // test when an item is same with the separator
@ -57,6 +58,7 @@ public class CacheKeyBuilderTest
+ 1 // bool
+ 4 // 'test'
+ Integer.BYTES // 10
+ Long.BYTES // Long.MAX_VALUE
+ Float.BYTES // 0.1f
+ Double.BYTES // 2.3
+ CacheKeyBuilder.STRING_SEPARATOR.length // byte array
@ -64,7 +66,7 @@ public class CacheKeyBuilderTest
+ Integer.BYTES + 5 * 2 + 1 // 'test1' 'test2'
+ cacheable.getCacheKey().length // cacheable
+ Integer.BYTES + 4 // cacheable list
+ 11; // type keys
+ 12; // type keys
Assert.assertEquals(expectedSize, actual.length);
final byte[] expected = ByteBuffer.allocate(expectedSize)
@ -75,6 +77,8 @@ public class CacheKeyBuilderTest
.put(StringUtils.toUtf8("test"))
.put(CacheKeyBuilder.INT_KEY)
.putInt(10)
.put(CacheKeyBuilder.LONG_KEY)
.putLong(Long.MAX_VALUE)
.put(CacheKeyBuilder.FLOAT_KEY)
.putFloat(0.1f)
.put(CacheKeyBuilder.DOUBLE_KEY)


@ -73,6 +73,7 @@ public class DataSourceAnalysisTest
Assert.assertEquals(Optional.empty(), analysis.getBaseQuery());
Assert.assertEquals(Optional.empty(), analysis.getBaseQuerySegmentSpec());
Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
Assert.assertFalse(analysis.isJoin());
}
@Test
@ -92,6 +93,7 @@ public class DataSourceAnalysisTest
Assert.assertEquals(Optional.empty(), analysis.getBaseQuery());
Assert.assertEquals(Optional.empty(), analysis.getBaseQuerySegmentSpec());
Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
Assert.assertFalse(analysis.isJoin());
}
@Test
@ -114,6 +116,7 @@ public class DataSourceAnalysisTest
analysis.getBaseQuerySegmentSpec()
);
Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
Assert.assertFalse(analysis.isJoin());
}
@Test
@ -137,6 +140,7 @@ public class DataSourceAnalysisTest
analysis.getBaseQuerySegmentSpec()
);
Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
Assert.assertFalse(analysis.isJoin());
}
@Test
@ -155,6 +159,7 @@ public class DataSourceAnalysisTest
Assert.assertEquals(Optional.empty(), analysis.getBaseQuery());
Assert.assertEquals(Optional.empty(), analysis.getBaseQuerySegmentSpec());
Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
Assert.assertFalse(analysis.isJoin());
}
@Test
@ -177,6 +182,7 @@ public class DataSourceAnalysisTest
analysis.getBaseQuerySegmentSpec()
);
Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
Assert.assertFalse(analysis.isJoin());
}
@Test
@ -195,6 +201,7 @@ public class DataSourceAnalysisTest
Assert.assertEquals(Optional.empty(), analysis.getBaseQuery());
Assert.assertEquals(Optional.empty(), analysis.getBaseQuerySegmentSpec());
Assert.assertEquals(Collections.emptyList(), analysis.getPreJoinableClauses());
Assert.assertFalse(analysis.isJoin());
}
@Test
@ -242,6 +249,7 @@ public class DataSourceAnalysisTest
),
analysis.getPreJoinableClauses()
);
Assert.assertTrue(analysis.isJoin());
}
@Test
@ -291,6 +299,7 @@ public class DataSourceAnalysisTest
),
analysis.getPreJoinableClauses()
);
Assert.assertTrue(analysis.isJoin());
}
@Test
@ -319,6 +328,7 @@ public class DataSourceAnalysisTest
),
analysis.getPreJoinableClauses()
);
Assert.assertTrue(analysis.isJoin());
}
@Test
@ -350,6 +360,7 @@ public class DataSourceAnalysisTest
),
analysis.getPreJoinableClauses()
);
Assert.assertTrue(analysis.isJoin());
}
@Test
@ -400,6 +411,7 @@ public class DataSourceAnalysisTest
),
analysis.getPreJoinableClauses()
);
Assert.assertTrue(analysis.isJoin());
}
@Test
@ -430,6 +442,7 @@ public class DataSourceAnalysisTest
),
analysis.getPreJoinableClauses()
);
Assert.assertTrue(analysis.isJoin());
}
@Test
@ -460,6 +473,7 @@ public class DataSourceAnalysisTest
),
analysis.getPreJoinableClauses()
);
Assert.assertTrue(analysis.isJoin());
}
@Test


@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.join;
import org.apache.druid.segment.column.ColumnHolder;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
public class JoinPrefixUtilsTest
{
@Rule
public ExpectedException expectedException = ExpectedException.none();
@Test
public void test_validatePrefix_null()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have null or empty prefix");
JoinPrefixUtils.validatePrefix(null);
}
@Test
public void test_validatePrefix_empty()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have null or empty prefix");
JoinPrefixUtils.validatePrefix("");
}
@Test
public void test_validatePrefix_underscore()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have prefix[_]");
JoinPrefixUtils.validatePrefix("_");
}
@Test
public void test_validatePrefix_timeColumn()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have prefix[__time]");
JoinPrefixUtils.validatePrefix(ColumnHolder.TIME_COLUMN_NAME);
}
@Test
public void test_isPrefixedBy()
{
Assert.assertTrue(JoinPrefixUtils.isPrefixedBy("foo", ""));
Assert.assertTrue(JoinPrefixUtils.isPrefixedBy("foo", "f"));
Assert.assertTrue(JoinPrefixUtils.isPrefixedBy("foo", "fo"));
Assert.assertFalse(JoinPrefixUtils.isPrefixedBy("foo", "foo"));
}
}


@ -143,6 +143,7 @@ public class JoinTestHelper
};
public static final String INDEXED_TABLE_VERSION = DateTimes.nowUtc().toString();
public static final byte[] INDEXED_TABLE_CACHE_KEY = new byte[] {1, 2, 3};
private static RowAdapter<Map<String, Object>> createMapRowAdapter(final RowSignature signature)
{
@ -263,6 +264,21 @@ public class JoinTestHelper
);
}
public static RowBasedIndexedTable<Map<String, Object>> createCountriesIndexedTableWithCacheKey() throws IOException
{
return withRowsFromResource(
"/wikipedia/countries.json",
rows -> new RowBasedIndexedTable<>(
rows,
createMapRowAdapter(COUNTRIES_SIGNATURE),
COUNTRIES_SIGNATURE,
ImmutableSet.of("countryNumber", "countryIsoCode"),
INDEXED_TABLE_VERSION,
INDEXED_TABLE_CACHE_KEY
)
);
}
public static RowBasedIndexedTable<Map<String, Object>> createRegionsIndexedTable() throws IOException
{
return withRowsFromResource(


@ -0,0 +1,434 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.join;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.query.LookupDataSource;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.TestQuery;
import org.apache.druid.query.extraction.MapLookupExtractor;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.planning.PreJoinableClause;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.join.filter.rewrite.JoinFilterRewriteConfig;
import org.apache.druid.segment.join.lookup.LookupJoinable;
import org.easymock.EasyMock;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
public class JoinableFactoryWrapperTest
{
private static final JoinFilterRewriteConfig DEFAULT_JOIN_FILTER_REWRITE_CONFIG = new JoinFilterRewriteConfig(
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN,
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE,
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS,
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE
);
private static final JoinableFactoryWrapper NOOP_JOINABLE_FACTORY_WRAPPER = new JoinableFactoryWrapper(
NoopJoinableFactory.INSTANCE);
@Rule
public ExpectedException expectedException = ExpectedException.none();
@Test
public void test_createSegmentMapFn_noClauses()
{
final Function<SegmentReference, SegmentReference> segmentMapFn = NOOP_JOINABLE_FACTORY_WRAPPER.createSegmentMapFn(
ImmutableList.of(),
new AtomicLong(),
null
);
Assert.assertSame(Function.identity(), segmentMapFn);
}
@Test
public void test_createSegmentMapFn_unusableClause()
{
final LookupDataSource lookupDataSource = new LookupDataSource("lookyloo");
final PreJoinableClause clause = new PreJoinableClause(
"j.",
lookupDataSource,
JoinType.LEFT,
JoinConditionAnalysis.forExpression("x == \"j.x\"", "j.", ExprMacroTable.nil())
);
expectedException.expect(IllegalStateException.class);
expectedException.expectMessage("dataSource is not joinable");
final Function<SegmentReference, SegmentReference> ignored = NOOP_JOINABLE_FACTORY_WRAPPER.createSegmentMapFn(
ImmutableList.of(clause),
new AtomicLong(),
null
);
}
@Test
public void test_createSegmentMapFn_usableClause()
{
final LookupDataSource lookupDataSource = new LookupDataSource("lookyloo");
final JoinConditionAnalysis conditionAnalysis = JoinConditionAnalysis.forExpression(
"x == \"j.x\"",
"j.",
ExprMacroTable.nil()
);
final PreJoinableClause clause = new PreJoinableClause(
"j.",
lookupDataSource,
JoinType.LEFT,
conditionAnalysis
);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactory()
{
@Override
public boolean isDirectlyJoinable(DataSource dataSource)
{
return dataSource.equals(lookupDataSource);
}
@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
{
if (dataSource.equals(lookupDataSource) && condition.equals(conditionAnalysis)) {
return Optional.of(
LookupJoinable.wrap(new MapLookupExtractor(ImmutableMap.of("k", "v"), false))
);
} else {
return Optional.empty();
}
}
});
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
ImmutableList.of(clause),
new AtomicLong(),
new TestQuery(
new TableDataSource("test"),
new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("0/100"))),
false,
new HashMap()
)
);
Assert.assertNotSame(Function.identity(), segmentMapFn);
}
@Test
public void test_computeJoinDataSourceCacheKey_noClauses()
{
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
DataSource dataSource = new NoopDataSource();
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.emptyList());
EasyMock.expect(analysis.getDataSource()).andReturn(dataSource);
EasyMock.replay(analysis);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
expectedException.expect(IAE.class);
expectedException.expectMessage(StringUtils.format(
"No join clauses to build the cache key for data source [%s]",
dataSource
));
joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
}
@Test
public void test_computeJoinDataSourceCacheKey_noHashJoin()
{
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.");
PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_2", "x != \"h.x\"", "h.");
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Arrays.asList(clause1, clause2)).anyTimes();
EasyMock.replay(analysis);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
Optional<byte[]> cacheKey = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertFalse(cacheKey.isPresent());
}
@Test
public void test_computeJoinDataSourceCacheKey_cachingUnsupported()
{
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.");
DataSource dataSource = new LookupDataSource("lookup");
PreJoinableClause clause2 = makePreJoinableClause(dataSource, "x == \"h.x\"", "h.", JoinType.LEFT);
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Arrays.asList(clause1, clause2)).anyTimes();
EasyMock.replay(analysis);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
Optional<byte[]> cacheKey = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertFalse(cacheKey.isPresent());
}
@Test
public void test_computeJoinDataSourceCacheKey_usableClauses()
{
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.");
PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_2", "x == \"h.x\"", "h.");
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Arrays.asList(clause1, clause2)).anyTimes();
EasyMock.replay(analysis);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
Optional<byte[]> cacheKey = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey.isPresent());
}
@Test
public void test_computeJoinDataSourceCacheKey_keyChangesWithExpression()
{
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "y == \"j.y\"", "j.");
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause1)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey1 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey1.isPresent());
Assert.assertNotEquals(0, cacheKey1.get().length);
PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.");
EasyMock.reset(analysis);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause2)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey2 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey2.isPresent());
Assert.assertFalse(Arrays.equals(cacheKey1.get(), cacheKey2.get()));
}
@Test
public void test_computeJoinDataSourceCacheKey_keyChangesWithJoinType()
{
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.", JoinType.LEFT);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause1)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey1 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey1.isPresent());
Assert.assertNotEquals(0, cacheKey1.get().length);
PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.", JoinType.INNER);
EasyMock.reset(analysis);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause2)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey2 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey2.isPresent());
Assert.assertFalse(Arrays.equals(cacheKey1.get(), cacheKey2.get()));
}
@Test
public void test_computeJoinDataSourceCacheKey_keyChangesWithPrefix()
{
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "abc == xyz", "ab");
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause1)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey1 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey1.isPresent());
Assert.assertNotEquals(0, cacheKey1.get().length);
PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_1", "abc == xyz", "xy");
EasyMock.reset(analysis);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause2)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey2 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey2.isPresent());
Assert.assertFalse(Arrays.equals(cacheKey1.get(), cacheKey2.get()));
}
@Test
public void test_computeJoinDataSourceCacheKey_keyChangesWithJoinable()
{
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.");
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause1)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey1 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey1.isPresent());
Assert.assertNotEquals(0, cacheKey1.get().length);
PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_2", "x == \"j.x\"", "j.");
EasyMock.reset(analysis);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause2)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey2 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey2.isPresent());
Assert.assertFalse(Arrays.equals(cacheKey1.get(), cacheKey2.get()));
}
@Test
public void test_computeJoinDataSourceCacheKey_sameKeyForSameJoin()
{
DataSourceAnalysis analysis = EasyMock.mock(DataSourceAnalysis.class);
JoinableFactoryWrapper joinableFactoryWrapper = new JoinableFactoryWrapper(new JoinableFactoryWithCacheKey());
PreJoinableClause clause1 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.");
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause1)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey1 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey1.isPresent());
Assert.assertNotEquals(0, cacheKey1.get().length);
PreJoinableClause clause2 = makeGlobalPreJoinableClause("dataSource_1", "x == \"j.x\"", "j.");
EasyMock.reset(analysis);
EasyMock.expect(analysis.getPreJoinableClauses()).andReturn(Collections.singletonList(clause2)).anyTimes();
EasyMock.replay(analysis);
Optional<byte[]> cacheKey2 = joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis);
Assert.assertTrue(cacheKey2.isPresent());
Assert.assertArrayEquals(cacheKey1.get(), cacheKey2.get());
}
@Test
public void test_checkClausePrefixesForDuplicatesAndShadowing_noConflicts()
{
List<String> prefixes = Arrays.asList(
"AA",
"AB",
"AC",
"aa",
"ab",
"ac",
"BA"
);
JoinPrefixUtils.checkPrefixesForDuplicatesAndShadowing(prefixes);
}
@Test
public void test_checkClausePrefixesForDuplicatesAndShadowing_duplicate()
{
expectedException.expect(IAE.class);
expectedException.expectMessage("Detected duplicate prefix in join clauses: [AA]");
List<String> prefixes = Arrays.asList(
"AA",
"AA",
"ABCD"
);
JoinPrefixUtils.checkPrefixesForDuplicatesAndShadowing(prefixes);
}
@Test
public void test_checkClausePrefixesForDuplicatesAndShadowing_shadowing()
{
expectedException.expect(IAE.class);
expectedException.expectMessage("Detected conflicting prefixes in join clauses: [ABC.DEF, ABC.]");
List<String> prefixes = Arrays.asList(
"BASE.",
"BASEBALL",
"123.456",
"23.45",
"ABC.",
"ABC.DEF"
);
JoinPrefixUtils.checkPrefixesForDuplicatesAndShadowing(prefixes);
}
private PreJoinableClause makeGlobalPreJoinableClause(String tableName, String expression, String prefix)
{
return makeGlobalPreJoinableClause(tableName, expression, prefix, JoinType.LEFT);
}
private PreJoinableClause makeGlobalPreJoinableClause(
String tableName,
String expression,
String prefix,
JoinType joinType
)
{
GlobalTableDataSource dataSource = new GlobalTableDataSource(tableName);
return makePreJoinableClause(dataSource, expression, prefix, joinType);
}
private PreJoinableClause makePreJoinableClause(
DataSource dataSource,
String expression,
String prefix,
JoinType joinType
)
{
JoinConditionAnalysis conditionAnalysis = JoinConditionAnalysis.forExpression(
expression,
prefix,
ExprMacroTable.nil()
);
return new PreJoinableClause(
prefix,
dataSource,
joinType,
conditionAnalysis
);
}
private static class JoinableFactoryWithCacheKey extends NoopJoinableFactory
{
@Override
public Optional<byte[]> computeJoinCacheKey(DataSource dataSource, JoinConditionAnalysis condition)
{
if (dataSource.isCacheable(false) && condition.canHashJoin()) {
String tableName = Iterators.getOnlyElement(dataSource.getTableNames().iterator());
return Optional.of(StringUtils.toUtf8(tableName));
}
return Optional.empty();
}
}
}


@ -1,241 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.join;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.LookupDataSource;
import org.apache.druid.query.QueryContexts;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.TestQuery;
import org.apache.druid.query.extraction.MapLookupExtractor;
import org.apache.druid.query.planning.PreJoinableClause;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.join.filter.rewrite.JoinFilterRewriteConfig;
import org.apache.druid.segment.join.lookup.LookupJoinable;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
public class JoinablesTest
{
private static final JoinFilterRewriteConfig DEFAULT_JOIN_FILTER_REWRITE_CONFIG = new JoinFilterRewriteConfig(
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_PUSH_DOWN,
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE,
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_VALUE_COLUMN_FILTERS,
QueryContexts.DEFAULT_ENABLE_JOIN_FILTER_REWRITE_MAX_SIZE
);
@Rule
public ExpectedException expectedException = ExpectedException.none();
@Test
public void test_validatePrefix_null()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have null or empty prefix");
Joinables.validatePrefix(null);
}
@Test
public void test_validatePrefix_empty()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have null or empty prefix");
Joinables.validatePrefix("");
}
@Test
public void test_validatePrefix_underscore()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have prefix[_]");
Joinables.validatePrefix("_");
}
@Test
public void test_validatePrefix_timeColumn()
{
expectedException.expect(IllegalArgumentException.class);
expectedException.expectMessage("Join clause cannot have prefix[__time]");
Joinables.validatePrefix(ColumnHolder.TIME_COLUMN_NAME);
}
@Test
public void test_isPrefixedBy()
{
Assert.assertTrue(Joinables.isPrefixedBy("foo", ""));
Assert.assertTrue(Joinables.isPrefixedBy("foo", "f"));
Assert.assertTrue(Joinables.isPrefixedBy("foo", "fo"));
Assert.assertFalse(Joinables.isPrefixedBy("foo", "foo"));
}
@Test
public void test_createSegmentMapFn_noClauses()
{
final Function<SegmentReference, SegmentReference> segmentMapFn = Joinables.createSegmentMapFn(
ImmutableList.of(),
NoopJoinableFactory.INSTANCE,
new AtomicLong(),
null
);
Assert.assertSame(Function.identity(), segmentMapFn);
}
@Test
public void test_createSegmentMapFn_unusableClause()
{
final LookupDataSource lookupDataSource = new LookupDataSource("lookyloo");
final PreJoinableClause clause = new PreJoinableClause(
"j.",
lookupDataSource,
JoinType.LEFT,
JoinConditionAnalysis.forExpression("x == \"j.x\"", "j.", ExprMacroTable.nil())
);
expectedException.expect(IllegalStateException.class);
expectedException.expectMessage("dataSource is not joinable");
final Function<SegmentReference, SegmentReference> ignored = Joinables.createSegmentMapFn(
ImmutableList.of(clause),
NoopJoinableFactory.INSTANCE,
new AtomicLong(),
null
);
}
@Test
public void test_createSegmentMapFn_usableClause()
{
final LookupDataSource lookupDataSource = new LookupDataSource("lookyloo");
final JoinConditionAnalysis conditionAnalysis = JoinConditionAnalysis.forExpression(
"x == \"j.x\"",
"j.",
ExprMacroTable.nil()
);
final PreJoinableClause clause = new PreJoinableClause(
"j.",
lookupDataSource,
JoinType.LEFT,
conditionAnalysis
);
final Function<SegmentReference, SegmentReference> segmentMapFn = Joinables.createSegmentMapFn(
ImmutableList.of(clause),
new JoinableFactory()
{
@Override
public boolean isDirectlyJoinable(DataSource dataSource)
{
return dataSource.equals(lookupDataSource);
}
@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
{
if (dataSource.equals(lookupDataSource) && condition.equals(conditionAnalysis)) {
return Optional.of(
LookupJoinable.wrap(new MapLookupExtractor(ImmutableMap.of("k", "v"), false))
);
} else {
return Optional.empty();
}
}
},
new AtomicLong(),
new TestQuery(
new TableDataSource("test"),
new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("0/100"))),
false,
new HashMap()
)
);
Assert.assertNotSame(Function.identity(), segmentMapFn);
}
@Test
public void test_checkClausePrefixesForDuplicatesAndShadowing_noConflicts()
{
List<String> prefixes = Arrays.asList(
"AA",
"AB",
"AC",
"aa",
"ab",
"ac",
"BA"
);
Joinables.checkPrefixesForDuplicatesAndShadowing(prefixes);
}
@Test
public void test_checkClausePrefixesForDuplicatesAndShadowing_duplicate()
{
expectedException.expect(IAE.class);
expectedException.expectMessage("Detected duplicate prefix in join clauses: [AA]");
List<String> prefixes = Arrays.asList(
"AA",
"AA",
"ABCD"
);
Joinables.checkPrefixesForDuplicatesAndShadowing(prefixes);
}
@Test
public void test_checkClausePrefixesForDuplicatesAndShadowing_shadowing()
{
expectedException.expect(IAE.class);
expectedException.expectMessage("Detected conflicting prefixes in join clauses: [ABC.DEF, ABC.]");
List<String> prefixes = Arrays.asList(
"BASE.",
"BASEBALL",
"123.456",
"23.45",
"ABC.",
"ABC.DEF"
);
Joinables.checkPrefixesForDuplicatesAndShadowing(prefixes);
}
}

View File

@ -21,6 +21,8 @@ package org.apache.druid.segment.join;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.InlineDataSource;
import org.easymock.EasyMock;
import org.easymock.EasyMockRunner;
@ -28,7 +30,9 @@ import org.easymock.Mock;
import org.easymock.MockType;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import java.util.Optional;
@ -45,6 +49,8 @@ public class MapJoinableFactoryTest
private JoinConditionAnalysis condition;
@Mock
private Joinable mockJoinable;
@Rule
public ExpectedException expectedException = ExpectedException.none();
private MapJoinableFactory target;
@ -83,6 +89,41 @@ public class MapJoinableFactoryTest
EasyMock.replay(noopJoinableFactory);
Optional<Joinable> joinable = target.build(noopDataSource, condition);
Assert.assertEquals(mockJoinable, joinable.get());
}
@Test
public void testComputeJoinCacheKey()
{
Optional<byte[]> expected = Optional.of(new byte[]{1, 2, 3});
EasyMock.expect(noopJoinableFactory.computeJoinCacheKey(noopDataSource, condition)).andReturn(expected);
EasyMock.replay(noopJoinableFactory);
Optional<byte[]> actual = target.computeJoinCacheKey(noopDataSource, condition);
Assert.assertSame(expected, actual);
}
@Test
public void testBuildExceptionWhenTwoJoinableFactoryForSameDataSource()
{
JoinableFactory anotherNoopJoinableFactory = EasyMock.mock(MapJoinableFactory.class);
target = new MapJoinableFactory(
ImmutableSet.of(noopJoinableFactory, anotherNoopJoinableFactory),
ImmutableMap.of(
noopJoinableFactory.getClass(),
NoopDataSource.class,
anotherNoopJoinableFactory.getClass(),
NoopDataSource.class
)
);
EasyMock.expect(noopJoinableFactory.build(noopDataSource, condition)).andReturn(Optional.of(mockJoinable));
EasyMock.expect(anotherNoopJoinableFactory.build(noopDataSource, condition)).andReturn(Optional.of(mockJoinable));
EasyMock.replay(noopJoinableFactory, anotherNoopJoinableFactory);
expectedException.expect(ISE.class);
expectedException.expectMessage(StringUtils.format(
"Multiple joinable factories are valid for table[%s]",
noopDataSource
));
target.build(noopDataSource, condition);
}
@Test

View File

@ -48,7 +48,7 @@ public class NoopDataSource implements DataSource
}
@Override
public boolean isCacheable()
public boolean isCacheable(boolean isBroker)
{
return false;
}

View File

@ -27,7 +27,7 @@ public class NoopJoinableFactory implements JoinableFactory
{
public static final NoopJoinableFactory INSTANCE = new NoopJoinableFactory();
private NoopJoinableFactory()
protected NoopJoinableFactory()
{
// Singleton.
}

View File

@ -223,6 +223,12 @@ public class BroadcastSegmentIndexedTableTest extends InitializedNullHandlingTes
checkNonIndexedReader("qualityDouble");
}
@Test
public void testIsCacheable()
{
Assert.assertTrue(broadcastTable.isCacheable());
}
@Test
public void testNonexistentColumn()
{

View File

@ -179,4 +179,13 @@ public class RowBasedIndexedTableTest
Assert.assertEquals(JoinTestHelper.INDEXED_TABLE_VERSION, countriesTable.version());
Assert.assertEquals(JoinTestHelper.INDEXED_TABLE_VERSION, regionsTable.version());
}
@Test
public void testIsCacheable() throws IOException
{
Assert.assertFalse(countriesTable.isCacheable());
RowBasedIndexedTable<Map<String, Object>> countriesTableWithCacheKey = JoinTestHelper.createCountriesIndexedTableWithCacheKey();
Assert.assertTrue(countriesTableWithCacheKey.isCacheable());
Assert.assertArrayEquals(JoinTestHelper.INDEXED_TABLE_CACHE_KEY, countriesTableWithCacheKey.computeCacheKey());
}
}
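The pair of assertions above pins down the IndexedTable caching contract: a table advertises cacheability only when it can produce a stable cache key. A minimal sketch of that pairing, using a hypothetical stand-in class rather than the real RowBasedIndexedTable:
import java.util.Optional;
// Hypothetical stand-in for illustration only; not the actual RowBasedIndexedTable code.
class CacheableTableSketch
{
  private final Optional<byte[]> cacheKey;
  CacheableTableSketch(Optional<byte[]> cacheKey)
  {
    this.cacheKey = cacheKey;
  }
  boolean isCacheable()
  {
    // a table built without a cache key cannot participate in query caching
    return cacheKey.isPresent();
  }
  byte[] computeCacheKey()
  {
    // callers are expected to check isCacheable() first
    return cacheKey.orElseThrow(IllegalStateException::new);
  }
}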

View File

@ -192,6 +192,6 @@ public class CacheUtil
return cacheStrategy != null
&& cacheStrategy.isCacheable(query, serverType.willMergeRunners())
&& cacheConfig.isQueryCacheable(query)
&& query.getDataSource().isCacheable();
&& query.getDataSource().isCacheable(serverType == ServerType.BROKER);
}
}
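The serverType == ServerType.BROKER argument above is what allows a data source to be cacheable on one tier but not the other. A hedged illustration for a broadcast (global) table, assuming GlobalTableDataSource remains cacheable on data nodes as the tests in this patch suggest:
import org.apache.druid.query.GlobalTableDataSource;
public class TierCacheabilityExample
{
  public static void main(String[] args)
  {
    GlobalTableDataSource global = new GlobalTableDataSource("global");
    // Data nodes join the broadcast table locally, so segment-level cache
    // entries stay correct there; expected to print true under this patch.
    System.out.println(global.isCacheable(false));
    // The Broker cannot see the join inputs, so caching is disabled on that
    // tier; expected to print false (see test_isQueryCacheable_unCacheableDataSourceOnBroker below).
    System.out.println(global.isCacheable(true));
  }
}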

View File

@ -21,7 +21,9 @@ package org.apache.druid.client;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
@ -30,6 +32,7 @@ import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.primitives.Bytes;
import com.google.inject.Inject;
import org.apache.druid.client.cache.Cache;
import org.apache.druid.client.cache.CacheConfig;
@ -71,6 +74,8 @@ import org.apache.druid.query.context.ResponseContext.Key;
import org.apache.druid.query.filter.DimFilterUtils;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.server.QueryResource;
import org.apache.druid.server.QueryScheduler;
import org.apache.druid.server.coordination.DruidServerMetadata;
@ -121,6 +126,7 @@ public class CachingClusteredClient implements QuerySegmentWalker
private final DruidProcessingConfig processingConfig;
private final ForkJoinPool pool;
private final QueryScheduler scheduler;
private final JoinableFactoryWrapper joinableFactoryWrapper;
@Inject
public CachingClusteredClient(
@ -133,7 +139,8 @@ public class CachingClusteredClient implements QuerySegmentWalker
@Client DruidHttpClientConfig httpClientConfig,
DruidProcessingConfig processingConfig,
@Merging ForkJoinPool pool,
QueryScheduler scheduler
QueryScheduler scheduler,
JoinableFactory joinableFactory
)
{
this.warehouse = warehouse;
@ -146,6 +153,7 @@ public class CachingClusteredClient implements QuerySegmentWalker
this.processingConfig = processingConfig;
this.pool = pool;
this.scheduler = scheduler;
this.joinableFactoryWrapper = new JoinableFactoryWrapper(joinableFactory);
if (cacheConfig.isQueryCacheable(Query.GROUP_BY) && (cacheConfig.isUseCache() || cacheConfig.isPopulateCache())) {
log.warn(
@ -274,6 +282,7 @@ public class CachingClusteredClient implements QuerySegmentWalker
private final Map<String, Cache.NamedKey> cachePopulatorKeyMap = new HashMap<>();
private final DataSourceAnalysis dataSourceAnalysis;
private final List<Interval> intervals;
private final CacheKeyManager<T> cacheKeyManager;
SpecificQueryRunnable(final QueryPlus<T> queryPlus, final ResponseContext responseContext)
{
@ -282,6 +291,7 @@ public class CachingClusteredClient implements QuerySegmentWalker
this.query = queryPlus.getQuery();
this.toolChest = warehouse.getToolChest(query);
this.strategy = toolChest.getCacheStrategy(query);
this.dataSourceAnalysis = DataSourceAnalysis.forDataSource(query.getDataSource());
this.useCache = CacheUtil.isUseSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.BROKER);
this.populateCache = CacheUtil.isPopulateSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.BROKER);
@ -289,11 +299,18 @@ public class CachingClusteredClient implements QuerySegmentWalker
// Note that enabling this leads to putting uncovered intervals information in the response headers
// and might blow up in some cases https://github.com/apache/druid/issues/2108
this.uncoveredIntervalsLimit = QueryContexts.getUncoveredIntervalsLimit(query);
this.dataSourceAnalysis = DataSourceAnalysis.forDataSource(query.getDataSource());
// For nested queries, we need to look at the intervals of the inner most query.
this.intervals = dataSourceAnalysis.getBaseQuerySegmentSpec()
.map(QuerySegmentSpec::getIntervals)
.orElseGet(() -> query.getIntervals());
this.cacheKeyManager = new CacheKeyManager<>(
query,
strategy,
useCache,
populateCache,
dataSourceAnalysis,
joinableFactoryWrapper
);
}
private ImmutableMap<String, Object> makeDownstreamQueryContext()
@ -344,12 +361,15 @@ public class CachingClusteredClient implements QuerySegmentWalker
final Set<SegmentServerSelector> segmentServers = computeSegmentsToQuery(timeline, specificSegments);
@Nullable
final byte[] queryCacheKey = computeQueryCacheKey();
final byte[] queryCacheKey = cacheKeyManager.computeSegmentLevelQueryCacheKey();
if (query.getContext().get(QueryResource.HEADER_IF_NONE_MATCH) != null) {
@Nullable
final String prevEtag = (String) query.getContext().get(QueryResource.HEADER_IF_NONE_MATCH);
@Nullable
final String currentEtag = computeCurrentEtag(segmentServers, queryCacheKey);
final String currentEtag = cacheKeyManager.computeResultLevelCachingEtag(segmentServers, queryCacheKey);
if (null != currentEtag) {
responseContext.put(Key.ETAG, currentEtag);
}
if (currentEtag != null && currentEtag.equals(prevEtag)) {
return new ClusterQueryResult<>(Sequences.empty(), 0);
}
@ -489,46 +509,6 @@ public class CachingClusteredClient implements QuerySegmentWalker
}
}
@Nullable
private byte[] computeQueryCacheKey()
{
if ((populateCache || useCache) // implies strategy != null
&& !isBySegment) { // explicit bySegment queries are never cached
assert strategy != null;
return strategy.computeCacheKey(query);
} else {
return null;
}
}
@Nullable
private String computeCurrentEtag(final Set<SegmentServerSelector> segments, @Nullable byte[] queryCacheKey)
{
Hasher hasher = Hashing.sha1().newHasher();
boolean hasOnlyHistoricalSegments = true;
for (SegmentServerSelector p : segments) {
if (!p.getServer().pick().getServer().isSegmentReplicationTarget()) {
hasOnlyHistoricalSegments = false;
break;
}
hasher.putString(p.getServer().getSegment().getId().toString(), StandardCharsets.UTF_8);
// it is important to add the "query interval" as part of the ETag calculation
// so that the result-level cache works correctly for queries with different
// intervals covering the same set of segments
hasher.putString(p.rhs.getInterval().toString(), StandardCharsets.UTF_8);
}
if (hasOnlyHistoricalSegments) {
hasher.putBytes(queryCacheKey == null ? strategy.computeCacheKey(query) : queryCacheKey);
String currEtag = StringUtils.encodeBase64String(hasher.hash().asBytes());
responseContext.put(ResponseContext.Key.ETAG, currEtag);
return currEtag;
} else {
return null;
}
}
private List<Pair<Interval, byte[]>> pruneSegmentsWithCachedResults(
final byte[] queryCacheKey,
final Set<SegmentServerSelector> segments
@ -776,4 +756,103 @@ public class CachingClusteredClient implements QuerySegmentWalker
.flatMerge(seq -> seq, query.getResultOrdering());
}
}
/**
* An inner class used solely for computing cache keys. It is a separate class to allow extensive unit testing
* of cache key generation.
*/
@VisibleForTesting
static class CacheKeyManager<T>
{
private final Query<T> query;
private final CacheStrategy<T, Object, Query<T>> strategy;
private final DataSourceAnalysis dataSourceAnalysis;
private final JoinableFactoryWrapper joinableFactoryWrapper;
private final boolean isSegmentLevelCachingEnabled;
CacheKeyManager(
final Query<T> query,
final CacheStrategy<T, Object, Query<T>> strategy,
final boolean useCache,
final boolean populateCache,
final DataSourceAnalysis dataSourceAnalysis,
final JoinableFactoryWrapper joinableFactoryWrapper
)
{
this.query = query;
this.strategy = strategy;
this.dataSourceAnalysis = dataSourceAnalysis;
this.joinableFactoryWrapper = joinableFactoryWrapper;
this.isSegmentLevelCachingEnabled = ((populateCache || useCache)
&& !QueryContexts.isBySegment(query)); // explicit bySegment queries are never cached
}
@Nullable
byte[] computeSegmentLevelQueryCacheKey()
{
if (isSegmentLevelCachingEnabled) {
return computeQueryCacheKeyWithJoin();
}
return null;
}
/**
* Computes the ETag used by {@link org.apache.druid.query.ResultLevelCachingQueryRunner} for
* result-level caching. queryCacheKey can be null if the segment-level cache is not being used. However, the ETag
* is still computed since the result-level cache may still be on.
*/
@Nullable
String computeResultLevelCachingEtag(
final Set<SegmentServerSelector> segments,
@Nullable byte[] queryCacheKey
)
{
Hasher hasher = Hashing.sha1().newHasher();
boolean hasOnlyHistoricalSegments = true;
for (SegmentServerSelector p : segments) {
if (!p.getServer().pick().getServer().isSegmentReplicationTarget()) {
hasOnlyHistoricalSegments = false;
break;
}
hasher.putString(p.getServer().getSegment().getId().toString(), StandardCharsets.UTF_8);
// it is important to add the "query interval" as part of the ETag calculation
// to have the result-level cache work correctly for queries with different
// intervals covering the same set of segments
hasher.putString(p.rhs.getInterval().toString(), StandardCharsets.UTF_8);
}
if (!hasOnlyHistoricalSegments) {
return null;
}
// query cache key can be null if segment level caching is disabled
final byte[] queryCacheKeyFinal = (queryCacheKey == null) ? computeQueryCacheKeyWithJoin() : queryCacheKey;
if (queryCacheKeyFinal == null) {
return null;
}
hasher.putBytes(queryCacheKeyFinal);
String currEtag = StringUtils.encodeBase64String(hasher.hash().asBytes());
return currEtag;
}
/**
* Adds the cache key prefix for join data sources. Returns null if it is a join but caching is not supported.
*/
@Nullable
private byte[] computeQueryCacheKeyWithJoin()
{
Preconditions.checkNotNull(strategy, "strategy cannot be null");
if (dataSourceAnalysis.isJoin()) {
byte[] joinDataSourceCacheKey = joinableFactoryWrapper.computeJoinDataSourceCacheKey(dataSourceAnalysis)
.orElse(null);
if (null == joinDataSourceCacheKey) {
return null; // A join operation that does not support caching
}
return Bytes.concat(joinDataSourceCacheKey, strategy.computeCacheKey(query));
}
return strategy.computeCacheKey(query);
}
}
}
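To make the key composition in computeQueryCacheKeyWithJoin concrete, here is a minimal, self-contained sketch; the byte values are placeholders for what JoinableFactoryWrapper and the CacheStrategy would actually return:
import com.google.common.primitives.Bytes;
import java.util.Arrays;
import java.util.Optional;
public class JoinCacheKeyExample
{
  public static void main(String[] args)
  {
    byte[] queryKey = new byte[]{1, 2, 3};                    // stands in for strategy.computeCacheKey(query)
    Optional<byte[]> joinKey = Optional.of(new byte[]{4, 5}); // stands in for computeJoinDataSourceCacheKey(analysis)
    // For a join data source the join key is prepended to the query key; if the
    // join does not support caching (empty Optional), the whole key becomes null
    // and segment-level caching is skipped for the query.
    byte[] segmentLevelKey = joinKey.map(k -> Bytes.concat(k, queryKey)).orElse(null);
    System.out.println(Arrays.toString(segmentLevelKey)); // prints [4, 5, 1, 2, 3]
  }
}
This mirrors the assertion in CachingClusteredClientCacheKeyManagerTest below, which expects Bytes.concat(JOIN_KEY, QUERY_CACHE_KEY) for a cacheable join.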

View File

@ -21,7 +21,9 @@ package org.apache.druid.client;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.primitives.Bytes;
import org.apache.druid.client.cache.Cache;
import org.apache.druid.client.cache.CacheConfig;
import org.apache.druid.client.cache.CachePopulator;
@ -39,11 +41,13 @@ import org.apache.druid.query.context.ResponseContext;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.Optional;
public class CachingQueryRunner<T> implements QueryRunner<T>
{
private final String cacheId;
private final SegmentDescriptor segmentDescriptor;
private final Optional<byte[]> cacheKeyPrefix;
private final QueryRunner<T> base;
private final QueryToolChest toolChest;
private final Cache cache;
@ -53,6 +57,7 @@ public class CachingQueryRunner<T> implements QueryRunner<T>
public CachingQueryRunner(
String cacheId,
Optional<byte[]> cacheKeyPrefix,
SegmentDescriptor segmentDescriptor,
ObjectMapper mapper,
Cache cache,
@ -62,6 +67,7 @@ public class CachingQueryRunner<T> implements QueryRunner<T>
CacheConfig cacheConfig
)
{
this.cacheKeyPrefix = cacheKeyPrefix;
this.base = base;
this.cacheId = cacheId;
this.segmentDescriptor = segmentDescriptor;
@ -77,20 +83,15 @@ public class CachingQueryRunner<T> implements QueryRunner<T>
{
Query<T> query = queryPlus.getQuery();
final CacheStrategy strategy = toolChest.getCacheStrategy(query);
final boolean populateCache = CacheUtil.isPopulateSegmentCache(
query,
strategy,
cacheConfig,
CacheUtil.ServerType.DATA
);
final boolean useCache = CacheUtil.isUseSegmentCache(query, strategy, cacheConfig, CacheUtil.ServerType.DATA);
final boolean populateCache = canPopulateCache(query, strategy);
final boolean useCache = canUseCache(query, strategy);
final Cache.NamedKey key;
if (strategy != null && (useCache || populateCache)) {
if (useCache || populateCache) {
key = CacheUtil.computeSegmentCacheKey(
cacheId,
segmentDescriptor,
strategy.computeCacheKey(query)
Bytes.concat(cacheKeyPrefix.get(), strategy.computeCacheKey(query))
);
} else {
key = null;
@ -143,4 +144,32 @@ public class CachingQueryRunner<T> implements QueryRunner<T>
}
}
/**
* @return whether the segment-level cache should be used. False if the strategy is null or no cache key prefix is present
*/
@VisibleForTesting
boolean canUseCache(Query<T> query, CacheStrategy strategy)
{
return CacheUtil.isUseSegmentCache(
query,
strategy,
cacheConfig,
CacheUtil.ServerType.DATA
) && cacheKeyPrefix.isPresent();
}
/**
* @return whether the segment-level cache should be populated. False if the strategy is null or no cache key prefix is present
*/
@VisibleForTesting
boolean canPopulateCache(Query<T> query, CacheStrategy strategy)
{
return CacheUtil.isPopulateSegmentCache(
query,
strategy,
cacheConfig,
CacheUtil.ServerType.DATA
) && cacheKeyPrefix.isPresent();
}
}
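The Optional&lt;byte[]&gt; prefix carries three distinct states, which is why canUseCache and canPopulateCache check isPresent(): an absent value means a join whose key could not be computed (caching disabled), an empty array means a non-join query (the prefix is a no-op), and a non-empty array namespaces the cache entries of a join. A small sketch of that contract, with finalKey standing in for the NamedKey computation above:
import com.google.common.primitives.Bytes;
import java.util.Optional;
public class CacheKeyPrefixContract
{
  static Optional<byte[]> finalKey(Optional<byte[]> cacheKeyPrefix, byte[] queryKey)
  {
    // Optional.empty(): join cache key unavailable -> caching is skipped entirely.
    // new byte[0]:      non-join query -> the key is the plain query cache key.
    // non-empty bytes:  cacheable join -> entries are isolated per join data source.
    return cacheKeyPrefix.map(prefix -> Bytes.concat(prefix, queryKey));
  }
  public static void main(String[] args)
  {
    byte[] queryKey = {1, 2, 3};
    System.out.println(finalKey(Optional.empty(), queryKey).isPresent());            // false: no caching
    System.out.println(finalKey(Optional.of(new byte[0]), queryKey).get().length);   // 3: key unchanged
    System.out.println(finalKey(Optional.of(new byte[]{9}), queryKey).get().length); // 4: prefixed
  }
}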

View File

@ -162,6 +162,7 @@ public class ResultLevelCachingQueryRunner<T> implements QueryRunner<T>
}
}
@Nullable
private byte[] fetchResultsFromResultLevelCache(
final String queryCacheKey
)

View File

@ -25,6 +25,7 @@ import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.segment.join.table.IndexedTable;
import org.apache.druid.segment.join.table.IndexedTableJoinable;
import org.apache.druid.segment.join.table.ReferenceCountingIndexedTable;
import org.apache.druid.server.SegmentManager;
@ -55,25 +56,39 @@ public class BroadcastTableJoinableFactory implements JoinableFactory
JoinConditionAnalysis condition
)
{
GlobalTableDataSource broadcastDatasource = (GlobalTableDataSource) dataSource;
if (condition.canHashJoin()) {
DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(broadcastDatasource);
return segmentManager.getIndexedTables(analysis).map(tables -> {
Iterator<ReferenceCountingIndexedTable> tableIterator = tables.iterator();
if (!tableIterator.hasNext()) {
return null;
}
try {
return new IndexedTableJoinable(Iterators.getOnlyElement(tableIterator));
}
catch (IllegalArgumentException iae) {
throw new ISE(
"Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.",
broadcastDatasource.getName()
);
}
});
if (!condition.canHashJoin()) {
return Optional.empty();
}
return Optional.empty();
return getOnlyIndexedTable(dataSource).map(IndexedTableJoinable::new);
}
@Override
public Optional<byte[]> computeJoinCacheKey(DataSource dataSource, JoinConditionAnalysis condition)
{
if (!condition.canHashJoin()) {
return Optional.empty();
}
return getOnlyIndexedTable(dataSource).filter(IndexedTable::isCacheable).map(IndexedTable::computeCacheKey);
}
private Optional<ReferenceCountingIndexedTable> getOnlyIndexedTable(DataSource dataSource)
{
GlobalTableDataSource broadcastDataSource = (GlobalTableDataSource) dataSource;
DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(dataSource);
return segmentManager.getIndexedTables(analysis).flatMap(tables -> {
Iterator<ReferenceCountingIndexedTable> tableIterator = tables.iterator();
if (!tableIterator.hasNext()) {
return Optional.empty();
}
try {
return Optional.of(Iterators.getOnlyElement(tableIterator));
}
catch (IllegalArgumentException iae) {
throw new ISE(
"Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.",
broadcastDataSource.getName()
);
}
});
}
}
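JoinableFactoryWrapper.computeJoinDataSourceCacheKey itself is not shown in this diff. A hedged sketch of how per-clause keys like the one above could fold into a single prefix, under the assumption that any single uncacheable clause disables caching for the whole query:
import com.google.common.primitives.Bytes;
import java.util.List;
import java.util.Optional;
class JoinPrefixFoldSketch
{
  // Stand-in for JoinableFactory.computeJoinCacheKey(dataSource, condition).
  interface ClauseKey
  {
    Optional<byte[]> computeKey();
  }
  static Optional<byte[]> computeJoinCacheKey(List<ClauseKey> clauses)
  {
    byte[] combined = new byte[0];
    for (ClauseKey clause : clauses) {
      Optional<byte[]> key = clause.computeKey();
      if (!key.isPresent()) {
        // one clause without a key (e.g. a non-hash-joinable condition) makes
        // the whole join uncacheable, matching the Optional.empty() paths above
        return Optional.empty();
      }
      combined = Bytes.concat(combined, key.get());
    }
    return Optional.of(combined);
  }
}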

View File

@ -29,6 +29,7 @@ import org.apache.druid.client.cache.CachePopulatorStats;
import org.apache.druid.client.cache.ForegroundCachePopulator;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.concurrent.Execs;
import org.apache.druid.java.util.common.guava.CloseQuietly;
import org.apache.druid.java.util.common.guava.FunctionalIterable;
@ -56,7 +57,7 @@ import org.apache.druid.query.spec.SpecificSegmentQueryRunner;
import org.apache.druid.query.spec.SpecificSegmentSpec;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.Joinables;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.segment.realtime.FireHydrant;
import org.apache.druid.segment.realtime.plumber.Sink;
import org.apache.druid.timeline.SegmentId;
@ -86,7 +87,7 @@ public class SinkQuerySegmentWalker implements QuerySegmentWalker
private final ServiceEmitter emitter;
private final QueryRunnerFactoryConglomerate conglomerate;
private final ExecutorService queryExecutorService;
private final JoinableFactory joinableFactory;
private final JoinableFactoryWrapper joinableFactoryWrapper;
private final Cache cache;
private final CacheConfig cacheConfig;
private final CachePopulatorStats cachePopulatorStats;
@ -110,7 +111,7 @@ public class SinkQuerySegmentWalker implements QuerySegmentWalker
this.emitter = Preconditions.checkNotNull(emitter, "emitter");
this.conglomerate = Preconditions.checkNotNull(conglomerate, "conglomerate");
this.queryExecutorService = Preconditions.checkNotNull(queryExecutorService, "queryExecutorService");
this.joinableFactory = Preconditions.checkNotNull(joinableFactory, "joinableFactory");
this.joinableFactoryWrapper = new JoinableFactoryWrapper(joinableFactory);
this.cache = Preconditions.checkNotNull(cache, "cache");
this.cacheConfig = Preconditions.checkNotNull(cacheConfig, "cacheConfig");
this.cachePopulatorStats = Preconditions.checkNotNull(cachePopulatorStats, "cachePopulatorStats");
@ -167,13 +168,17 @@ public class SinkQuerySegmentWalker implements QuerySegmentWalker
}
// segmentMapFn maps each base Segment into a joined Segment if necessary.
final Function<SegmentReference, SegmentReference> segmentMapFn = Joinables.createSegmentMapFn(
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
analysis.getPreJoinableClauses(),
joinableFactory,
cpuTimeAccumulator,
analysis.getBaseQuery().orElse(query)
);
// Compute the join cache key once here so it does not need to be re-computed for every segment
final Optional<byte[]> cacheKeyPrefix = analysis.isJoin()
? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis)
: Optional.of(StringUtils.EMPTY_BYTES);
Iterable<QueryRunner<T>> perSegmentRunners = Iterables.transform(
specs,
descriptor -> {
@ -226,6 +231,7 @@ public class SinkQuerySegmentWalker implements QuerySegmentWalker
if (hydrantDefinitelySwapped && cache.isLocal()) {
runner = new CachingQueryRunner<>(
makeHydrantCacheIdentifier(hydrant),
cacheKeyPrefix,
descriptor,
objectMapper,
cache,

View File

@ -37,7 +37,7 @@ import org.apache.druid.segment.ReferenceCountingSegment;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.SegmentWrangler;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.Joinables;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.joda.time.Interval;
import java.util.concurrent.atomic.AtomicLong;
@ -57,7 +57,7 @@ public class LocalQuerySegmentWalker implements QuerySegmentWalker
{
private final QueryRunnerFactoryConglomerate conglomerate;
private final SegmentWrangler segmentWrangler;
private final JoinableFactory joinableFactory;
private final JoinableFactoryWrapper joinableFactoryWrapper;
private final QueryScheduler scheduler;
private final ServiceEmitter emitter;
@ -72,7 +72,7 @@ public class LocalQuerySegmentWalker implements QuerySegmentWalker
{
this.conglomerate = conglomerate;
this.segmentWrangler = segmentWrangler;
this.joinableFactory = joinableFactory;
this.joinableFactoryWrapper = new JoinableFactoryWrapper(joinableFactory);
this.scheduler = scheduler;
this.emitter = emitter;
}
@ -94,9 +94,8 @@ public class LocalQuerySegmentWalker implements QuerySegmentWalker
final AtomicLong cpuAccumulator = new AtomicLong(0L);
final Function<SegmentReference, SegmentReference> segmentMapFn = Joinables.createSegmentMapFn(
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
analysis.getPreJoinableClauses(),
joinableFactory,
cpuAccumulator,
analysis.getBaseQuery().orElse(query)
);

View File

@ -58,7 +58,7 @@ import org.apache.druid.query.spec.SpecificSegmentSpec;
import org.apache.druid.segment.ReferenceCountingSegment;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.Joinables;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.server.SegmentManager;
import org.apache.druid.server.SetAndVerifyContextQueryRunner;
import org.apache.druid.server.initialization.ServerConfig;
@ -76,7 +76,7 @@ import java.util.function.Function;
/**
* Query handler for Historical processes (see CliHistorical).
*
* <p>
* In tests, this class's behavior is partially mimicked by TestClusterQuerySegmentWalker.
*/
public class ServerManager implements QuerySegmentWalker
@ -90,7 +90,7 @@ public class ServerManager implements QuerySegmentWalker
private final ObjectMapper objectMapper;
private final CacheConfig cacheConfig;
private final SegmentManager segmentManager;
private final JoinableFactory joinableFactory;
private final JoinableFactoryWrapper joinableFactoryWrapper;
private final ServerConfig serverConfig;
@Inject
@ -117,7 +117,7 @@ public class ServerManager implements QuerySegmentWalker
this.cacheConfig = cacheConfig;
this.segmentManager = segmentManager;
this.joinableFactory = joinableFactory;
this.joinableFactoryWrapper = new JoinableFactoryWrapper(joinableFactory);
this.serverConfig = serverConfig;
}
@ -197,12 +197,15 @@ public class ServerManager implements QuerySegmentWalker
}
// segmentMapFn maps each base Segment into a joined Segment if necessary.
final Function<SegmentReference, SegmentReference> segmentMapFn = Joinables.createSegmentMapFn(
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
analysis.getPreJoinableClauses(),
joinableFactory,
cpuTimeAccumulator,
analysis.getBaseQuery().orElse(query)
);
// Compute the join cache key once here so it does not need to be re-computed for every segment
final Optional<byte[]> cacheKeyPrefix = analysis.isJoin()
? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis)
: Optional.of(StringUtils.EMPTY_BYTES);
final FunctionalIterable<QueryRunner<T>> queryRunners = FunctionalIterable
.create(specs)
@ -215,7 +218,8 @@ public class ServerManager implements QuerySegmentWalker
toolChest,
timeline,
segmentMapFn,
cpuTimeAccumulator
cpuTimeAccumulator,
cacheKeyPrefix
)
)
);
@ -239,7 +243,8 @@ public class ServerManager implements QuerySegmentWalker
final QueryToolChest<T, Query<T>> toolChest,
final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline,
final Function<SegmentReference, SegmentReference> segmentMapFn,
final AtomicLong cpuTimeAccumulator
final AtomicLong cpuTimeAccumulator,
Optional<byte[]> cacheKeyPrefix
)
{
final PartitionHolder<ReferenceCountingSegment> entry = timeline.findEntry(
@ -261,6 +266,7 @@ public class ServerManager implements QuerySegmentWalker
factory,
toolChest,
segmentMapFn.apply(segment),
cacheKeyPrefix,
descriptor,
cpuTimeAccumulator
);
@ -270,6 +276,7 @@ public class ServerManager implements QuerySegmentWalker
final QueryRunnerFactory<T, Query<T>> factory,
final QueryToolChest<T, Query<T>> toolChest,
final SegmentReference segment,
final Optional<byte[]> cacheKeyPrefix,
final SegmentDescriptor segmentDescriptor,
final AtomicLong cpuTimeAccumulator
)
@ -295,6 +302,7 @@ public class ServerManager implements QuerySegmentWalker
CachingQueryRunner<T> cachingQueryRunner = new CachingQueryRunner<>(
segmentIdString,
cacheKeyPrefix,
segmentDescriptor,
objectMapper,
cache,

View File

@ -27,6 +27,7 @@ import org.apache.druid.client.cache.CacheConfig;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.CacheStrategy;
import org.apache.druid.query.Druids;
import org.apache.druid.query.GlobalTableDataSource;
import org.apache.druid.query.LookupDataSource;
import org.apache.druid.query.Query;
import org.apache.druid.query.timeseries.TimeseriesQuery;
@ -111,14 +112,27 @@ public class CacheUtilTest
}
@Test
public void test_isQueryCacheable_unCacheableDataSource()
public void test_isQueryCacheable_unCacheableDataSourceOnBroker()
{
Assert.assertFalse(
CacheUtil.isQueryCacheable(
timeseriesQuery.withDataSource(new GlobalTableDataSource("global")),
new DummyCacheStrategy<>(true, true),
makeCacheConfig(ImmutableMap.of()),
CacheUtil.ServerType.BROKER
)
);
}
@Test
public void test_isQueryCacheable_unCacheableDataSourceOnDataServer()
{
Assert.assertFalse(
CacheUtil.isQueryCacheable(
timeseriesQuery.withDataSource(new LookupDataSource("lookyloo")),
new DummyCacheStrategy<>(true, true),
makeCacheConfig(ImmutableMap.of()),
CacheUtil.ServerType.BROKER
CacheUtil.ServerType.DATA
)
);
}

View File

@ -0,0 +1,341 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.client;
import com.google.common.collect.ImmutableSet;
import com.google.common.primitives.Bytes;
import org.apache.druid.client.selector.QueryableDruidServer;
import org.apache.druid.client.selector.ServerSelector;
import org.apache.druid.query.CacheStrategy;
import org.apache.druid.query.Query;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.easymock.EasyMockRunner;
import org.easymock.EasyMockSupport;
import org.easymock.Mock;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import java.util.Optional;
import java.util.Set;
import static org.easymock.EasyMock.expect;
import static org.easymock.EasyMock.replay;
import static org.easymock.EasyMock.reset;
@RunWith(EasyMockRunner.class)
public class CachingClusteredClientCacheKeyManagerTest extends EasyMockSupport
{
@Mock
private CacheStrategy<Object, Object, Query<Object>> strategy;
@Mock
private Query<Object> query;
@Mock
private JoinableFactoryWrapper joinableFactoryWrapper;
@Mock
private DataSourceAnalysis dataSourceAnalysis;
private static final byte[] QUERY_CACHE_KEY = new byte[]{1, 2, 3};
private static final byte[] JOIN_KEY = new byte[]{4, 5};
@Before
public void setup()
{
expect(strategy.computeCacheKey(query)).andReturn(QUERY_CACHE_KEY).anyTimes();
expect(query.getContextValue("bySegment")).andReturn(false).anyTimes();
}
@After
public void teardown()
{
verifyAllUnexpectedCalls();
}
@Test
public void testComputeEtag_nonHistorical()
{
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(0),
makeRealtimeServerSelector(1)
);
String actual = keyManager.computeResultLevelCachingEtag(selectors, QUERY_CACHE_KEY);
Assert.assertNull(actual);
}
@Test
public void testComputeEtag_DifferentHistoricals()
{
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual1 = keyManager.computeResultLevelCachingEtag(selectors, QUERY_CACHE_KEY);
Assert.assertNotNull(actual1);
selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual2 = keyManager.computeResultLevelCachingEtag(selectors, QUERY_CACHE_KEY);
Assert.assertNotNull(actual2);
Assert.assertEquals("cache key should not change for same server selectors", actual1, actual2);
selectors = ImmutableSet.of(
makeHistoricalServerSelector(2),
makeHistoricalServerSelector(1)
);
String actual3 = keyManager.computeResultLevelCachingEtag(selectors, QUERY_CACHE_KEY);
Assert.assertNotNull(actual3);
Assert.assertNotEquals(actual1, actual3);
}
@Test
public void testComputeEtag_DifferentQueryCacheKey()
{
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual1 = keyManager.computeResultLevelCachingEtag(selectors, new byte[]{1, 2});
Assert.assertNotNull(actual1);
String actual2 = keyManager.computeResultLevelCachingEtag(selectors, new byte[]{3, 4});
Assert.assertNotNull(actual2);
Assert.assertNotEquals(actual1, actual2);
}
@Test
public void testComputeEtag_nonJoinDataSource()
{
expect(dataSourceAnalysis.isJoin()).andReturn(false);
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual1 = keyManager.computeResultLevelCachingEtag(selectors, QUERY_CACHE_KEY);
Assert.assertNotNull(actual1);
selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual2 = keyManager.computeResultLevelCachingEtag(selectors, null);
Assert.assertNotNull(actual2);
Assert.assertEquals(actual1, actual2);
}
@Test
public void testComputeEtag_joinWithUnsupportedCaching()
{
expect(dataSourceAnalysis.isJoin()).andReturn(true);
expect(joinableFactoryWrapper.computeJoinDataSourceCacheKey(dataSourceAnalysis)).andReturn(Optional.empty());
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual = keyManager.computeResultLevelCachingEtag(selectors, null);
Assert.assertNull(actual);
}
@Test
public void testComputeEtag_joinWithSupportedCaching()
{
expect(dataSourceAnalysis.isJoin()).andReturn(true).anyTimes();
expect(joinableFactoryWrapper.computeJoinDataSourceCacheKey(dataSourceAnalysis)).andReturn(Optional.of(JOIN_KEY));
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual1 = keyManager.computeResultLevelCachingEtag(selectors, null);
Assert.assertNotNull(actual1);
reset(joinableFactoryWrapper);
expect(joinableFactoryWrapper.computeJoinDataSourceCacheKey(dataSourceAnalysis)).andReturn(Optional.of(new byte[]{9}));
replay(joinableFactoryWrapper);
selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual2 = keyManager.computeResultLevelCachingEtag(selectors, null);
Assert.assertNotNull(actual2);
Assert.assertNotEquals(actual1, actual2);
}
@Test
public void testComputeEtag_noEffectIfBySegment()
{
expect(dataSourceAnalysis.isJoin()).andReturn(false);
reset(query);
expect(query.getContextValue("bySegment")).andReturn(true).anyTimes();
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual = keyManager.computeResultLevelCachingEtag(selectors, null);
Assert.assertNotNull(actual);
}
@Test
public void testComputeEtag_noEffectIfUseAndPopulateFalse()
{
expect(dataSourceAnalysis.isJoin()).andReturn(false);
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = new CachingClusteredClient.CacheKeyManager<>(
query,
strategy,
false,
false,
dataSourceAnalysis,
joinableFactoryWrapper
);
Set<SegmentServerSelector> selectors = ImmutableSet.of(
makeHistoricalServerSelector(1),
makeHistoricalServerSelector(1)
);
String actual = keyManager.computeResultLevelCachingEtag(selectors, null);
Assert.assertNotNull(actual);
}
@Test
public void testSegmentQueryCacheKey_nonJoinDataSource()
{
expect(dataSourceAnalysis.isJoin()).andReturn(false);
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
byte[] cacheKey = keyManager.computeSegmentLevelQueryCacheKey();
Assert.assertArrayEquals(QUERY_CACHE_KEY, cacheKey);
}
@Test
public void testSegmentQueryCacheKey_joinWithUnsupportedCaching()
{
expect(dataSourceAnalysis.isJoin()).andReturn(true);
expect(joinableFactoryWrapper.computeJoinDataSourceCacheKey(dataSourceAnalysis)).andReturn(Optional.empty());
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
byte[] cacheKey = keyManager.computeSegmentLevelQueryCacheKey();
Assert.assertNull(cacheKey);
}
@Test
public void testSegmentQueryCacheKey_joinWithSupportedCaching()
{
expect(dataSourceAnalysis.isJoin()).andReturn(true);
expect(joinableFactoryWrapper.computeJoinDataSourceCacheKey(dataSourceAnalysis)).andReturn(Optional.of(JOIN_KEY));
replayAll();
CachingClusteredClient.CacheKeyManager<Object> keyManager = makeKeyManager();
byte[] cacheKey = keyManager.computeSegmentLevelQueryCacheKey();
Assert.assertArrayEquals(Bytes.concat(JOIN_KEY, QUERY_CACHE_KEY), cacheKey);
}
@Test
public void testSegmentQueryCacheKey_noCachingIfBySegment()
{
reset(query);
expect(query.getContextValue("bySegment")).andReturn(true).anyTimes();
replayAll();
byte[] cacheKey = makeKeyManager().computeSegmentLevelQueryCacheKey();
Assert.assertNull(cacheKey);
}
@Test
public void testSegmentQueryCacheKey_useAndPopulateCacheFalse()
{
replayAll();
Assert.assertNull(new CachingClusteredClient.CacheKeyManager<>(
query,
strategy,
false,
false,
dataSourceAnalysis,
joinableFactoryWrapper
).computeSegmentLevelQueryCacheKey());
}
private CachingClusteredClient.CacheKeyManager<Object> makeKeyManager()
{
return new CachingClusteredClient.CacheKeyManager<>(
query,
strategy,
true,
true,
dataSourceAnalysis,
joinableFactoryWrapper
);
}
private SegmentServerSelector makeHistoricalServerSelector(int partitionNumber)
{
return makeServerSelector(true, partitionNumber);
}
private SegmentServerSelector makeRealtimeServerSelector(int partitionNumber)
{
return makeServerSelector(false, partitionNumber);
}
/**
* Using partitionNumber, it is possible to create segments with different IDs.
*/
private SegmentServerSelector makeServerSelector(boolean isHistorical, int partitionNumber)
{
ServerSelector serverSelector = mock(ServerSelector.class);
QueryableDruidServer queryableDruidServer = mock(QueryableDruidServer.class);
DruidServer server = mock(DruidServer.class);
SegmentId segmentId = SegmentId.dummy("data-source", partitionNumber);
DataSegment segment = new DataSegment(
segmentId,
null,
null,
null,
new NumberedShardSpec(partitionNumber, 10),
null,
0,
0
);
expect(server.isSegmentReplicationTarget()).andReturn(isHistorical).anyTimes();
expect(serverSelector.pick()).andReturn(queryableDruidServer).anyTimes();
expect(queryableDruidServer.getServer()).andReturn(server).anyTimes();
expect(serverSelector.getSegment()).andReturn(segment).anyTimes();
replay(serverSelector, queryableDruidServer, server);
return new SegmentServerSelector(serverSelector, segmentId.toDescriptor());
}
}

View File

@ -21,6 +21,7 @@ package org.apache.druid.client;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import it.unimi.dsi.fastutil.ints.Int2ObjectRBTreeMap;
@ -47,6 +48,7 @@ import org.apache.druid.query.QueryToolChestWarehouse;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.context.ResponseContext;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.segment.join.MapJoinableFactory;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.server.coordination.ServerType;
import org.apache.druid.timeline.DataSegment;
@ -332,7 +334,8 @@ public class CachingClusteredClientFunctionalityTest
}
},
ForkJoinPool.commonPool(),
QueryStackTests.DEFAULT_NOOP_SCHEDULER
QueryStackTests.DEFAULT_NOOP_SCHEDULER,
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of())
);
}

View File

@ -26,6 +26,7 @@ import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
@ -119,6 +120,7 @@ import org.apache.druid.query.topn.TopNQueryConfig;
import org.apache.druid.query.topn.TopNQueryQueryToolChest;
import org.apache.druid.query.topn.TopNResultValue;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.join.MapJoinableFactory;
import org.apache.druid.server.QueryScheduler;
import org.apache.druid.server.ServerTestHelper;
import org.apache.druid.server.coordination.ServerType;
@ -2847,7 +2849,8 @@ public class CachingClusteredClientTest
ManualQueryPrioritizationStrategy.INSTANCE,
NoQueryLaningStrategy.INSTANCE,
new ServerConfig()
)
),
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of())
);
}

View File

@ -26,6 +26,8 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.primitives.Bytes;
import org.apache.commons.lang3.RandomUtils;
import org.apache.druid.client.cache.BackgroundCachePopulator;
import org.apache.druid.client.cache.Cache;
import org.apache.druid.client.cache.CacheConfig;
@ -64,6 +66,7 @@ import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.query.topn.TopNQueryConfig;
import org.apache.druid.query.topn.TopNQueryQueryToolChest;
import org.apache.druid.query.topn.TopNResultValue;
import org.easymock.EasyMock;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Test;
@ -77,6 +80,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
@ -105,6 +109,12 @@ public class CachingQueryRunnerTest
DateTimes.of("2011-01-08"), "a", 50, 4988, "b", 50, 4987, "c", 50, 4986,
DateTimes.of("2011-01-09"), "a", 50, 4985, "b", 50, 4984, "c", 50, 4983
};
private static final SegmentDescriptor SEGMENT_DESCRIPTOR = new SegmentDescriptor(
Intervals.of("2011/2012"),
"version",
0
);
private static final String CACHE_ID = "segment";
private ObjectMapper objectMapper;
private CachePopulator cachePopulator;
@ -193,6 +203,53 @@ public class CachingQueryRunnerTest
}
}
@Test
public void testNullCacheKeyPrefix()
{
Query query = new TopNQueryBuilder()
.dataSource("ds")
.dimension("top_dim")
.metric("imps")
.threshold(3)
.intervals("2011-01-05/2011-01-10")
.aggregators(AGGS)
.granularity(Granularities.ALL)
.build();
QueryToolChest toolchest = new TopNQueryQueryToolChest(new TopNQueryConfig());
Cache cache = EasyMock.mock(Cache.class);
EasyMock.replay(cache);
CachingQueryRunner queryRunner = makeCachingQueryRunner(null, cache, toolchest, Sequences.empty());
Assert.assertFalse(queryRunner.canPopulateCache(query, toolchest.getCacheStrategy(query)));
Assert.assertFalse(queryRunner.canUseCache(query, toolchest.getCacheStrategy(query)));
queryRunner.run(QueryPlus.wrap(query));
EasyMock.verifyUnexpectedCalls(cache);
}
@Test
public void testNullStrategy()
{
Query query = new TopNQueryBuilder()
.dataSource("ds")
.dimension("top_dim")
.metric("imps")
.threshold(3)
.intervals("2011-01-05/2011-01-10")
.aggregators(AGGS)
.granularity(Granularities.ALL)
.build();
QueryToolChest toolchest = EasyMock.mock(QueryToolChest.class);
Cache cache = EasyMock.mock(Cache.class);
EasyMock.expect(toolchest.getCacheStrategy(query)).andReturn(null);
EasyMock.replay(cache, toolchest);
CachingQueryRunner queryRunner = makeCachingQueryRunner(new byte[0], cache, toolchest, Sequences.empty());
Assert.assertFalse(queryRunner.canPopulateCache(query, null));
Assert.assertFalse(queryRunner.canUseCache(query, null));
queryRunner.run(QueryPlus.wrap(query));
EasyMock.verifyUnexpectedCalls(cache);
}
private void testCloseAndPopulate(
List<Result> expectedRes,
List<Result> expectedCacheRes,
@ -272,48 +329,21 @@ public class CachingQueryRunnerTest
}
};
String cacheId = "segment";
SegmentDescriptor segmentDescriptor = new SegmentDescriptor(Intervals.of("2011/2012"), "version", 0);
DefaultObjectMapper objectMapper = new DefaultObjectMapper();
CachingQueryRunner runner = new CachingQueryRunner(
cacheId,
segmentDescriptor,
objectMapper,
byte[] keyPrefix = RandomUtils.nextBytes(10);
CachingQueryRunner runner = makeCachingQueryRunner(
keyPrefix,
cache,
toolchest,
new QueryRunner()
{
@Override
public Sequence run(QueryPlus queryPlus, ResponseContext responseContext)
{
return resultSeq;
}
},
cachePopulator,
new CacheConfig()
{
@Override
public boolean isPopulateCache()
{
return true;
}
@Override
public boolean isUseCache()
{
return true;
}
}
resultSeq
);
CacheStrategy cacheStrategy = toolchest.getCacheStrategy(query);
Cache.NamedKey cacheKey = CacheUtil.computeSegmentCacheKey(
cacheId,
segmentDescriptor,
cacheStrategy.computeCacheKey(query)
CACHE_ID,
SEGMENT_DESCRIPTOR,
Bytes.concat(keyPrefix, cacheStrategy.computeCacheKey(query))
);
Assert.assertTrue(runner.canPopulateCache(query, cacheStrategy));
Sequence res = runner.run(QueryPlus.wrap(query));
// base sequence is not closed yet
Assert.assertFalse("sequence must not be closed", closable.isClosed());
@ -348,23 +378,41 @@ public class CachingQueryRunnerTest
QueryToolChest toolchest
) throws IOException
{
DefaultObjectMapper objectMapper = new DefaultObjectMapper();
String cacheId = "segment";
SegmentDescriptor segmentDescriptor = new SegmentDescriptor(Intervals.of("2011/2012"), "version", 0);
byte[] cacheKeyPrefix = RandomUtils.nextBytes(10);
CacheStrategy cacheStrategy = toolchest.getCacheStrategy(query);
Cache.NamedKey cacheKey = CacheUtil.computeSegmentCacheKey(
cacheId,
segmentDescriptor,
cacheStrategy.computeCacheKey(query)
CACHE_ID,
SEGMENT_DESCRIPTOR,
Bytes.concat(cacheKeyPrefix, cacheStrategy.computeCacheKey(query))
);
Cache cache = MapCache.create(1024 * 1024);
cache.put(cacheKey, toByteArray(Iterables.transform(expectedResults, cacheStrategy.prepareForSegmentLevelCache())));
CachingQueryRunner runner = new CachingQueryRunner(
cacheId,
segmentDescriptor,
CachingQueryRunner runner = makeCachingQueryRunner(
cacheKeyPrefix,
cache,
toolchest,
Sequences.empty()
);
Assert.assertTrue(runner.canUseCache(query, toolchest.getCacheStrategy(query)));
List<Result> results = runner.run(QueryPlus.wrap(query)).toList();
Assert.assertEquals(expectedResults.toString(), results.toString());
}
private CachingQueryRunner makeCachingQueryRunner(
byte[] cacheKeyPrefix,
Cache cache,
QueryToolChest toolchest,
Sequence<Object> results
)
{
return new CachingQueryRunner(
CACHE_ID,
Optional.ofNullable(cacheKeyPrefix),
SEGMENT_DESCRIPTOR,
objectMapper,
cache,
toolchest,
@ -374,7 +422,7 @@ public class CachingQueryRunnerTest
@Override
public Sequence run(QueryPlus queryPlus, ResponseContext responseContext)
{
return Sequences.empty();
return results;
}
},
cachePopulator,
@ -394,8 +442,6 @@ public class CachingQueryRunnerTest
}
);
List<Result> results = runner.run(QueryPlus.wrap(query)).toList();
Assert.assertEquals(expectedResults.toString(), results.toString());
}
private List<Result> makeTopNResults(boolean cachedResults, Object... objects)

View File

@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.client.CachingClusteredClient;
import org.apache.druid.client.DirectDruidClient;
import org.apache.druid.client.DruidServer;
@ -49,6 +50,7 @@ import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.segment.join.MapJoinableFactory;
import org.apache.druid.server.QueryStackTests;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.NumberedShardSpec;
@ -139,7 +141,8 @@ public abstract class QueryRunnerBasedOnClusteredClientTestBase
new DruidHttpClientConfig(),
QueryStackTests.getProcessingConfig(USE_PARALLEL_MERGE_POOL_CONFIGURED),
ForkJoinPool.commonPool(),
QueryStackTests.DEFAULT_NOOP_SCHEDULER
QueryStackTests.DEFAULT_NOOP_SCHEDULER,
new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of())
);
servers = new ArrayList<>();
}

View File

@ -59,7 +59,9 @@ import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFacto
import org.apache.druid.server.metrics.NoopServiceEmitter;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.SegmentId;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.easymock.EasyMock;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@ -70,6 +72,7 @@ import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
@ -79,6 +82,11 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull
{
private static final String TABLE_NAME = "test";
private static final String PREFIX = "j0";
private static final JoinConditionAnalysis JOIN_CONDITION_ANALYSIS = JoinConditionAnalysis.forExpression(
StringUtils.format("market == \"%s.market\"", PREFIX),
PREFIX,
ExprMacroTable.nil()
);
private static final Set<String> KEY_COLUMNS =
ImmutableSet.of("market", "longNumericNull", "doubleNumericNull", "floatNumericNull", "partial_null_column");
@ -170,6 +178,9 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull
false
)
);
Optional<byte[]> bytes = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertTrue(bytes.isPresent());
assertSegmentIdEquals(segment.getId(), bytes.get());
// dropping the segment should make the table no longer available
segmentManager.dropSegment(segment);
@ -177,6 +188,9 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull
maybeJoinable = makeJoinable(dataSource);
Assert.assertFalse(maybeJoinable.isPresent());
bytes = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertFalse(bytes.isPresent());
}
@Test
@ -215,6 +229,9 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull
false
)
);
Optional<byte[]> cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertTrue(cacheKey.isPresent());
assertSegmentIdEquals(segment2.getId(), cacheKey.get());
segmentManager.dropSegment(segment2);
@ -236,6 +253,9 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull
false
)
);
cacheKey = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
Assert.assertTrue(cacheKey.isPresent());
assertSegmentIdEquals(segment1.getId(), cacheKey.get());
}
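Taken together, the hunks above pin down the computeJoinCacheKey contract: a key is present exactly while a broadcast segment backs the table, it disappears when the segment is dropped, and it changes when a different segment takes over. A usage sketch of that contract as a cache-gating check; the two helpers are hypothetical stand-ins for the caller that gates the query cache, not part of this diff:

Optional<byte[]> key = joinableFactory.computeJoinCacheKey(dataSource, JOIN_CONDITION_ANALYSIS);
if (key.isPresent()) {
    // Safe to cache: the key encodes the exact segment (interval, version,
    // partition), so a segment swap yields a different key.
    prependToSegmentCacheKey(key.get()); // hypothetical helper
} else {
    // No backing segment, or an unsupported condition: skip the cache.
    skipCache(); // hypothetical helper
}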
@ -287,16 +307,19 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull
makeJoinable(dataSource);
}
@Test
public void emptyCacheKeyForUnsupportedCondition()
{
final DataSource dataSource = new GlobalTableDataSource(TABLE_NAME);
JoinConditionAnalysis condition = EasyMock.mock(JoinConditionAnalysis.class);
EasyMock.expect(condition.canHashJoin()).andReturn(false);
EasyMock.replay(condition);
Assert.assertNull(joinableFactory.build(dataSource, condition).orElse(null));
}
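The mocked condition isolates the guard a joinable factory applies: only conditions that can hash-join are served from an indexed table. A sketch of that guard as it would sit inside a build implementation; makeIndexedTableJoinable is a hypothetical stand-in for the factory's real table lookup:

public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
{
    if (!condition.canHashJoin()) {
        // A non-equi condition cannot be answered from a hash-indexed table,
        // so no Joinable (and, upstream, no cache key) is produced.
        return Optional.empty();
    }
    return Optional.of(makeIndexedTableJoinable(dataSource, condition)); // hypothetical helper
}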
private Optional<Joinable> makeJoinable(DataSource dataSource)
{
return joinableFactory.build(
dataSource,
JoinConditionAnalysis.forExpression(
StringUtils.format("market == \"%s.market\"", PREFIX),
PREFIX,
ExprMacroTable.nil()
)
);
return joinableFactory.build(dataSource, JOIN_CONDITION_ANALYSIS);
}
private DataSegment createSegment(IncrementalIndex data, String interval, String version) throws IOException
@ -340,4 +363,22 @@ public class SegmentManagerBroadcastJoinIndexedTableTest extends InitializedNull
ImmutableMap.of("type", "local", "path", segmentDir.getAbsolutePath())
);
}
private void assertSegmentIdEquals(SegmentId id, byte[] bytes)
{
// Each bare byteBuffer.get() below skips the one-byte type key that the
// cache key builder writes ahead of every appended value
ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
byteBuffer.get(); // skip the cache key prefix
byteBuffer.get(); // type key of the interval start
long start = byteBuffer.getLong();
byteBuffer.get(); // type key of the interval end
long end = byteBuffer.getLong();
byteBuffer.get(); // type key of the version
String version = StringUtils.fromUtf8(byteBuffer, StringUtils.estimatedBinaryLengthAsUTF8(id.getVersion()));
byteBuffer.get(); // type key of the data source name
String dataSource = StringUtils.fromUtf8(byteBuffer, StringUtils.estimatedBinaryLengthAsUTF8(id.getDataSource()));
byteBuffer.get(); // type key of the partition number
int partition = byteBuffer.getInt();
Assert.assertEquals(id, SegmentId.of(dataSource, Intervals.utc(start, end), version, partition));
}
}
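For reference, the byte layout that assertSegmentIdEquals walks implies the producing side assembles the key roughly as below. This is a hedged reconstruction: the CacheKeyBuilder usage, its appendLong method, and the CACHE_PREFIX constant are inferred from the decoded layout rather than shown in this diff.

byte[] cacheKey = new CacheKeyBuilder(CACHE_PREFIX)   // hypothetical prefix byte, skipped first by the test
    .appendLong(id.getInterval().getStartMillis())    // type key + interval start
    .appendLong(id.getInterval().getEndMillis())      // type key + interval end
    .appendString(id.getVersion())                    // type key + version
    .appendString(id.getDataSource())                 // type key + data source name
    .appendInt(id.getPartitionNum())                  // type key + partition number
    .build();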

View File

@ -48,7 +48,7 @@ import org.apache.druid.segment.ReferenceCountingSegment;
import org.apache.druid.segment.Segment;
import org.apache.druid.segment.SegmentReference;
import org.apache.druid.segment.join.JoinableFactory;
import org.apache.druid.segment.join.Joinables;
import org.apache.druid.segment.join.JoinableFactoryWrapper;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.PartitionChunk;
@ -76,7 +76,7 @@ import java.util.function.Function;
public class TestClusterQuerySegmentWalker implements QuerySegmentWalker
{
private final Map<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>> timelines;
private final JoinableFactory joinableFactory;
private final JoinableFactoryWrapper joinableFactoryWrapper;
private final QueryRunnerFactoryConglomerate conglomerate;
@Nullable
private final QueryScheduler scheduler;
@ -89,7 +89,7 @@ public class TestClusterQuerySegmentWalker implements QuerySegmentWalker
)
{
this.timelines = timelines;
this.joinableFactory = joinableFactory;
this.joinableFactoryWrapper = new JoinableFactoryWrapper(joinableFactory);
this.conglomerate = conglomerate;
this.scheduler = scheduler;
}
@ -142,9 +142,8 @@ public class TestClusterQuerySegmentWalker implements QuerySegmentWalker
throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
}
final Function<SegmentReference, SegmentReference> segmentMapFn = Joinables.createSegmentMapFn(
final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
analysis.getPreJoinableClauses(),
joinableFactory,
new AtomicLong(),
analysis.getBaseQuery().orElse(query)
);
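The refactor in this file replaces the static Joinables.createSegmentMapFn helper with an instance method, so the JoinableFactory is bound once at construction instead of being threaded through every call site. The resulting call pattern, collected from the hunks above:

JoinableFactoryWrapper wrapper = new JoinableFactoryWrapper(joinableFactory);
Function<SegmentReference, SegmentReference> segmentMapFn = wrapper.createSegmentMapFn(
    analysis.getPreJoinableClauses(),
    new AtomicLong(),                        // CPU time accumulator
    analysis.getBaseQuery().orElse(query)
);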