mirror of https://github.com/apache/druid.git
Add logging for downsampling sketches in MSQ (#14580)
* Add more logs for downsampling sketches * Fix builds * Lower log level * Add new log message
This commit is contained in:
parent
955734ba8d
commit
6837a7be19
|
@ -30,6 +30,8 @@ import org.apache.druid.frame.key.RowKey;
|
||||||
import org.apache.druid.frame.key.RowKeyReader;
|
import org.apache.druid.frame.key.RowKeyReader;
|
||||||
import org.apache.druid.java.util.common.IAE;
|
import org.apache.druid.java.util.common.IAE;
|
||||||
import org.apache.druid.java.util.common.ISE;
|
import org.apache.druid.java.util.common.ISE;
|
||||||
|
import org.apache.druid.java.util.common.Pair;
|
||||||
|
import org.apache.druid.java.util.common.logger.Logger;
|
||||||
import org.apache.druid.segment.column.RowSignature;
|
import org.apache.druid.segment.column.RowSignature;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -48,6 +50,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
// for an objectively faster and more accurate solution instead of finding the best match with the following parameters
|
// for an objectively faster and more accurate solution instead of finding the best match with the following parameters
|
||||||
private static final int MAX_COUNT_MAX_ITERATIONS = 500;
|
private static final int MAX_COUNT_MAX_ITERATIONS = 500;
|
||||||
private static final double MAX_COUNT_ITERATION_GROWTH_FACTOR = 1.05;
|
private static final double MAX_COUNT_ITERATION_GROWTH_FACTOR = 1.05;
|
||||||
|
private final Logger log = new Logger(ClusterByStatisticsCollectorImpl.class);
|
||||||
|
|
||||||
private final ClusterBy clusterBy;
|
private final ClusterBy clusterBy;
|
||||||
private final RowKeyReader keyReader;
|
private final RowKeyReader keyReader;
|
||||||
|
@ -127,6 +130,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
|
|
||||||
totalRetainedBytes += bucketHolder.updateRetainedBytes();
|
totalRetainedBytes += bucketHolder.updateRetainedBytes();
|
||||||
if (totalRetainedBytes > maxRetainedBytes) {
|
if (totalRetainedBytes > maxRetainedBytes) {
|
||||||
|
log.debug("Downsampling ClusterByStatisticsCollector as totalRetainedBytes[%s] is greater than maxRetainedBytes[%s]", totalRetainedBytes, maxRetainedBytes);
|
||||||
downSample();
|
downSample();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -148,6 +152,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
|
|
||||||
totalRetainedBytes += bucketHolder.updateRetainedBytes();
|
totalRetainedBytes += bucketHolder.updateRetainedBytes();
|
||||||
if (totalRetainedBytes > maxRetainedBytes) {
|
if (totalRetainedBytes > maxRetainedBytes) {
|
||||||
|
log.debug("Downsampling ClusterByStatisticsCollector as totalRetainedBytes[%s] is greater than maxRetainedBytes[%s]", totalRetainedBytes, maxRetainedBytes);
|
||||||
downSample();
|
downSample();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,6 +184,7 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
|
|
||||||
totalRetainedBytes += bucketHolder.updateRetainedBytes();
|
totalRetainedBytes += bucketHolder.updateRetainedBytes();
|
||||||
if (totalRetainedBytes > maxRetainedBytes) {
|
if (totalRetainedBytes > maxRetainedBytes) {
|
||||||
|
log.debug("Downsampling ClusterByStatisticsCollector as totalRetainedBytes[%s] is greater than maxRetainedBytes[%s]", totalRetainedBytes, maxRetainedBytes);
|
||||||
downSample();
|
downSample();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -227,6 +233,8 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
@Override
|
@Override
|
||||||
public ClusterByPartitions generatePartitionsWithTargetWeight(final long targetWeight)
|
public ClusterByPartitions generatePartitionsWithTargetWeight(final long targetWeight)
|
||||||
{
|
{
|
||||||
|
logSketches();
|
||||||
|
|
||||||
if (targetWeight < 1) {
|
if (targetWeight < 1) {
|
||||||
throw new IAE("Target weight must be positive");
|
throw new IAE("Target weight must be positive");
|
||||||
}
|
}
|
||||||
|
@ -270,6 +278,8 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
@Override
|
@Override
|
||||||
public ClusterByPartitions generatePartitionsWithMaxCount(final int maxNumPartitions)
|
public ClusterByPartitions generatePartitionsWithMaxCount(final int maxNumPartitions)
|
||||||
{
|
{
|
||||||
|
logSketches();
|
||||||
|
|
||||||
if (maxNumPartitions < 1) {
|
if (maxNumPartitions < 1) {
|
||||||
throw new IAE("Must have at least one partition");
|
throw new IAE("Must have at least one partition");
|
||||||
} else if (buckets.isEmpty()) {
|
} else if (buckets.isEmpty()) {
|
||||||
|
@ -311,6 +321,28 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
return ranges;
|
return ranges;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void logSketches()
|
||||||
|
{
|
||||||
|
if (log.isDebugEnabled()) {
|
||||||
|
// Log all sketches
|
||||||
|
List<KeyCollector<?>> keyCollectors = buckets.values()
|
||||||
|
.stream()
|
||||||
|
.map(bucketHolder -> bucketHolder.keyCollector)
|
||||||
|
.sorted(Comparator.comparingInt(KeyCollector::sketchAccuracyFactor))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
log.debug("KeyCollectors at partition generation: [%s]", keyCollectors);
|
||||||
|
} else {
|
||||||
|
// Log the 5 least accurate sketches
|
||||||
|
List<KeyCollector<?>> limitedKeyCollectors = buckets.values()
|
||||||
|
.stream()
|
||||||
|
.map(bucketHolder -> bucketHolder.keyCollector)
|
||||||
|
.sorted(Comparator.comparingInt(KeyCollector::sketchAccuracyFactor))
|
||||||
|
.limit(5)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
log.info("Most downsampled keyCollectors: [%s]", limitedKeyCollectors);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ClusterByStatisticsSnapshot snapshot()
|
public ClusterByStatisticsSnapshot snapshot()
|
||||||
{
|
{
|
||||||
|
@ -380,32 +412,37 @@ public class ClusterByStatisticsCollectorImpl implements ClusterByStatisticsColl
|
||||||
long newTotalRetainedBytes = totalRetainedBytes;
|
long newTotalRetainedBytes = totalRetainedBytes;
|
||||||
final long targetTotalRetainedBytes = totalRetainedBytes / 2;
|
final long targetTotalRetainedBytes = totalRetainedBytes / 2;
|
||||||
|
|
||||||
final List<BucketHolder> sortedHolders = new ArrayList<>(buckets.size());
|
final List<Pair<Long, BucketHolder>> sortedHolders = new ArrayList<>(buckets.size());
|
||||||
|
final RowKeyReader trimmedRowReader = keyReader.trimmedKeyReader(clusterBy.getBucketByCount());
|
||||||
|
|
||||||
// Only consider holders with more than one retained key. Holders with a single retained key cannot be downsampled.
|
// Only consider holders with more than one retained key. Holders with a single retained key cannot be downsampled.
|
||||||
for (final BucketHolder holder : buckets.values()) {
|
for (final Map.Entry<RowKey, BucketHolder> entry : buckets.entrySet()) {
|
||||||
if (holder.keyCollector.estimatedRetainedKeys() > 1) {
|
BucketHolder bucketHolder = entry.getValue();
|
||||||
sortedHolders.add(holder);
|
if (bucketHolder != null && bucketHolder.keyCollector.estimatedRetainedKeys() > 1) {
|
||||||
|
Long timeChunk = clusterBy.getBucketByCount() == 0 ? null : (Long) trimmedRowReader.read(entry.getKey(), 0);
|
||||||
|
sortedHolders.add(Pair.of(timeChunk, bucketHolder));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Downsample least-dense buckets first. (They're less likely to need high resolution.)
|
// Downsample least-dense buckets first. (They're less likely to need high resolution.)
|
||||||
sortedHolders.sort(
|
sortedHolders.sort(
|
||||||
Comparator.comparing((BucketHolder holder) ->
|
Comparator.comparing((Pair<Long, BucketHolder> pair) ->
|
||||||
(double) holder.keyCollector.estimatedTotalWeight()
|
(double) pair.rhs.keyCollector.estimatedTotalWeight()
|
||||||
/ holder.keyCollector.estimatedRetainedKeys())
|
/ pair.rhs.keyCollector.estimatedRetainedKeys())
|
||||||
);
|
);
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
while (i < sortedHolders.size() && newTotalRetainedBytes > targetTotalRetainedBytes) {
|
while (i < sortedHolders.size() && newTotalRetainedBytes > targetTotalRetainedBytes) {
|
||||||
final BucketHolder bucketHolder = sortedHolders.get(i);
|
final Long timeChunk = sortedHolders.get(i).lhs;
|
||||||
|
final BucketHolder bucketHolder = sortedHolders.get(i).rhs;
|
||||||
|
|
||||||
// Ignore false return, because we wrap all collectors in DelegateOrMinKeyCollector and can be assured that
|
// Ignore false return, because we wrap all collectors in DelegateOrMinKeyCollector and can be assured that
|
||||||
// it will downsample all the way to one if needed. Can't do better than that.
|
// it will downsample all the way to one if needed. Can't do better than that.
|
||||||
|
log.debug("Downsampling sketch for timeChunk [%s]: [%s]", timeChunk, bucketHolder.keyCollector);
|
||||||
bucketHolder.keyCollector.downSample();
|
bucketHolder.keyCollector.downSample();
|
||||||
newTotalRetainedBytes += bucketHolder.updateRetainedBytes();
|
newTotalRetainedBytes += bucketHolder.updateRetainedBytes();
|
||||||
|
|
||||||
if (i == sortedHolders.size() - 1 || sortedHolders.get(i + 1).retainedBytes > bucketHolder.retainedBytes || bucketHolder.keyCollector.estimatedRetainedKeys() <= 1) {
|
if (i == sortedHolders.size() - 1 || sortedHolders.get(i + 1).rhs.retainedBytes > bucketHolder.retainedBytes || bucketHolder.keyCollector.estimatedRetainedKeys() <= 1) {
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,11 +34,11 @@ import java.util.Optional;
|
||||||
/**
|
/**
|
||||||
* Delegates to some other kind of {@link KeyCollector} at first, until its {@link #downSample()} fails to downsample.
|
* Delegates to some other kind of {@link KeyCollector} at first, until its {@link #downSample()} fails to downsample.
|
||||||
* At that point, the delegate collector is nulled out and this collector starts tracking the min key instead.
|
* At that point, the delegate collector is nulled out and this collector starts tracking the min key instead.
|
||||||
*
|
* <br>
|
||||||
* This is useful because it allows us to wrap any {@link KeyCollector} and enable downsampling to a single key, even
|
* This is useful because it allows us to wrap any {@link KeyCollector} and enable downsampling to a single key, even
|
||||||
* if the original collector does not support that. For example, {@link QuantilesSketchKeyCollector} cannot downsample
|
* if the original collector does not support that. For example, {@link QuantilesSketchKeyCollector} cannot downsample
|
||||||
* below K = 2, which retains more than one key.
|
* below K = 2, which retains more than one key.
|
||||||
*
|
* <br>
|
||||||
* Created by {@link DelegateOrMinKeyCollectorFactory}.
|
* Created by {@link DelegateOrMinKeyCollectorFactory}.
|
||||||
*/
|
*/
|
||||||
public class DelegateOrMinKeyCollector<TDelegate extends KeyCollector<TDelegate>>
|
public class DelegateOrMinKeyCollector<TDelegate extends KeyCollector<TDelegate>>
|
||||||
|
@ -177,4 +177,19 @@ public class DelegateOrMinKeyCollector<TDelegate extends KeyCollector<TDelegate>
|
||||||
return ClusterByPartitions.oneUniversalPartition();
|
return ClusterByPartitions.oneUniversalPartition();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int sketchAccuracyFactor()
|
||||||
|
{
|
||||||
|
return delegate == null ? Integer.MIN_VALUE : delegate.sketchAccuracyFactor();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "DelegateOrMinKeyCollector{" +
|
||||||
|
"delegate=" + delegate +
|
||||||
|
", minKey=" + minKey +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -245,6 +245,12 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
|
||||||
return new ClusterByPartitions(partitions);
|
return new ClusterByPartitions(partitions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int sketchAccuracyFactor()
|
||||||
|
{
|
||||||
|
return -spaceReductionFactor;
|
||||||
|
}
|
||||||
|
|
||||||
@JsonProperty("keys")
|
@JsonProperty("keys")
|
||||||
Map<RowKey, Long> getRetainedKeys()
|
Map<RowKey, Long> getRetainedKeys()
|
||||||
{
|
{
|
||||||
|
@ -312,4 +318,15 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "DistinctKeyCollector{" +
|
||||||
|
"maxBytes=" + maxBytes +
|
||||||
|
", retainedBytes=" + retainedBytes +
|
||||||
|
", spaceReductionFactor=" + spaceReductionFactor +
|
||||||
|
", totalWeightUnadjusted=" + totalWeightUnadjusted +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,7 +26,7 @@ public interface KeyCollector<CollectorType extends KeyCollector<CollectorType>>
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* Add a key with a certain weight to this collector.
|
* Add a key with a certain weight to this collector.
|
||||||
*
|
* <br>
|
||||||
* See {@link ClusterByStatisticsCollector#add} for the meaning of "weight".
|
* See {@link ClusterByStatisticsCollector#add} for the meaning of "weight".
|
||||||
*/
|
*/
|
||||||
void add(RowKey key, long weight);
|
void add(RowKey key, long weight);
|
||||||
|
@ -80,4 +80,12 @@ public interface KeyCollector<CollectorType extends KeyCollector<CollectorType>>
|
||||||
* or lower than the provided target.
|
* or lower than the provided target.
|
||||||
*/
|
*/
|
||||||
ClusterByPartitions generatePartitionsWithTargetWeight(long targetWeight);
|
ClusterByPartitions generatePartitionsWithTargetWeight(long targetWeight);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an integer which indicates the accuracy of the sketch used. The higher the value, the more accurate it is.
|
||||||
|
* This can be compared to check which sketches have been downsampled the most and are thus the least accurate. The
|
||||||
|
* exact value returned is decided by the implementation, and it is only meaningful to compare sketches of the same
|
||||||
|
* implementation in this way.
|
||||||
|
*/
|
||||||
|
int sketchAccuracyFactor();
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ import java.util.NoSuchElementException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A key collector that is used when not aggregating. It uses a quantiles sketch to track keys.
|
* A key collector that is used when not aggregating. It uses a quantiles sketch to track keys.
|
||||||
*
|
* <br>
|
||||||
* The collector maintains the averageKeyLength for all keys added through {@link #add(RowKey, long)} or
|
* The collector maintains the averageKeyLength for all keys added through {@link #add(RowKey, long)} or
|
||||||
* {@link #addAll(QuantilesSketchKeyCollector)}. The average is calculated as a running average and accounts for
|
* {@link #addAll(QuantilesSketchKeyCollector)}. The average is calculated as a running average and accounts for
|
||||||
* weight of the key added. The averageKeyLength is assumed to be unaffected by {@link #downSample()}.
|
* weight of the key added. The averageKeyLength is assumed to be unaffected by {@link #downSample()}.
|
||||||
|
@ -171,6 +171,12 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
|
||||||
return new ClusterByPartitions(partitions);
|
return new ClusterByPartitions(partitions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int sketchAccuracyFactor()
|
||||||
|
{
|
||||||
|
return sketch.getK();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the backing sketch. Exists for usage by {@link QuantilesSketchKeyCollectorFactory}.
|
* Retrieves the backing sketch. Exists for usage by {@link QuantilesSketchKeyCollectorFactory}.
|
||||||
*/
|
*/
|
||||||
|
@ -186,4 +192,15 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
|
||||||
{
|
{
|
||||||
return averageKeyLength;
|
return averageKeyLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "QuantilesSketchKeyCollector{" +
|
||||||
|
"sketch=ItemsSketch{N=" + sketch.getN() +
|
||||||
|
", K=" + sketch.getK() +
|
||||||
|
", retainedKeys=" + sketch.getNumRetained() +
|
||||||
|
"}, averageKeyLength=" + averageKeyLength +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -81,7 +81,7 @@ export const RuleEditor = React.memo(function RuleEditor(props: RuleEditorProps)
|
||||||
if (!tieredReplicantsList.length) {
|
if (!tieredReplicantsList.length) {
|
||||||
return (
|
return (
|
||||||
<FormGroup>
|
<FormGroup>
|
||||||
There is no historical replication configured, data will not be loaded on hisotricals.
|
There is no historical replication configured, data will not be loaded on historicals.
|
||||||
</FormGroup>
|
</FormGroup>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue