mirror of https://github.com/apache/druid.git
Improve rowkey object size estimate (#13319)
* Improve rowkey object size estimate * Address review comments * Update comment * Fix test
This commit is contained in:
parent
48528a0c98
commit
a28b8c2674
|
@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollector<TDelegate extends KeyCollector<TDelegate>
|
|||
if (delegate != null) {
|
||||
return delegate.estimatedRetainedBytes();
|
||||
} else {
|
||||
return minKey != null ? minKey.getNumberOfBytes() : 0;
|
||||
return minKey != null ? minKey.estimatedObjectSizeBytes() : 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -121,13 +121,13 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
|
|||
if (isNewMin && !retainedKeys.isEmpty() && !isKeySelected(retainedKeys.firstKey())) {
|
||||
// Old min should be kicked out.
|
||||
totalWeightUnadjusted -= retainedKeys.removeLong(retainedKeys.firstKey());
|
||||
retainedBytes -= retainedKeys.firstKey().getNumberOfBytes();
|
||||
retainedBytes -= retainedKeys.firstKey().estimatedObjectSizeBytes();
|
||||
}
|
||||
|
||||
if (retainedKeys.putIfAbsent(key, weight) == MISSING_KEY_WEIGHT) {
|
||||
// We did add this key. (Previous value was zero, meaning absent.)
|
||||
totalWeightUnadjusted += weight;
|
||||
retainedBytes += key.getNumberOfBytes();
|
||||
retainedBytes += key.estimatedObjectSizeBytes();
|
||||
}
|
||||
|
||||
while (retainedBytes >= maxBytes) {
|
||||
|
@ -305,7 +305,7 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
|
|||
|
||||
if (!isKeySelected(key)) {
|
||||
totalWeightUnadjusted -= entry.getLongValue();
|
||||
retainedBytes -= entry.getKey().getNumberOfBytes();
|
||||
retainedBytes -= entry.getKey().estimatedObjectSizeBytes();
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
|
|||
{
|
||||
double estimatedTotalSketchSizeInBytes = averageKeyLength * sketch.getN();
|
||||
// The key is added "weight" times to the sketch, we can update the total weight directly.
|
||||
estimatedTotalSketchSizeInBytes += key.getNumberOfBytes() * weight;
|
||||
estimatedTotalSketchSizeInBytes += key.estimatedObjectSizeBytes() * weight;
|
||||
for (int i = 0; i < weight; i++) {
|
||||
// Add the same key multiple times to make it "heavier".
|
||||
sketch.update(key);
|
||||
|
|
|
@ -106,7 +106,7 @@ public class QuantilesSketchKeyCollectorFactory
|
|||
int serializedSize = Integer.BYTES * items.length;
|
||||
|
||||
for (final RowKey key : items) {
|
||||
serializedSize += key.getNumberOfBytes();
|
||||
serializedSize += key.array().length;
|
||||
}
|
||||
|
||||
final byte[] serializedBytes = new byte[serializedSize];
|
||||
|
|
|
@ -89,7 +89,7 @@ public class DelegateOrMinKeyCollectorTest
|
|||
Assert.assertTrue(collector.getDelegate().isPresent());
|
||||
Assert.assertFalse(collector.isEmpty());
|
||||
Assert.assertEquals(key, collector.minKey());
|
||||
Assert.assertEquals(key.getNumberOfBytes(), collector.estimatedRetainedBytes(), 0);
|
||||
Assert.assertEquals(key.estimatedObjectSizeBytes(), collector.estimatedRetainedBytes(), 0);
|
||||
Assert.assertEquals(1, collector.estimatedTotalWeight());
|
||||
}
|
||||
|
||||
|
@ -110,7 +110,7 @@ public class DelegateOrMinKeyCollectorTest
|
|||
Assert.assertTrue(collector.getDelegate().isPresent());
|
||||
Assert.assertFalse(collector.isEmpty());
|
||||
Assert.assertEquals(key, collector.minKey());
|
||||
Assert.assertEquals(key.getNumberOfBytes(), collector.estimatedRetainedBytes(), 0);
|
||||
Assert.assertEquals(key.estimatedObjectSizeBytes(), collector.estimatedRetainedBytes(), 0);
|
||||
Assert.assertEquals(1, collector.estimatedTotalWeight());
|
||||
|
||||
// Should not have actually downsampled, because the quantiles-based collector does nothing when
|
||||
|
@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollectorTest
|
|||
RowKey key = createKey(1L);
|
||||
collector.add(key, 1);
|
||||
collector.add(key, 1);
|
||||
int expectedRetainedBytes = 2 * key.getNumberOfBytes();
|
||||
int expectedRetainedBytes = 2 * key.estimatedObjectSizeBytes();
|
||||
|
||||
Assert.assertTrue(collector.getDelegate().isPresent());
|
||||
Assert.assertFalse(collector.isEmpty());
|
||||
|
@ -144,7 +144,7 @@ public class DelegateOrMinKeyCollectorTest
|
|||
while (collector.getDelegate().isPresent()) {
|
||||
Assert.assertTrue(collector.downSample());
|
||||
}
|
||||
expectedRetainedBytes = key.getNumberOfBytes();
|
||||
expectedRetainedBytes = key.estimatedObjectSizeBytes();
|
||||
|
||||
Assert.assertFalse(collector.getDelegate().isPresent());
|
||||
Assert.assertFalse(collector.isEmpty());
|
||||
|
|
|
@ -195,13 +195,13 @@ public class QuantilesSketchKeyCollectorTest
|
|||
|
||||
|
||||
collector.add(smallKey, 3);
|
||||
Assert.assertEquals(smallKey.getNumberOfBytes(), collector.getAverageKeyLength(), 0);
|
||||
Assert.assertEquals(smallKey.estimatedObjectSizeBytes(), collector.getAverageKeyLength(), 0);
|
||||
|
||||
other.add(largeKey, 5);
|
||||
Assert.assertEquals(largeKey.getNumberOfBytes(), other.getAverageKeyLength(), 0);
|
||||
Assert.assertEquals(largeKey.estimatedObjectSizeBytes(), other.getAverageKeyLength(), 0);
|
||||
|
||||
collector.addAll(other);
|
||||
Assert.assertEquals((smallKey.getNumberOfBytes() * 3 + largeKey.getNumberOfBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0);
|
||||
Assert.assertEquals((smallKey.estimatedObjectSizeBytes() * 3 + largeKey.estimatedObjectSizeBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -32,6 +32,10 @@ public class RowKey
|
|||
{
|
||||
private static final RowKey EMPTY_KEY = new RowKey(new byte[0]);
|
||||
|
||||
// Constant to account for hashcode and object overhead
|
||||
// 24 bytes (header) + 8 bytes (reference) + 8 bytes (hashCode long) + 4 bytes (safe estimate of hashCodeComputed)
|
||||
static final int OBJECT_OVERHEAD_SIZE_BYTES = 44;
|
||||
|
||||
private final byte[] key;
|
||||
|
||||
// Cached hashcode. Computed on demand, not in the constructor, to avoid unnecessary computation.
|
||||
|
@ -109,8 +113,12 @@ public class RowKey
|
|||
return Arrays.toString(key);
|
||||
}
|
||||
|
||||
public int getNumberOfBytes()
|
||||
/**
|
||||
* Estimates the number of bytes taken by a {@link RowKey} object. This is only an estimate and does not account for
|
||||
* platform-specific or JVM-specific implementation details.
|
||||
*/
|
||||
public int estimatedObjectSizeBytes()
|
||||
{
|
||||
return array().length;
|
||||
return OBJECT_OVERHEAD_SIZE_BYTES + array().length;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,11 +97,11 @@ public class RowKeyTest extends InitializedNullHandlingTest
|
|||
{
|
||||
final RowSignature signatureLong = RowSignature.builder().add("1", ColumnType.LONG).build();
|
||||
final RowKey longKey = KeyTestUtils.createKey(signatureLong, 1L, "abc");
|
||||
Assert.assertEquals(longKey.array().length, longKey.getNumberOfBytes());
|
||||
Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + longKey.array().length, longKey.estimatedObjectSizeBytes());
|
||||
|
||||
final RowSignature signatureLongString =
|
||||
RowSignature.builder().add("1", ColumnType.LONG).add("2", ColumnType.STRING).build();
|
||||
final RowKey longStringKey = KeyTestUtils.createKey(signatureLongString, 1L, "abc");
|
||||
Assert.assertEquals(longStringKey.array().length, longStringKey.getNumberOfBytes());
|
||||
Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + longStringKey.array().length, longStringKey.estimatedObjectSizeBytes());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue