Improve rowkey object size estimate (#13319)

* Improve rowkey object size estimate

* Address review comments

* Update comment

* Fix test
This commit is contained in:
Adarsh Sanjeev 2022-11-08 10:12:07 +05:30 committed by GitHub
parent 48528a0c98
commit a28b8c2674
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 25 additions and 17 deletions

View File

@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollector<TDelegate extends KeyCollector<TDelegate>
if (delegate != null) {
return delegate.estimatedRetainedBytes();
} else {
return minKey != null ? minKey.getNumberOfBytes() : 0;
return minKey != null ? minKey.estimatedObjectSizeBytes() : 0;
}
}

View File

@ -121,13 +121,13 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
if (isNewMin && !retainedKeys.isEmpty() && !isKeySelected(retainedKeys.firstKey())) {
// Old min should be kicked out.
totalWeightUnadjusted -= retainedKeys.removeLong(retainedKeys.firstKey());
retainedBytes -= retainedKeys.firstKey().getNumberOfBytes();
retainedBytes -= retainedKeys.firstKey().estimatedObjectSizeBytes();
}
if (retainedKeys.putIfAbsent(key, weight) == MISSING_KEY_WEIGHT) {
// We did add this key. (Previous value was zero, meaning absent.)
totalWeightUnadjusted += weight;
retainedBytes += key.getNumberOfBytes();
retainedBytes += key.estimatedObjectSizeBytes();
}
while (retainedBytes >= maxBytes) {
@ -305,7 +305,7 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
if (!isKeySelected(key)) {
totalWeightUnadjusted -= entry.getLongValue();
retainedBytes -= entry.getKey().getNumberOfBytes();
retainedBytes -= entry.getKey().estimatedObjectSizeBytes();
iterator.remove();
}
}

View File

@ -64,7 +64,7 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
{
double estimatedTotalSketchSizeInBytes = averageKeyLength * sketch.getN();
// The key is added "weight" times to the sketch, so we can update the total weight directly.
estimatedTotalSketchSizeInBytes += key.getNumberOfBytes() * weight;
estimatedTotalSketchSizeInBytes += key.estimatedObjectSizeBytes() * weight;
for (int i = 0; i < weight; i++) {
// Add the same key multiple times to make it "heavier".
sketch.update(key);

View File

@ -106,7 +106,7 @@ public class QuantilesSketchKeyCollectorFactory
int serializedSize = Integer.BYTES * items.length;
for (final RowKey key : items) {
serializedSize += key.getNumberOfBytes();
serializedSize += key.array().length;
}
final byte[] serializedBytes = new byte[serializedSize];

View File

@ -89,7 +89,7 @@ public class DelegateOrMinKeyCollectorTest
Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());
Assert.assertEquals(key, collector.minKey());
Assert.assertEquals(key.getNumberOfBytes(), collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(key.estimatedObjectSizeBytes(), collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(1, collector.estimatedTotalWeight());
}
@ -110,7 +110,7 @@ public class DelegateOrMinKeyCollectorTest
Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());
Assert.assertEquals(key, collector.minKey());
Assert.assertEquals(key.getNumberOfBytes(), collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(key.estimatedObjectSizeBytes(), collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(1, collector.estimatedTotalWeight());
// Should not have actually downsampled, because the quantiles-based collector does nothing when
@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollectorTest
RowKey key = createKey(1L);
collector.add(key, 1);
collector.add(key, 1);
int expectedRetainedBytes = 2 * key.getNumberOfBytes();
int expectedRetainedBytes = 2 * key.estimatedObjectSizeBytes();
Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());
@ -144,7 +144,7 @@ public class DelegateOrMinKeyCollectorTest
while (collector.getDelegate().isPresent()) {
Assert.assertTrue(collector.downSample());
}
expectedRetainedBytes = key.getNumberOfBytes();
expectedRetainedBytes = key.estimatedObjectSizeBytes();
Assert.assertFalse(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty());

View File

@ -195,13 +195,13 @@ public class QuantilesSketchKeyCollectorTest
collector.add(smallKey, 3);
Assert.assertEquals(smallKey.getNumberOfBytes(), collector.getAverageKeyLength(), 0);
Assert.assertEquals(smallKey.estimatedObjectSizeBytes(), collector.getAverageKeyLength(), 0);
other.add(largeKey, 5);
Assert.assertEquals(largeKey.getNumberOfBytes(), other.getAverageKeyLength(), 0);
Assert.assertEquals(largeKey.estimatedObjectSizeBytes(), other.getAverageKeyLength(), 0);
collector.addAll(other);
Assert.assertEquals((smallKey.getNumberOfBytes() * 3 + largeKey.getNumberOfBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0);
Assert.assertEquals((smallKey.estimatedObjectSizeBytes() * 3 + largeKey.estimatedObjectSizeBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0);
}
@Test

View File

@ -32,6 +32,10 @@ public class RowKey
{
private static final RowKey EMPTY_KEY = new RowKey(new byte[0]);
// Constant to account for hashcode and object overhead
// 24 bytes (header) + 8 bytes (reference) + 8 bytes (hashCode long) + 4 bytes (safe estimate of hashCodeComputed)
static final int OBJECT_OVERHEAD_SIZE_BYTES = 44;
private final byte[] key;
// Cached hashcode. Computed on demand, not in the constructor, to avoid unnecessary computation.
@ -109,8 +113,12 @@ public class RowKey
return Arrays.toString(key);
}
public int getNumberOfBytes()
/**
* Estimates the number of bytes taken by a {@link RowKey} object. This is only an estimate and does not account for
* platform- or JVM-specific implementation details.
*/
public int estimatedObjectSizeBytes()
{
return array().length;
return OBJECT_OVERHEAD_SIZE_BYTES + array().length;
}
}

View File

@ -97,11 +97,11 @@ public class RowKeyTest extends InitializedNullHandlingTest
{
final RowSignature signatureLong = RowSignature.builder().add("1", ColumnType.LONG).build();
final RowKey longKey = KeyTestUtils.createKey(signatureLong, 1L, "abc");
Assert.assertEquals(longKey.array().length, longKey.getNumberOfBytes());
Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + longKey.array().length, longKey.estimatedObjectSizeBytes());
final RowSignature signatureLongString =
RowSignature.builder().add("1", ColumnType.LONG).add("2", ColumnType.STRING).build();
final RowKey longStringKey = KeyTestUtils.createKey(signatureLongString, 1L, "abc");
Assert.assertEquals(longStringKey.array().length, longStringKey.getNumberOfBytes());
Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + longStringKey.array().length, longStringKey.estimatedObjectSizeBytes());
}
}