Improve rowkey object size estimate (#13319)

* Improve rowkey object size estimate

* Address review comments

* Update comment

* Fix test
This commit is contained in:
Adarsh Sanjeev 2022-11-08 10:12:07 +05:30 committed by GitHub
parent 48528a0c98
commit a28b8c2674
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 25 additions and 17 deletions

View File

@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollector<TDelegate extends KeyCollector<TDelegate>
if (delegate != null) { if (delegate != null) {
return delegate.estimatedRetainedBytes(); return delegate.estimatedRetainedBytes();
} else { } else {
return minKey != null ? minKey.getNumberOfBytes() : 0; return minKey != null ? minKey.estimatedObjectSizeBytes() : 0;
} }
} }

View File

@ -121,13 +121,13 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
if (isNewMin && !retainedKeys.isEmpty() && !isKeySelected(retainedKeys.firstKey())) { if (isNewMin && !retainedKeys.isEmpty() && !isKeySelected(retainedKeys.firstKey())) {
// Old min should be kicked out. // Old min should be kicked out.
totalWeightUnadjusted -= retainedKeys.removeLong(retainedKeys.firstKey()); totalWeightUnadjusted -= retainedKeys.removeLong(retainedKeys.firstKey());
retainedBytes -= retainedKeys.firstKey().getNumberOfBytes(); retainedBytes -= retainedKeys.firstKey().estimatedObjectSizeBytes();
} }
if (retainedKeys.putIfAbsent(key, weight) == MISSING_KEY_WEIGHT) { if (retainedKeys.putIfAbsent(key, weight) == MISSING_KEY_WEIGHT) {
// We did add this key. (Previous value was zero, meaning absent.) // We did add this key. (Previous value was zero, meaning absent.)
totalWeightUnadjusted += weight; totalWeightUnadjusted += weight;
retainedBytes += key.getNumberOfBytes(); retainedBytes += key.estimatedObjectSizeBytes();
} }
while (retainedBytes >= maxBytes) { while (retainedBytes >= maxBytes) {
@ -305,7 +305,7 @@ public class DistinctKeyCollector implements KeyCollector<DistinctKeyCollector>
if (!isKeySelected(key)) { if (!isKeySelected(key)) {
totalWeightUnadjusted -= entry.getLongValue(); totalWeightUnadjusted -= entry.getLongValue();
retainedBytes -= entry.getKey().getNumberOfBytes(); retainedBytes -= entry.getKey().estimatedObjectSizeBytes();
iterator.remove(); iterator.remove();
} }
} }

View File

@ -64,7 +64,7 @@ public class QuantilesSketchKeyCollector implements KeyCollector<QuantilesSketch
{ {
double estimatedTotalSketchSizeInBytes = averageKeyLength * sketch.getN(); double estimatedTotalSketchSizeInBytes = averageKeyLength * sketch.getN();
// The key is added "weight" times to the sketch, we can update the total weight directly. // The key is added "weight" times to the sketch, we can update the total weight directly.
estimatedTotalSketchSizeInBytes += key.getNumberOfBytes() * weight; estimatedTotalSketchSizeInBytes += key.estimatedObjectSizeBytes() * weight;
for (int i = 0; i < weight; i++) { for (int i = 0; i < weight; i++) {
// Add the same key multiple times to make it "heavier". // Add the same key multiple times to make it "heavier".
sketch.update(key); sketch.update(key);

View File

@ -106,7 +106,7 @@ public class QuantilesSketchKeyCollectorFactory
int serializedSize = Integer.BYTES * items.length; int serializedSize = Integer.BYTES * items.length;
for (final RowKey key : items) { for (final RowKey key : items) {
serializedSize += key.getNumberOfBytes(); serializedSize += key.array().length;
} }
final byte[] serializedBytes = new byte[serializedSize]; final byte[] serializedBytes = new byte[serializedSize];

View File

@ -89,7 +89,7 @@ public class DelegateOrMinKeyCollectorTest
Assert.assertTrue(collector.getDelegate().isPresent()); Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty()); Assert.assertFalse(collector.isEmpty());
Assert.assertEquals(key, collector.minKey()); Assert.assertEquals(key, collector.minKey());
Assert.assertEquals(key.getNumberOfBytes(), collector.estimatedRetainedBytes(), 0); Assert.assertEquals(key.estimatedObjectSizeBytes(), collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(1, collector.estimatedTotalWeight()); Assert.assertEquals(1, collector.estimatedTotalWeight());
} }
@ -110,7 +110,7 @@ public class DelegateOrMinKeyCollectorTest
Assert.assertTrue(collector.getDelegate().isPresent()); Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty()); Assert.assertFalse(collector.isEmpty());
Assert.assertEquals(key, collector.minKey()); Assert.assertEquals(key, collector.minKey());
Assert.assertEquals(key.getNumberOfBytes(), collector.estimatedRetainedBytes(), 0); Assert.assertEquals(key.estimatedObjectSizeBytes(), collector.estimatedRetainedBytes(), 0);
Assert.assertEquals(1, collector.estimatedTotalWeight()); Assert.assertEquals(1, collector.estimatedTotalWeight());
// Should not have actually downsampled, because the quantiles-based collector does nothing when // Should not have actually downsampled, because the quantiles-based collector does nothing when
@ -133,7 +133,7 @@ public class DelegateOrMinKeyCollectorTest
RowKey key = createKey(1L); RowKey key = createKey(1L);
collector.add(key, 1); collector.add(key, 1);
collector.add(key, 1); collector.add(key, 1);
int expectedRetainedBytes = 2 * key.getNumberOfBytes(); int expectedRetainedBytes = 2 * key.estimatedObjectSizeBytes();
Assert.assertTrue(collector.getDelegate().isPresent()); Assert.assertTrue(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty()); Assert.assertFalse(collector.isEmpty());
@ -144,7 +144,7 @@ public class DelegateOrMinKeyCollectorTest
while (collector.getDelegate().isPresent()) { while (collector.getDelegate().isPresent()) {
Assert.assertTrue(collector.downSample()); Assert.assertTrue(collector.downSample());
} }
expectedRetainedBytes = key.getNumberOfBytes(); expectedRetainedBytes = key.estimatedObjectSizeBytes();
Assert.assertFalse(collector.getDelegate().isPresent()); Assert.assertFalse(collector.getDelegate().isPresent());
Assert.assertFalse(collector.isEmpty()); Assert.assertFalse(collector.isEmpty());

View File

@ -195,13 +195,13 @@ public class QuantilesSketchKeyCollectorTest
collector.add(smallKey, 3); collector.add(smallKey, 3);
Assert.assertEquals(smallKey.getNumberOfBytes(), collector.getAverageKeyLength(), 0); Assert.assertEquals(smallKey.estimatedObjectSizeBytes(), collector.getAverageKeyLength(), 0);
other.add(largeKey, 5); other.add(largeKey, 5);
Assert.assertEquals(largeKey.getNumberOfBytes(), other.getAverageKeyLength(), 0); Assert.assertEquals(largeKey.estimatedObjectSizeBytes(), other.getAverageKeyLength(), 0);
collector.addAll(other); collector.addAll(other);
Assert.assertEquals((smallKey.getNumberOfBytes() * 3 + largeKey.getNumberOfBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0); Assert.assertEquals((smallKey.estimatedObjectSizeBytes() * 3 + largeKey.estimatedObjectSizeBytes() * 5) / 8.0, collector.getAverageKeyLength(), 0);
} }
@Test @Test

View File

@ -32,6 +32,10 @@ public class RowKey
{ {
private static final RowKey EMPTY_KEY = new RowKey(new byte[0]); private static final RowKey EMPTY_KEY = new RowKey(new byte[0]);
// Constant to account for hashcode and object overhead
// 24 bytes (header) + 8 bytes (reference) + 8 bytes (hashCode long) + 4 bytes (safe estimate of hashCodeComputed)
static final int OBJECT_OVERHEAD_SIZE_BYTES = 44;
private final byte[] key; private final byte[] key;
// Cached hashcode. Computed on demand, not in the constructor, to avoid unnecessary computation. // Cached hashcode. Computed on demand, not in the constructor, to avoid unnecessary computation.
@ -109,8 +113,12 @@ public class RowKey
return Arrays.toString(key); return Arrays.toString(key);
} }
public int getNumberOfBytes() /**
* Estimate number of bytes taken by an object of {@link RowKey}. Only returns an estimate and does not account for
* platform or JVM specific implementation.
*/
public int estimatedObjectSizeBytes()
{ {
return array().length; return OBJECT_OVERHEAD_SIZE_BYTES + array().length;
} }
} }

View File

@ -97,11 +97,11 @@ public class RowKeyTest extends InitializedNullHandlingTest
{ {
final RowSignature signatureLong = RowSignature.builder().add("1", ColumnType.LONG).build(); final RowSignature signatureLong = RowSignature.builder().add("1", ColumnType.LONG).build();
final RowKey longKey = KeyTestUtils.createKey(signatureLong, 1L, "abc"); final RowKey longKey = KeyTestUtils.createKey(signatureLong, 1L, "abc");
Assert.assertEquals(longKey.array().length, longKey.getNumberOfBytes()); Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + longKey.array().length, longKey.estimatedObjectSizeBytes());
final RowSignature signatureLongString = final RowSignature signatureLongString =
RowSignature.builder().add("1", ColumnType.LONG).add("2", ColumnType.STRING).build(); RowSignature.builder().add("1", ColumnType.LONG).add("2", ColumnType.STRING).build();
final RowKey longStringKey = KeyTestUtils.createKey(signatureLongString, 1L, "abc"); final RowKey longStringKey = KeyTestUtils.createKey(signatureLongString, 1L, "abc");
Assert.assertEquals(longStringKey.array().length, longStringKey.getNumberOfBytes()); Assert.assertEquals(RowKey.OBJECT_OVERHEAD_SIZE_BYTES + longStringKey.array().length, longStringKey.estimatedObjectSizeBytes());
} }
} }