mirror of
https://github.com/apache/druid.git
synced 2025-03-08 18:39:45 +00:00
fix goldilocks bug with HashVectorGrouper improperly initializing memory (#11649)
* fix goldilocks bug with HashVectorGrouper improperly initializing memory that causes failure when there exists room to only grow one time * fix unintended change * cleanup
This commit is contained in:
parent
3ff1c2b8ce
commit
59d257816b
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
package org.apache.druid.query.groupby.epinephelinae;
|
package org.apache.druid.query.groupby.epinephelinae;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Supplier;
|
import com.google.common.base.Supplier;
|
||||||
import it.unimi.dsi.fastutil.HashCommon;
|
import it.unimi.dsi.fastutil.HashCommon;
|
||||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||||
@ -202,17 +203,7 @@ public class HashVectorGrouper implements VectorGrouper
|
|||||||
tableStart = buffer.capacity() - bucketSize * (maxNumBuckets - numBuckets);
|
tableStart = buffer.capacity() - bucketSize * (maxNumBuckets - numBuckets);
|
||||||
}
|
}
|
||||||
|
|
||||||
final ByteBuffer tableBuffer = buffer.duplicate();
|
this.hashTable = createTable(buffer, tableStart, numBuckets);
|
||||||
tableBuffer.position(0);
|
|
||||||
tableBuffer.limit(MemoryOpenHashTable.memoryNeeded(numBuckets, bucketSize));
|
|
||||||
|
|
||||||
this.hashTable = new MemoryOpenHashTable(
|
|
||||||
WritableMemory.wrap(tableBuffer.slice(), ByteOrder.nativeOrder()),
|
|
||||||
numBuckets,
|
|
||||||
Math.max(1, Math.min(bufferGrouperMaxSize, (int) (numBuckets * maxLoadFactor))),
|
|
||||||
keySize,
|
|
||||||
aggregators.spaceNeeded()
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -268,6 +259,27 @@ public class HashVectorGrouper implements VectorGrouper
|
|||||||
aggregators.close();
|
aggregators.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public int getTableStart()
|
||||||
|
{
|
||||||
|
return tableStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
private MemoryOpenHashTable createTable(ByteBuffer buffer, int tableStart, int numBuckets)
|
||||||
|
{
|
||||||
|
final ByteBuffer tableBuffer = buffer.duplicate();
|
||||||
|
tableBuffer.position(tableStart);
|
||||||
|
assert tableStart + MemoryOpenHashTable.memoryNeeded(numBuckets, bucketSize) <= buffer.capacity();
|
||||||
|
tableBuffer.limit(tableStart + MemoryOpenHashTable.memoryNeeded(numBuckets, bucketSize));
|
||||||
|
|
||||||
|
return new MemoryOpenHashTable(
|
||||||
|
WritableMemory.wrap(tableBuffer.slice(), ByteOrder.nativeOrder()),
|
||||||
|
numBuckets,
|
||||||
|
Math.max(1, Math.min(bufferGrouperMaxSize, (int) (numBuckets * maxLoadFactor))),
|
||||||
|
keySize,
|
||||||
|
aggregators.spaceNeeded()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the given bucket with the given key and fresh, empty aggregation state. Must only be called if
|
* Initializes the given bucket with the given key and fresh, empty aggregation state. Must only be called if
|
||||||
@ -307,17 +319,7 @@ public class HashVectorGrouper implements VectorGrouper
|
|||||||
final int newNumBuckets = nextTableNumBuckets();
|
final int newNumBuckets = nextTableNumBuckets();
|
||||||
final int newTableStart = nextTableStart();
|
final int newTableStart = nextTableStart();
|
||||||
|
|
||||||
final ByteBuffer newTableBuffer = buffer.duplicate();
|
final MemoryOpenHashTable newHashTable = createTable(buffer, newTableStart, newNumBuckets);
|
||||||
newTableBuffer.position(newTableStart);
|
|
||||||
newTableBuffer.limit(newTableStart + MemoryOpenHashTable.memoryNeeded(newNumBuckets, bucketSize));
|
|
||||||
|
|
||||||
final MemoryOpenHashTable newHashTable = new MemoryOpenHashTable(
|
|
||||||
WritableMemory.wrap(newTableBuffer.slice(), ByteOrder.nativeOrder()),
|
|
||||||
newNumBuckets,
|
|
||||||
maxSizeForNumBuckets(newNumBuckets, maxLoadFactor, bufferGrouperMaxSize),
|
|
||||||
keySize,
|
|
||||||
aggregators.spaceNeeded()
|
|
||||||
);
|
|
||||||
|
|
||||||
hashTable.copyTo(newHashTable, new HashVectorGrouperBucketCopyHandler(aggregators, hashTable.bucketValueOffset()));
|
hashTable.copyTo(newHashTable, new HashVectorGrouperBucketCopyHandler(aggregators, hashTable.bucketValueOffset()));
|
||||||
hashTable = newHashTable;
|
hashTable = newHashTable;
|
||||||
@ -382,15 +384,6 @@ public class HashVectorGrouper implements VectorGrouper
|
|||||||
return nextTableStart;
|
return nextTableStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the maximum number of elements (size) for a given number of buckets. When the table hits this size,
|
|
||||||
* we must either grow it or return a table-full error.
|
|
||||||
*/
|
|
||||||
private static int maxSizeForNumBuckets(final int numBuckets, final double maxLoadFactor, final int configuredMaxSize)
|
|
||||||
{
|
|
||||||
return Math.max(1, Math.min(configuredMaxSize, (int) (numBuckets * maxLoadFactor)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the initial table bucket count given a particular buffer capacity, bucket size, and user-configured
|
* Compute the initial table bucket count given a particular buffer capacity, bucket size, and user-configured
|
||||||
* initial bucket count.
|
* initial bucket count.
|
||||||
|
@ -20,7 +20,9 @@
|
|||||||
package org.apache.druid.query.groupby.epinephelinae;
|
package org.apache.druid.query.groupby.epinephelinae;
|
||||||
|
|
||||||
import com.google.common.base.Suppliers;
|
import com.google.common.base.Suppliers;
|
||||||
|
import org.apache.datasketches.memory.WritableMemory;
|
||||||
import org.apache.druid.query.aggregation.AggregatorAdapters;
|
import org.apache.druid.query.aggregation.AggregatorAdapters;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
@ -45,4 +47,256 @@ public class HashVectorGrouperTest
|
|||||||
grouper.close();
|
grouper.close();
|
||||||
Mockito.verify(aggregatorAdapters, Mockito.times(1)).close();
|
Mockito.verify(aggregatorAdapters, Mockito.times(1)).close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTableStartIsNotMemoryStartIfNotMaxSized()
|
||||||
|
{
|
||||||
|
final int maxVectorSize = 512;
|
||||||
|
final int keySize = 4;
|
||||||
|
final int bufferSize = 100 * 1024;
|
||||||
|
final WritableMemory keySpace = WritableMemory.allocate(keySize * maxVectorSize);
|
||||||
|
final ByteBuffer buffer = ByteBuffer.wrap(new byte[bufferSize]);
|
||||||
|
final AggregatorAdapters aggregatorAdapters = Mockito.mock(AggregatorAdapters.class);
|
||||||
|
final HashVectorGrouper grouper = new HashVectorGrouper(
|
||||||
|
Suppliers.ofInstance(buffer),
|
||||||
|
keySize,
|
||||||
|
aggregatorAdapters,
|
||||||
|
8,
|
||||||
|
0.f,
|
||||||
|
4
|
||||||
|
);
|
||||||
|
grouper.initVectorized(maxVectorSize);
|
||||||
|
Assert.assertNotEquals(0, grouper.getTableStart());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTableStartIsNotMemoryStartIfIsMaxSized()
|
||||||
|
{
|
||||||
|
final int maxVectorSize = 512;
|
||||||
|
final int keySize = 10000;
|
||||||
|
final int bufferSize = 100 * 1024;
|
||||||
|
final ByteBuffer buffer = ByteBuffer.wrap(new byte[bufferSize]);
|
||||||
|
final AggregatorAdapters aggregatorAdapters = Mockito.mock(AggregatorAdapters.class);
|
||||||
|
final HashVectorGrouper grouper = new HashVectorGrouper(
|
||||||
|
Suppliers.ofInstance(buffer),
|
||||||
|
keySize,
|
||||||
|
aggregatorAdapters,
|
||||||
|
4,
|
||||||
|
0.f,
|
||||||
|
4
|
||||||
|
);
|
||||||
|
grouper.initVectorized(maxVectorSize);
|
||||||
|
Assert.assertEquals(0, grouper.getTableStart());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGrowOnce()
|
||||||
|
{
|
||||||
|
final int maxVectorSize = 512;
|
||||||
|
final int keySize = 4;
|
||||||
|
final int aggSize = 8;
|
||||||
|
final WritableMemory keySpace = WritableMemory.allocate(keySize * maxVectorSize);
|
||||||
|
|
||||||
|
final AggregatorAdapters aggregatorAdapters = Mockito.mock(AggregatorAdapters.class);
|
||||||
|
Mockito.when(aggregatorAdapters.spaceNeeded()).thenReturn(aggSize);
|
||||||
|
|
||||||
|
int startingNumBuckets = 4;
|
||||||
|
int maxBuckets = 16;
|
||||||
|
final int bufferSize = (keySize + aggSize) * maxBuckets;
|
||||||
|
final ByteBuffer buffer = ByteBuffer.wrap(new byte[bufferSize]);
|
||||||
|
final HashVectorGrouper grouper = new HashVectorGrouper(
|
||||||
|
Suppliers.ofInstance(buffer),
|
||||||
|
keySize,
|
||||||
|
aggregatorAdapters,
|
||||||
|
maxBuckets,
|
||||||
|
0.f,
|
||||||
|
startingNumBuckets
|
||||||
|
);
|
||||||
|
grouper.initVectorized(maxVectorSize);
|
||||||
|
|
||||||
|
int tableStart = grouper.getTableStart();
|
||||||
|
|
||||||
|
// two keys should not cause buffer to grow
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 2);
|
||||||
|
AggregateResult result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(tableStart, grouper.getTableStart());
|
||||||
|
|
||||||
|
// 3rd key should cause buffer to grow
|
||||||
|
// buffer should grow to maximum size
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 3);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(0, grouper.getTableStart());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGrowTwice()
|
||||||
|
{
|
||||||
|
final int maxVectorSize = 512;
|
||||||
|
final int keySize = 4;
|
||||||
|
final int aggSize = 8;
|
||||||
|
final WritableMemory keySpace = WritableMemory.allocate(keySize * maxVectorSize);
|
||||||
|
|
||||||
|
final AggregatorAdapters aggregatorAdapters = Mockito.mock(AggregatorAdapters.class);
|
||||||
|
Mockito.when(aggregatorAdapters.spaceNeeded()).thenReturn(aggSize);
|
||||||
|
|
||||||
|
int startingNumBuckets = 4;
|
||||||
|
int maxBuckets = 32;
|
||||||
|
final int bufferSize = (keySize + aggSize) * maxBuckets;
|
||||||
|
final ByteBuffer buffer = ByteBuffer.wrap(new byte[bufferSize]);
|
||||||
|
final HashVectorGrouper grouper = new HashVectorGrouper(
|
||||||
|
Suppliers.ofInstance(buffer),
|
||||||
|
keySize,
|
||||||
|
aggregatorAdapters,
|
||||||
|
maxBuckets,
|
||||||
|
0.f,
|
||||||
|
startingNumBuckets
|
||||||
|
);
|
||||||
|
grouper.initVectorized(maxVectorSize);
|
||||||
|
|
||||||
|
int tableStart = grouper.getTableStart();
|
||||||
|
|
||||||
|
// two keys should not cause buffer to grow
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 2);
|
||||||
|
AggregateResult result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(tableStart, grouper.getTableStart());
|
||||||
|
|
||||||
|
// 3rd key should cause buffer to grow
|
||||||
|
// buffer should grow to next size, but is not full
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 3);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertTrue(grouper.getTableStart() > tableStart);
|
||||||
|
|
||||||
|
// this time should be all the way
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 6);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(0, grouper.getTableStart());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGrowThreeTimes()
|
||||||
|
{
|
||||||
|
final int maxVectorSize = 512;
|
||||||
|
final int keySize = 4;
|
||||||
|
final int aggSize = 8;
|
||||||
|
final WritableMemory keySpace = WritableMemory.allocate(keySize * maxVectorSize);
|
||||||
|
|
||||||
|
final AggregatorAdapters aggregatorAdapters = Mockito.mock(AggregatorAdapters.class);
|
||||||
|
Mockito.when(aggregatorAdapters.spaceNeeded()).thenReturn(aggSize);
|
||||||
|
|
||||||
|
int startingNumBuckets = 4;
|
||||||
|
int maxBuckets = 64;
|
||||||
|
final int bufferSize = (keySize + aggSize) * maxBuckets;
|
||||||
|
final ByteBuffer buffer = ByteBuffer.wrap(new byte[bufferSize]);
|
||||||
|
final HashVectorGrouper grouper = new HashVectorGrouper(
|
||||||
|
Suppliers.ofInstance(buffer),
|
||||||
|
keySize,
|
||||||
|
aggregatorAdapters,
|
||||||
|
maxBuckets,
|
||||||
|
0.f,
|
||||||
|
startingNumBuckets
|
||||||
|
);
|
||||||
|
grouper.initVectorized(maxVectorSize);
|
||||||
|
|
||||||
|
int tableStart = grouper.getTableStart();
|
||||||
|
|
||||||
|
// two keys should cause buffer to grow
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 2);
|
||||||
|
AggregateResult result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(tableStart, grouper.getTableStart());
|
||||||
|
|
||||||
|
// 3rd key should cause buffer to grow
|
||||||
|
// buffer should grow to next size, but is not full
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 3);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertTrue(grouper.getTableStart() > tableStart);
|
||||||
|
tableStart = grouper.getTableStart();
|
||||||
|
|
||||||
|
// grow it again
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 6);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertTrue(grouper.getTableStart() > tableStart);
|
||||||
|
|
||||||
|
// this time should be all the way
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 14);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(0, grouper.getTableStart());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGrowFourTimes()
|
||||||
|
{
|
||||||
|
final int maxVectorSize = 512;
|
||||||
|
final int keySize = 4;
|
||||||
|
final int aggSize = 8;
|
||||||
|
final WritableMemory keySpace = WritableMemory.allocate(keySize * maxVectorSize);
|
||||||
|
|
||||||
|
final AggregatorAdapters aggregatorAdapters = Mockito.mock(AggregatorAdapters.class);
|
||||||
|
Mockito.when(aggregatorAdapters.spaceNeeded()).thenReturn(aggSize);
|
||||||
|
|
||||||
|
int startingNumBuckets = 4;
|
||||||
|
int maxBuckets = 128;
|
||||||
|
final int bufferSize = (keySize + aggSize) * maxBuckets;
|
||||||
|
final ByteBuffer buffer = ByteBuffer.wrap(new byte[bufferSize]);
|
||||||
|
final HashVectorGrouper grouper = new HashVectorGrouper(
|
||||||
|
Suppliers.ofInstance(buffer),
|
||||||
|
keySize,
|
||||||
|
aggregatorAdapters,
|
||||||
|
maxBuckets,
|
||||||
|
0.f,
|
||||||
|
startingNumBuckets
|
||||||
|
);
|
||||||
|
grouper.initVectorized(maxVectorSize);
|
||||||
|
|
||||||
|
int tableStart = grouper.getTableStart();
|
||||||
|
|
||||||
|
// two keys should cause buffer to grow
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 2);
|
||||||
|
AggregateResult result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(tableStart, grouper.getTableStart());
|
||||||
|
|
||||||
|
// 3rd key should cause buffer to grow
|
||||||
|
// buffer should grow to next size, but is not full
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 3);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertTrue(grouper.getTableStart() > tableStart);
|
||||||
|
tableStart = grouper.getTableStart();
|
||||||
|
|
||||||
|
// grow it again
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 6);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertTrue(grouper.getTableStart() > tableStart);
|
||||||
|
tableStart = grouper.getTableStart();
|
||||||
|
|
||||||
|
// more
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 14);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertTrue(grouper.getTableStart() > tableStart);
|
||||||
|
|
||||||
|
// this time should be all the way
|
||||||
|
fillKeyspace(keySpace, maxVectorSize, 25);
|
||||||
|
result = grouper.aggregateVector(keySpace, 0, maxVectorSize);
|
||||||
|
Assert.assertTrue(result.isOk());
|
||||||
|
Assert.assertEquals(0, grouper.getTableStart());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void fillKeyspace(WritableMemory keySpace, int maxVectorSize, int distinctKeys)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < maxVectorSize; i++) {
|
||||||
|
int bucket = i % distinctKeys;
|
||||||
|
keySpace.putInt(((long) Integer.BYTES * i), bucket);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user