mirror of https://github.com/apache/druid.git
InDimFilter: Fix cache key computation to avoid collisions. (#11168)
The prior code did not include separation between values, and encoded null ambiguously. This patch fixes both of those issues by encoding strings as length + value instead of just value. I think cache key computation was OK prior to #9800. Prior to that patch, the cache key was computed using CacheKeyBuilder.appendStrings, which encodes strings as UTF-8 and inserts a separator byte (0xff) between them that cannot appear in a UTF-8 stream.
This commit is contained in:
parent
ad028de538
commit
7d808e357c
|
@ -432,14 +432,18 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
|
|||
sortedValues = sortedValuesList;
|
||||
}
|
||||
|
||||
// Hash all values, in sorted order, as their length followed by their content.
|
||||
final Hasher hasher = Hashing.sha256().newHasher();
|
||||
for (String v : sortedValues) {
|
||||
if (v == null) {
|
||||
hasher.putInt(0);
|
||||
// Encode null as length -1, no content.
|
||||
hasher.putInt(-1);
|
||||
} else {
|
||||
hasher.putInt(v.length());
|
||||
hasher.putString(v, StandardCharsets.UTF_8);
|
||||
}
|
||||
}
|
||||
|
||||
return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
|
||||
.appendString(dimension)
|
||||
.appendByte(DimFilterUtils.STRING_SEPARATOR)
|
||||
|
|
|
@ -97,6 +97,19 @@ public class InDimFilterTest extends InitializedNullHandlingTest
|
|||
Assert.assertArrayEquals(dimFilter1.getCacheKey(), dimFilter2.getCacheKey());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCacheKeyForNullVsEmptyString()
|
||||
{
|
||||
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
|
||||
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("", "abc"), null);
|
||||
|
||||
if (NullHandling.sqlCompatible()) {
|
||||
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
|
||||
} else {
|
||||
Assert.assertArrayEquals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCacheKeyReturningSameKeyForSetsOfDifferentTypesAndComparators()
|
||||
{
|
||||
|
@ -119,6 +132,22 @@ public class InDimFilterTest extends InitializedNullHandlingTest
|
|||
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCacheKeyDifferentKeysForNullAndFourZeroChars()
|
||||
{
|
||||
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
|
||||
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("\0\0\0\0", "abc"), null);
|
||||
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCacheKeyDifferentKeysWhenStringBoundariesMove()
|
||||
{
|
||||
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("bar", "foo"), null);
|
||||
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("barf", "oo"), null);
|
||||
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn()
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue