InDimFilter: Fix cache key computation to avoid collisions. (#11168)

The prior code did not include separation between values, and encoded
null ambiguously. This patch fixes both of those issues by encoding
strings as length + value instead of just value.

I think cache key computation was OK prior to #9800. Prior to that
patch, the cache key was computed using CacheKeyBuilder.appendStrings,
which encodes strings as UTF-8 and inserts a separator byte (0xff)
between them that cannot appear in a UTF-8 stream.
This commit is contained in:
Gian Merlino 2021-04-28 17:28:29 -07:00 committed by GitHub
parent ad028de538
commit 7d808e357c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 1 deletions

View File

@ -432,14 +432,18 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
sortedValues = sortedValuesList;
}
// Hash all values, in sorted order, as their length followed by their content.
final Hasher hasher = Hashing.sha256().newHasher();
for (String v : sortedValues) {
if (v == null) {
hasher.putInt(0);
// Encode null as length -1, no content.
hasher.putInt(-1);
} else {
hasher.putInt(v.length());
hasher.putString(v, StandardCharsets.UTF_8);
}
}
return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
.appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR)

View File

@ -97,6 +97,19 @@ public class InDimFilterTest extends InitializedNullHandlingTest
Assert.assertArrayEquals(dimFilter1.getCacheKey(), dimFilter2.getCacheKey());
}
@Test
public void testGetCacheKeyForNullVsEmptyString()
{
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("", "abc"), null);
if (NullHandling.sqlCompatible()) {
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
} else {
Assert.assertArrayEquals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey());
}
}
@Test
public void testGetCacheKeyReturningSameKeyForSetsOfDifferentTypesAndComparators()
{
@ -119,6 +132,22 @@ public class InDimFilterTest extends InitializedNullHandlingTest
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
}
@Test
public void testGetCacheKeyDifferentKeysForNullAndFourZeroChars()
{
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("\0\0\0\0", "abc"), null);
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
}
@Test
public void testGetCacheKeyDifferentKeysWhenStringBoundariesMove()
{
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("bar", "foo"), null);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("barf", "oo"), null);
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
}
@Test
public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn()
{