InDimFilter: Fix cache key computation to avoid collisions. (#11168)

The prior code did not include separation between values, and encoded
null ambiguously. This patch fixes both of those issues by encoding
strings as length + value instead of just value.

I think cache key computation was OK prior to #9800. Prior to that
patch, the cache key was computed using CacheKeyBuilder.appendStrings,
which encodes strings as UTF-8 and inserts a separator byte (0xff)
between them that cannot appear in a UTF-8 stream.
This commit is contained in:
Gian Merlino 2021-04-28 17:28:29 -07:00 committed by GitHub
parent ad028de538
commit 7d808e357c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 1 deletions

View File

@ -432,14 +432,18 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
sortedValues = sortedValuesList; sortedValues = sortedValuesList;
} }
// Hash all values, in sorted order, as their length followed by their content.
final Hasher hasher = Hashing.sha256().newHasher(); final Hasher hasher = Hashing.sha256().newHasher();
for (String v : sortedValues) { for (String v : sortedValues) {
if (v == null) { if (v == null) {
hasher.putInt(0); // Encode null as length -1, no content.
hasher.putInt(-1);
} else { } else {
hasher.putInt(v.length());
hasher.putString(v, StandardCharsets.UTF_8); hasher.putString(v, StandardCharsets.UTF_8);
} }
} }
return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID) return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
.appendString(dimension) .appendString(dimension)
.appendByte(DimFilterUtils.STRING_SEPARATOR) .appendByte(DimFilterUtils.STRING_SEPARATOR)

View File

@ -97,6 +97,19 @@ public class InDimFilterTest extends InitializedNullHandlingTest
Assert.assertArrayEquals(dimFilter1.getCacheKey(), dimFilter2.getCacheKey()); Assert.assertArrayEquals(dimFilter1.getCacheKey(), dimFilter2.getCacheKey());
} }
@Test
public void testGetCacheKeyForNullVsEmptyString()
{
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("", "abc"), null);
if (NullHandling.sqlCompatible()) {
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
} else {
Assert.assertArrayEquals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey());
}
}
@Test @Test
public void testGetCacheKeyReturningSameKeyForSetsOfDifferentTypesAndComparators() public void testGetCacheKeyReturningSameKeyForSetsOfDifferentTypesAndComparators()
{ {
@ -119,6 +132,22 @@ public class InDimFilterTest extends InitializedNullHandlingTest
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey())); Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
} }
@Test
public void testGetCacheKeyDifferentKeysForNullAndFourZeroChars()
{
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList(null, "abc"), null);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("\0\0\0\0", "abc"), null);
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
}
@Test
public void testGetCacheKeyDifferentKeysWhenStringBoundariesMove()
{
final InDimFilter inDimFilter1 = new InDimFilter("dimTest", Arrays.asList("bar", "foo"), null);
final InDimFilter inDimFilter2 = new InDimFilter("dimTest", Arrays.asList("barf", "oo"), null);
Assert.assertFalse(Arrays.equals(inDimFilter1.getCacheKey(), inDimFilter2.getCacheKey()));
}
@Test @Test
public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn() public void testGetCacheKeyDifferentKeysForListOfStringsAndSingleStringOfListsWithExtractFn()
{ {