diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 555c759ba76..736fa634916 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -159,6 +159,10 @@ Bug Fixes
 
 * SOLR-7929: SimplePostTool (also bin/post) -filetypes "*" now works properly in 'web' mode (Erik Hatcher)
 
+* SOLR-7954: Fixed an integer overflow bug in the HyperLogLog code used by the 'cardinality' option
+  of stats.field to prevent ArrayIndexOutOfBoundsException in a distributed search when a large precision
+  is selected and a large number of values exist in each shard (hossman)
+
 Optimizations
 ----------------------
 
diff --git a/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java b/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java
index 3245d1bf6e6..559ec86097f 100644
--- a/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java
+++ b/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordDeserializer.java
@@ -99,7 +99,7 @@ class BigEndianAscendingWordDeserializer implements IWordDeserializer {
         }
 
         // First bit of the word
-        final long firstBitIndex = (position * wordLength);
+        final long firstBitIndex = ((long)position) * ((long)wordLength);
         final int firstByteIndex = (bytePadding + (int)(firstBitIndex / BITS_PER_BYTE));
         final int firstByteSkipBits = (int)(firstBitIndex % BITS_PER_BYTE);
 
diff --git a/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java b/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java
index 6bf46fc1a83..dd7d281effa 100644
--- a/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java
+++ b/solr/core/src/java/org/apache/solr/util/hll/BigEndianAscendingWordSerializer.java
@@ -85,7 +85,7 @@ class BigEndianAscendingWordSerializer implements IWordSerializer {
         this.wordLength = wordLength;
         this.wordCount = wordCount;
 
-        final long bitsRequired = (wordLength * wordCount);
+        final long bitsRequired = ((long)wordLength) * ((long)wordCount);
         final boolean leftoverBits = ((bitsRequired % BITS_PER_BYTE) != 0);
         final int bytesRequired = (int)(bitsRequired / BITS_PER_BYTE) + (leftoverBits ? 1 : 0) + bytePadding;
         bytes = new byte[bytesRequired];
diff --git a/solr/core/src/test/org/apache/solr/util/hll/HLLSerializationTest.java b/solr/core/src/test/org/apache/solr/util/hll/HLLSerializationTest.java
index 1717ac3b2ad..0ae4181e5b7 100644
--- a/solr/core/src/test/org/apache/solr/util/hll/HLLSerializationTest.java
+++ b/solr/core/src/test/org/apache/solr/util/hll/HLLSerializationTest.java
@@ -18,6 +18,8 @@ package org.apache.solr.util.hll;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
 import org.junit.Test;
 
 import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
 
@@ -27,6 +29,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import java.util.Random;
+import java.util.EnumSet;
 
 import static org.apache.solr.util.hll.HLL.*;
 
@@ -34,55 +37,190 @@ import static org.apache.solr.util.hll.HLL.*;
  * Serialization smoke-tests.
  */
 public class HLLSerializationTest extends LuceneTestCase {
-    /**
-     * A smoke-test that covers serialization/deserialization of an HLL
-     * under all possible parameters.
-     */
-    @Test
-    @Slow
-    @Nightly
-    public void serializationSmokeTest() throws Exception {
-        final Random random = new Random(randomLong());
-        final int randomCount = 250;
-        final List<Long> randoms = new ArrayList<Long>(randomCount);
-        for (int i=0; i<randomCount; i++) {
-            randoms.add(random.nextLong());
-        }
[...]
+        final List<Long> randoms = new ArrayList<Long>(randomCount);
+        for (int i=0; i<randomCount; i++) {
+            randoms.add(random.nextLong());
+        }
[...]
-    private static void assertCardinality(final HLLType hllType, final Collection<Long> items)
-        throws CloneNotSupportedException {
-        for(int log2m=MINIMUM_LOG2M_PARAM; log2m<=16; log2m++) {
-            for(int regw=MINIMUM_REGWIDTH_PARAM; regw<=MAXIMUM_REGWIDTH_PARAM; regw++) {
-                for(int expthr=MINIMUM_EXPTHRESH_PARAM; expthr<=MAXIMUM_EXPTHRESH_PARAM; expthr++ ) {
-                    for(final boolean sparse: new boolean[]{true, false}) {
-                        HLL hll = new HLL(log2m, regw, expthr, sparse, hllType);
-                        for(final Long item: items) {
-                            hll.addRaw(item);
-                        }
-                        HLL copy = HLL.fromBytes(hll.toBytes());
-                        assertEquals(copy.cardinality(), hll.cardinality());
-                        assertEquals(copy.getType(), hll.getType());
-                        assertTrue(Arrays.equals(copy.toBytes(), hll.toBytes()));
-
-                        HLL clone = hll.clone();
-                        assertEquals(clone.cardinality(), hll.cardinality());
-                        assertEquals(clone.getType(), hll.getType());
-                        assertTrue(Arrays.equals(clone.toBytes(), hll.toBytes()));
-                    }
-                }
-            }
-        }
+        // testing larger values
+        final int maxLog2m = 16;
+        for (HLLType type : EnumSet.allOf(HLLType.class)) {
+            assertCardinality(type, maxLog2m, randoms);
         }
     }
+
+    /**
+     * A smoke-test that covers serialization/deserialization of HLLs
+     * under the max possible numeric init parameters, iterating over all possible combinations of
+     * the other params.
+     *
+     * @see #manyValuesHLLSerializationTest
+     */
+    @Test
+    @Slow
+    @Monster("needs roughly -Dtests.heapsize=8g because of the (multiple) massive data structs")
+    public void monsterHLLSerializationTest() throws Exception {
+        final Random random = new Random(randomLong());
+        final int randomCount = 250;
+        final List<Long> randoms = new ArrayList<Long>(randomCount);
+        for (int i=0; i<randomCount; i++) {
+            randoms.add(random.nextLong());
+        }
[...]
+    private static void assertCardinality(final HLLType hllType, final int maxLog2m, final Collection<Long> items) throws CloneNotSupportedException {
+        for(int regw=MINIMUM_REGWIDTH_PARAM; regw<=MAXIMUM_REGWIDTH_PARAM; regw++) {
+            for(int expthr=MINIMUM_EXPTHRESH_PARAM; expthr<=MAXIMUM_EXPTHRESH_PARAM; expthr++ ) {
+                for(final boolean sparse: new boolean[]{true, false}) {
+                    for(int log2m=MINIMUM_LOG2M_PARAM; log2m<=maxLog2m; log2m++) {
+                        assertCardinality(new HLL(log2m, regw, expthr, sparse, hllType), items);
+                    }
+                }
+            }
+        }
     }
+
+    /**
+     * Adds all of the items to the specified hll, then does a round trip serialize/deserialize and confirms
+     * equality of several properties (including the byte serialization). Repeats process with a clone.
+     */
+    private static void assertCardinality(HLL hll, final Collection<Long> items)
+        throws CloneNotSupportedException {
+
+        for (final Long item: items) {
+            hll.addRaw(item);
+        }
+
+        final long hllCardinality = hll.cardinality();
+        final HLLType hllType = hll.getType();
+        final byte[] hllBytes = hll.toBytes();
+        hll = null; // allow some GC
+
+        HLL copy = HLL.fromBytes(hllBytes);
+        assertEquals(copy.cardinality(), hllCardinality);
+        assertEquals(copy.getType(), hllType);
+        assertTrue(Arrays.equals(copy.toBytes(), hllBytes));
+
+        HLL clone = copy.clone();
+        copy = null; // allow some GC
+
+        assertEquals(clone.cardinality(), hllCardinality);
+        assertEquals(clone.getType(), hllType);
+        assertTrue(Arrays.equals(clone.toBytes(), hllBytes));
+    }
 }
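
Note (not part of the patch): the overflow class addressed above is easy to reproduce in isolation. The standalone Java sketch below mirrors the variable names used in BigEndianAscendingWordSerializer, but the class name and the concrete values are illustrative assumptions, not code or numbers taken from the Solr source tree.

// Standalone sketch of the 32-bit overflow fixed above; values are illustrative only.
public class HllOverflowSketch {
    public static void main(String[] args) {
        final int wordLength = 5;        // e.g. a 5-bit register width
        final int wordCount = 1 << 30;   // e.g. 2^30 registers in a high-precision sketch

        // Broken form: both operands are int, so the multiplication is performed in
        // 32-bit arithmetic and wraps around before the result is widened to long.
        final long broken = (wordLength * wordCount);

        // Fixed form (the shape used in the patch): widening either operand first
        // forces a 64-bit multiplication.
        final long fixed = ((long) wordLength) * ((long) wordCount);

        System.out.println(broken); // 1073741824 -- wrapped, far too small
        System.out.println(fixed);  // 5368709120 -- the intended bit count
    }
}

A wrapped bit count of this kind leads to an undersized byte[] in the serializer and a mis-computed byte offset in the deserializer, which is consistent with the ArrayIndexOutOfBoundsException described in the CHANGES entry.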