Ported tests as well.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr7787@1691351 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dawid Weiss 2015-07-16 10:47:37 +00:00
parent 1842589815
commit 139460e8c5
10 changed files with 2643 additions and 0 deletions

View File

@@ -0,0 +1,191 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import java.util.Random;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
/**
* Unit and smoke tests for {@link BigEndianAscendingWordDeserializer}.
*
* @author timon
*/
public class BigEndianAscendingWordDeserializerTest extends LuceneTestCase {
  /**
   * Error checking tests for the constructor: word length must lie within
   * [1, 64] and byte padding must be non-negative.
   */
  @Test
  public void constructorErrorTest() {
    // word length too small
    try {
      new BigEndianAscendingWordDeserializer(0/*wordLength, below minimum of 1*/, 0/*bytePadding, arbitrary*/, new byte[1]/*bytes, arbitrary, not used here*/);
      fail("Should complain about too-short words.");
    } catch(final IllegalArgumentException e) {
      assertTrue(e.getMessage().contains("Word length must be"));
    }

    // word length too large
    try {
      new BigEndianAscendingWordDeserializer(65/*wordLength, above maximum of 64*/, 0/*bytePadding, arbitrary*/, new byte[1]/*bytes, arbitrary, not used here*/);
      fail("Should complain about too-long words.");
    } catch(final IllegalArgumentException e) {
      assertTrue(e.getMessage().contains("Word length must be"));
    }

    // byte padding negative
    try {
      new BigEndianAscendingWordDeserializer(5/*wordLength, arbitrary*/, -1/*bytePadding, too small*/, new byte[1]/*bytes, arbitrary, not used here*/);
      fail("Should complain about negative byte padding.");
    } catch(final IllegalArgumentException e) {
      assertTrue(e.getMessage().contains("Byte padding must be"));
    }
  }

  /**
   * Smoke test using 64-bit words and special word values: verifies that the
   * sign bit and the extreme long values survive a serialize/deserialize
   * round-trip.
   */
  @Test
  public void smokeTest64BitWord() {
    final BigEndianAscendingWordSerializer serializer =
        new BigEndianAscendingWordSerializer(64/*wordLength*/,
                                             5/*wordCount*/,
                                             0/*bytePadding, arbitrary*/);

    // Check that the sign bit is being preserved.
    serializer.writeWord(-1L);
    serializer.writeWord(-112894714L);

    // Check "special" values
    serializer.writeWord(0L);
    serializer.writeWord(Long.MAX_VALUE);
    serializer.writeWord(Long.MIN_VALUE);

    final byte[] bytes = serializer.getBytes();

    final BigEndianAscendingWordDeserializer deserializer =
        new BigEndianAscendingWordDeserializer(64/*wordLength*/, 0/*bytePadding*/, bytes);
    // NOTE: JUnit's assertEquals takes (expected, actual) -- keeping that order
    // makes failure messages report the right roles.
    assertEquals(5/*wordCount*/, deserializer.totalWordCount());

    assertEquals(-1L, deserializer.readWord());
    assertEquals(-112894714L, deserializer.readWord());
    assertEquals(0L, deserializer.readWord());
    assertEquals(Long.MAX_VALUE, deserializer.readWord());
    assertEquals(Long.MIN_VALUE, deserializer.readWord());
  }

  /**
   * A smoke/fuzz test for ascending (from zero) word values across every
   * supported word length.
   */
  @Test
  public void ascendingSmokeTest() {
    for(int wordLength=5; wordLength<65; wordLength++) {
      runAscendingTest(wordLength, 3/*bytePadding, arbitrary*/, 100000/*wordCount, arbitrary*/);
    }
  }

  /**
   * A smoke/fuzz test for random word values across every supported word
   * length.
   */
  @Test
  public void randomSmokeTest() {
    for(int wordLength=5; wordLength<65; wordLength++) {
      runRandomTest(wordLength, 3/*bytePadding, arbitrary*/, 100000/*wordCount, arbitrary*/);
    }
  }

  // ------------------------------------------------------------------------
  /**
   * Runs a test which serializes and deserializes random word values, using
   * two {@link Random} instances seeded identically: one to generate the
   * words that are written, the other to regenerate the same sequence for
   * verification.
   *
   * @param wordLength the length of words to test
   * @param bytePadding the number of bytes padding the byte array
   * @param wordCount the number of word values to test
   */
  private static void runRandomTest(final int wordLength, final int bytePadding, final int wordCount) {
    final long seed = randomLong();
    final Random random = new Random(seed);
    final Random verificationRandom = new Random(seed);

    // Mask off the low 'wordLength' bits; shifting by 64 would be a no-op in
    // Java, so the full-width case is special-cased.
    final long wordMask;
    if(wordLength == 64) {
      wordMask = ~0L;
    } else {
      wordMask = (1L << wordLength) - 1L;
    }

    final BigEndianAscendingWordSerializer serializer =
        new BigEndianAscendingWordSerializer(wordLength/*wordLength, arbitrary*/,
                                             wordCount,
                                             bytePadding/*bytePadding, arbitrary*/);

    for(int i=0; i<wordCount; i++) {
      final long value = random.nextLong() & wordMask;
      serializer.writeWord(value);
    }

    final byte[] bytes = serializer.getBytes();

    final BigEndianAscendingWordDeserializer deserializer =
        new BigEndianAscendingWordDeserializer(wordLength, bytePadding, bytes);
    assertEquals(wordCount, deserializer.totalWordCount());

    for(int i=0; i<wordCount; i++) {
      assertEquals((verificationRandom.nextLong() & wordMask), deserializer.readWord());
    }
  }

  /**
   * Runs a test which serializes and deserializes ascending (from zero) word
   * values.
   *
   * @param wordLength the length of words to test
   * @param bytePadding the number of bytes padding the byte array
   * @param wordCount the number of word values to test
   */
  private static void runAscendingTest(final int wordLength, final int bytePadding, final int wordCount) {
    // Mask off the low 'wordLength' bits (see runRandomTest for the 64-bit
    // special case rationale).
    final long wordMask;
    if(wordLength == 64) {
      wordMask = ~0L;
    } else {
      wordMask = (1L << wordLength) - 1L;
    }

    final BigEndianAscendingWordSerializer serializer =
        new BigEndianAscendingWordSerializer(wordLength/*wordLength, arbitrary*/,
                                             wordCount,
                                             bytePadding/*bytePadding, arbitrary*/);

    for(long i=0; i<wordCount; i++) {
      serializer.writeWord(i & wordMask);
    }

    final byte[] bytes = serializer.getBytes();

    final BigEndianAscendingWordDeserializer deserializer =
        new BigEndianAscendingWordDeserializer(wordLength, bytePadding, bytes);
    assertEquals(wordCount, deserializer.totalWordCount());

    for(long i=0; i<wordCount; i++) {
      assertEquals(i & wordMask, deserializer.readWord());
    }
  }
}

View File

@@ -0,0 +1,337 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import java.util.Arrays;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/**
* Unit tests for {@link BigEndianAscendingWordSerializer}.
*/
public class BigEndianAscendingWordSerializerTest extends LuceneTestCase {
/**
* Error checking tests for constructor.
*/
@Test
public void constructorErrorTest() {
// word length too small
try {
new BigEndianAscendingWordSerializer(0/*wordLength, below minimum of 1*/, 1/*wordCount, arbitrary*/, 0/*bytePadding, arbitrary*/);
fail("Should complain about too-short words.");
} catch(final IllegalArgumentException e) {
assertTrue(e.getMessage().contains("Word length must be"));
}
// word length too large
try {
new BigEndianAscendingWordSerializer(65/*wordLength, above max of 64*/, 1/*wordCount, arbitrary*/, 0/*bytePadding, arbitrary*/);
fail("Should complain about too-long words.");
} catch(final IllegalArgumentException e) {
assertTrue(e.getMessage().contains("Word length must be"));
}
// word count negative
try {
new BigEndianAscendingWordSerializer(5/*wordLength, arbitrary*/, -1/*wordCount, too small*/, 0/*bytePadding, arbitrary*/);
fail("Should complain about negative word count.");
} catch(final IllegalArgumentException e) {
assertTrue(e.getMessage().contains("Word count must be"));
}
// byte padding negative
try {
new BigEndianAscendingWordSerializer(5/*wordLength, arbitrary*/, 1/*wordCount, arbitrary*/, -1/*bytePadding, too small*/);
fail("Should complain about negative byte padding.");
} catch(final IllegalArgumentException e) {
assertTrue(e.getMessage().contains("Byte padding must be"));
}
}
/**
* Tests runtime exception thrown at premature call to {@link BigEndianAscendingWordSerializer#getBytes()}.
*/
@Test
public void earlyGetBytesTest() {
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(5/*wordLength, arbitrary*/,
1/*wordCount*/,
0/*bytePadding, arbitrary*/);
// getBytes without enough writeWord should throw
try {
serializer.getBytes();
fail("Should throw.");
} catch(final RuntimeException e) {
assertTrue(e.getMessage().contains("Not all words"));
}
}
/**
*/
@Test
public void smokeTestExplicitParams() {
final int shortWordLength = 64/*longs used in LongSetSlab*/;
{// Should work on an empty sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
0/*wordCount*/,
0/*bytePadding, none*/);
assert(Arrays.equals(serializer.getBytes(), new byte[0]));
}
{// Should work on a byte-divisible sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
2/*wordCount*/,
0/*bytePadding, none*/);
serializer.writeWord(0xBAAAAAAAAAAAAAACL);
serializer.writeWord(0x8FFFFFFFFFFFFFF1L);
// Bytes:
// ======
// 0xBA 0xAA 0xAA 0xAA 0xAA 0xAA 0xAA 0xAC
// 0x8F 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xF1
//
// -70 -86 ... -84
// -113 -1 ... -15
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { -70, -86, -86, -86, -86, -86, -86, -84,
-113, -1, -1, -1, -1, -1, -1, -15 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
{// Should pad the array correctly.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
1/*wordCount*/,
1/*bytePadding*/);
serializer.writeWord(1);
// 1 byte leading padding | value 1 | trailing padding
// 0000 0000 | 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0001
// 0x00 | 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x01
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 1 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
}
/**
* Smoke test for typical parameters used in practice.
*/
@Test
public void smokeTestProbabilisticParams() {
// XXX: revisit this
final int shortWordLength = 5;
{// Should work on an empty sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
0/*wordCount*/,
0/*bytePadding, none*/);
assert(Arrays.equals(serializer.getBytes(), new byte[0]));
}
{// Should work on a non-byte-divisible sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
3/*wordCount*/,
0/*bytePadding, none*/);
serializer.writeWord(9);
serializer.writeWord(31);
serializer.writeWord(1);
// The values:
// -----------
// 9 |31 |1 |padding
// Corresponding bits:
// ------------------
// 0100 1|111 11|00 001|0
// And the hex/decimal (remember Java bytes are signed):
// -----------------------------------------------------
// 0100 1111 -> 0x4F -> 79
// 1100 0010 -> 0xC2 -> -62
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { 79, -62 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
{// Should work on a byte-divisible sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
8/*wordCount*/,
0/*bytePadding, none*/);
for(int i=1; i<9; i++) {
serializer.writeWord(i);
}
// Values: 1-8
// Corresponding bits:
// ------------------
// 00001
// 00010
// 00011
// 00100
// 00101
// 00110
// 00111
// 01000
// And the hex:
// ------------
// 0000 1000 => 0x08 => 8
// 1000 0110 => 0x86 => -122
// 0100 0010 => 0x62 => 66
// 1001 1000 => 0x98 => -104
// 1110 1000 => 0xE8 => -24
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { 8, -122, 66, -104, -24 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
{// Should pad the array correctly.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
1/*wordCount*/,
1/*bytePadding*/);
serializer.writeWord(1);
// 1 byte leading padding | value 1 | trailing padding
// 0000 0000 | 0000 1|000
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { 0, 8 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
}
/**
* Smoke test for typical parameters used in practice.
*/
@Test
public void smokeTestSparseParams() {
// XXX: revisit
final int shortWordLength = 17;
{// Should work on an empty sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
0/*wordCount*/,
0/*bytePadding, none*/);
assert(Arrays.equals(serializer.getBytes(), new byte[0]));
}
{// Should work on a non-byte-divisible sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
3/*wordCount*/,
0/*bytePadding, none*/);
serializer.writeWord(9);
serializer.writeWord(42);
serializer.writeWord(75);
// The values:
// -----------
// 9 |42 |75 |padding
// Corresponding bits:
// ------------------
// 0000 0000 0000 0100 1|000 0000 0000 1010 10|00 0000 0000 1001 011|0 0000
// And the hex/decimal (remember Java bytes are signed):
// -----------------------------------------------------
// 0000 0000 -> 0x00 -> 0
// 0000 0100 -> 0x04 -> 4
// 1000 0000 -> 0x80 -> -128
// 0000 1010 -> 0x0A -> 10
// 1000 0000 -> 0x80 -> -128
// 0000 1001 -> 0x09 -> 9
// 0110 0000 -> 0x60 -> 96
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { 0, 4, -128, 10, -128, 9, 96 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
{// Should work on a byte-divisible sequence, with no padding.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
8/*wordCount*/,
0/*bytePadding, none*/);
for(int i=1; i<9; i++) {
serializer.writeWord(i);
}
// Values: 1-8
// Corresponding bits:
// ------------------
// 0000 0000 0000 0000 1
// 000 0000 0000 0000 10
// 00 0000 0000 0000 011
// 0 0000 0000 0000 0100
// 0000 0000 0000 0010 1
// 000 0000 0000 0001 10
// 00 0000 0000 0000 111
// 0 0000 0000 0000 1000
// And the hex:
// ------------
// 0000 0000 -> 0x00 -> 0
// 0000 0000 -> 0x00 -> 0
// 1000 0000 -> 0x80 -> -128
// 0000 0000 -> 0x00 -> 0
// 1000 0000 -> 0x80 -> -128
// 0000 0000 -> 0x00 -> 0
// 0110 0000 -> 0x60 -> 96
// 0000 0000 -> 0x00 -> 0
// 0100 0000 -> 0x40 -> 64
// 0000 0000 -> 0x00 -> 0
// 0010 1000 -> 0x28 -> 40
// 0000 0000 -> 0x00 -> 0
// 0001 1000 -> 0x18 -> 24
// 0000 0000 -> 0x00 -> 0
// 0000 1110 -> 0x0D -> 14
// 0000 0000 -> 0x00 -> 0
// 0000 1000 -> 0x08 -> 8
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { 0, 0, -128, 0, -128, 0, 96, 0, 64, 0, 40, 0, 24, 0, 14, 0, 8 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
{// Should pad the array correctly.
final BigEndianAscendingWordSerializer serializer =
new BigEndianAscendingWordSerializer(shortWordLength,
1/*wordCount*/,
1/*bytePadding*/);
serializer.writeWord(1);
// 1 byte leading padding | value 1 | trailing padding
// 0000 0000 | 0000 0000 0000 0000 1|000 0000
// 0x00 0x00 0x00 0x80
final byte[] bytes = serializer.getBytes();
final byte[] expectedBytes = new byte[] { 0, 0, 0, -128 };
assertTrue(Arrays.equals(bytes, expectedBytes));
}
}
}

View File

@@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/**
* Unit tests for {@link BitVector}.
*/
public class BitVectorTest extends LuceneTestCase {
  /**
   * Tests {@link BitVector#getRegister(long)} and
   * {@link BitVector#setRegister(long, long)} with four fill patterns:
   * all-max, ascending, descending, and a constant mid-range value.
   */
  @Test
  public void getSetRegisterTest() {
    { // locally scoped for sanity
      // NOTE: registers are only 5bits wide
      final BitVector vector1 = new BitVector(5/*width*/, 128/*count, 2^7*/);
      final BitVector vector2 = new BitVector(5/*width*/, 128/*count, 2^7*/);
      final BitVector vector3 = new BitVector(5/*width*/, 128/*count, 2^7*/);
      final BitVector vector4 = new BitVector(5/*width*/, 128/*count, 2^7*/);

      for(int i=0; i<128/*2^7*/; i++) {
        vector1.setRegister(i, 0x1F);
        vector2.setRegister(i, (i & 0x1F));
        vector3.setRegister(i, ((127 - i) & 0x1F));
        vector4.setRegister(i, 0x15);
      }

      // NOTE: assertEquals takes (expected, actual) so failure messages
      // report the right roles.
      for(int i=0; i<128/*2^7*/; i++) {
        assertEquals(0x1F, vector1.getRegister(i));
        assertEquals((i & 0x1F), vector2.getRegister(i));
        assertEquals(((127 - i) & 0x1F), vector3.getRegister(i));
        assertEquals(0x15, vector4.getRegister(i));
      }
    }
  }

  // ========================================================================
  /**
   * Tests {@link BitVector#registerIterator()}
   */
  @Test
  public void registerIteratorTest() {
    { // scoped locally for sanity
      // NOTE: registers are only 5bits wide
      final BitVector vector1 = new BitVector(5/*width*/, 128/*count, 2^7*/);
      final BitVector vector2 = new BitVector(5/*width*/, 128/*count, 2^7*/);
      final BitVector vector3 = new BitVector(5/*width*/, 128/*count, 2^7*/);
      final BitVector vector4 = new BitVector(5/*width*/, 128/*count, 2^7*/);

      for(int i=0; i<128/*2^7*/; i++) {
        vector1.setRegister(i, 0x1F);
        vector2.setRegister(i, (i & 0x1F));
        vector3.setRegister(i, ((127 - i) & 0x1F));
        vector4.setRegister(i, 0x15);
      }

      final LongIterator registerIterator1 = vector1.registerIterator();
      final LongIterator registerIterator2 = vector2.registerIterator();
      final LongIterator registerIterator3 = vector3.registerIterator();
      final LongIterator registerIterator4 = vector4.registerIterator();
      for(int i=0; i<128/*2^7*/; i++) {
        assertTrue(registerIterator1.hasNext());
        assertTrue(registerIterator2.hasNext());
        assertTrue(registerIterator3.hasNext());
        assertTrue(registerIterator4.hasNext());

        assertEquals(0x1F, registerIterator1.next());
        assertEquals((i & 0x1F), registerIterator2.next());
        assertEquals(((127 - i) & 0x1F), registerIterator3.next());
        assertEquals(0x15, registerIterator4.next());
      }
      assertFalse(registerIterator1.hasNext()/*no more*/);
      assertFalse(registerIterator2.hasNext()/*no more*/);
      assertFalse(registerIterator3.hasNext()/*no more*/);
      assertFalse(registerIterator4.hasNext()/*no more*/);
    }
    { // scoped locally for sanity
      // Vectors that are shorter than one word
      assertIterator(1, 12/* 1*12=12 bits, fewer than a single word */);
      assertIterator(2, 12/* 2*12=24 bits, fewer than a single word */);
      assertIterator(3, 12/* 3*12=36 bits, fewer than a single word */);
      assertIterator(4, 12/* 4*12=48 bits, fewer than a single word */);

      // Vectors that don't fit exactly into longs
      assertIterator(5, 16/* 5*16=80 bits */);
      assertIterator(5, 32/* 5*32=160 bits */);
    }

    // Iterate over vectors that are padded
  }

  /**
   * Asserts that a fresh (all-zero) vector's register iterator yields
   * exactly {@code count} zero values.
   *
   * @param width the register width of the vector under test
   * @param count the number of registers in the vector under test
   */
  private static void assertIterator(final int width, final int count) {
    final BitVector vector = new BitVector(width, count);
    final LongIterator iter = vector.registerIterator();

    for(int i=0; i<count; i++) {
      assertTrue(String.format("expected more elements: width=%s, count=%s", width, count), iter.hasNext());
      // TODO: fill with a sentinel value
      assertEquals(0, iter.next());
    }
    assertFalse(String.format("expected no more elements: width=%s, count=%s", width, count), iter.hasNext());
  }

  // ========================================================================
  /**
   * Tests {@link BitVector#setMaxRegister(long, long)}: the register keeps
   * the maximum of its current and the candidate value.
   */
  @Test
  public void setMaxRegisterTest() {
    final BitVector vector = new BitVector(5/*width*/, 128/*count, 2^7*/);
    vector.setRegister(0, 10);

    // should replace with a larger value
    vector.setMaxRegister(0, 11);
    assertEquals(11, vector.getRegister(0));

    // should not replace with a smaller or equal value
    vector.setMaxRegister(0, 9);
    assertEquals(11, vector.getRegister(0));
    vector.setMaxRegister(0, 11);
    assertEquals(11, vector.getRegister(0));
  }

  // ========================================================================
  // fill
  /**
   * Tests {@link BitVector#fill(long)}: filling overwrites every register
   * with the given value.
   */
  @Test
  public void fillTest() {
    final BitVector vector = new BitVector(5/*width*/, 128/*count, 2^7*/);
    for(int i=0; i<128/*2^7*/; i++) {
      vector.setRegister(i, i);
    }

    vector.fill(0L);
    for(int i=0; i<128/*2^7*/; i++) {
      assertEquals(0, vector.getRegister(i));
    }

    vector.fill(17L/*arbitrary*/);
    for(int i=0; i<128/*2^7*/; i++) {
      assertEquals(17/*arbitrary*/, vector.getRegister(i));
    }
  }
}

View File

@@ -0,0 +1,235 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import java.util.HashSet;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
import com.carrotsearch.hppc.LongOpenHashSet;
import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
/**
* Tests {@link HLL} of type {@link HLLType#EXPLICIT}.
*/
public class ExplicitHLLTest extends LuceneTestCase {
  /**
   * Tests basic set semantics of {@link HLL#addRaw(long)}: positive,
   * negative, and duplicate values.
   */
  @Test
  public void addBasicTest() {
    { // Adding a single positive value to an empty set should work.
      final HLL hll = newHLL(128/*arbitrary*/);
      hll.addRaw(1L/*positive*/);
      assertEquals(1L, hll.cardinality());
    }
    { // Adding a single negative value to an empty set should work.
      final HLL hll = newHLL(128/*arbitrary*/);
      hll.addRaw(-1L/*negative*/);
      assertEquals(1L, hll.cardinality());
    }
    { // Adding a duplicate value to a set should be a no-op.
      final HLL hll = newHLL(128/*arbitrary*/);
      hll.addRaw(1L/*positive*/);
      assertEquals(1L/*arbitrary*/, hll.cardinality());
      // FIX: the duplicate was never actually added before the second
      // assertion -- add it so the no-op behavior is really exercised.
      hll.addRaw(1L/*dupe*/);
      assertEquals(1L/*unchanged*/, hll.cardinality());
    }
  }

  // ------------------------------------------------------------------------
  /**
   * Tests {@link HLL#union(HLL)}.
   */
  @Test
  public void unionTest() {
    {// Unioning two distinct sets should work
      final HLL hllA = newHLL(128/*arbitrary*/);
      final HLL hllB = newHLL(128/*arbitrary*/);
      hllA.addRaw(1L);
      hllA.addRaw(2L);
      hllB.addRaw(3L);

      hllA.union(hllB);
      assertEquals(3, hllA.cardinality());
    }
    {// Unioning two sets whose union doesn't exceed the cardinality cap should not promote
      final HLL hllA = newHLL(128/*arbitrary*/);
      final HLL hllB = newHLL(128/*arbitrary*/);
      hllA.addRaw(1L);
      hllA.addRaw(2L);
      hllB.addRaw(1L);

      hllA.union(hllB);
      assertEquals(2, hllA.cardinality());
    }
    {// unioning two sets whose union exceeds the cardinality cap should promote
      final HLL hllA = newHLL(128/*arbitrary*/);
      final HLL hllB = newHLL(128/*arbitrary*/);

      // fill up sets to explicitThreshold
      for(long i=0; i<128/*explicitThreshold*/; i++) {
        hllA.addRaw(i);
        hllB.addRaw(i + 128);
      }

      hllA.union(hllB);
      assertEquals(HLLType.SPARSE, hllA.getType());
    }
  }

  // ------------------------------------------------------------------------
  /**
   * Tests {@link HLL#clear()}
   */
  @Test
  public void clearTest() {
    final HLL hll = newHLL(128/*arbitrary*/);
    hll.addRaw(1L);
    assertEquals(1L, hll.cardinality());
    hll.clear();
    assertEquals(0L, hll.cardinality());
  }

  // ------------------------------------------------------------------------
  /**
   * Tests the {@link HLL#toBytes(ISchemaVersion)} /
   * {@link HLL#fromBytes(byte[])} round-trip, including the serialized
   * byte length, for empty, partially filled, and full explicit sets.
   */
  @Test
  public void toFromBytesTest() {
    final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION;
    final HLLType type = HLLType.EXPLICIT;
    final int padding = schemaVersion.paddingBytes(type);
    final int bytesPerWord = 8;

    {// Should work on an empty set
      final HLL hll = newHLL(128/*arbitrary*/);

      final byte[] bytes = hll.toBytes(schemaVersion);

      // assert output has correct byte length
      assertEquals(padding/*no elements, just padding*/, bytes.length);

      final HLL inHLL = HLL.fromBytes(bytes);
      assertElementsEqual(hll, inHLL);
    }
    {// Should work on a partially filled set
      final HLL hll = newHLL(128/*arbitrary*/);

      for(int i=0; i<3; i++) {
        hll.addRaw(i);
      }

      final byte[] bytes = hll.toBytes(schemaVersion);

      // assert output has correct byte length
      assertEquals(padding + (bytesPerWord * 3/*elements*/), bytes.length);

      final HLL inHLL = HLL.fromBytes(bytes);
      assertElementsEqual(hll, inHLL);
    }
    {// Should work on a full set
      final int explicitThreshold = 128;
      final HLL hll = newHLL(explicitThreshold);

      for(int i=0; i<explicitThreshold; i++) {
        hll.addRaw(27 + i/*arbitrary*/);
      }

      final byte[] bytes = hll.toBytes(schemaVersion);

      // assert output has correct byte length
      assertEquals(padding + (bytesPerWord * explicitThreshold/*elements*/), bytes.length);

      final HLL inHLL = HLL.fromBytes(bytes);
      assertElementsEqual(hll, inHLL);
    }
  }

  // ------------------------------------------------------------------------
  /**
   * Tests correctness against {@link java.util.HashSet}: an explicit HLL
   * below its threshold must be exact.
   */
  @Test
  public void randomValuesTest() {
    final int explicitThreshold = 4096;
    final HashSet<Long> canonical = new HashSet<Long>();
    final HLL hll = newHLL(explicitThreshold);

    for(int i=0;i<explicitThreshold;i++){
      long randomLong = randomLong();
      canonical.add(randomLong);
      hll.addRaw(randomLong);
    }
    final int canonicalCardinality = canonical.size();
    assertEquals(canonicalCardinality, hll.cardinality());
  }

  // ------------------------------------------------------------------------
  /**
   * Tests promotion to {@link HLLType#SPARSE} and {@link HLLType#FULL}.
   */
  @Test
  public void promotionTest() {
    { // locally scoped for sanity
      final int explicitThreshold = 128;
      final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold*/, HLLType.EXPLICIT);

      for(int i=0;i<explicitThreshold + 1;i++){
        hll.addRaw(i);
      }
      assertEquals(HLLType.SPARSE, hll.getType());
    }
    { // locally scoped for sanity
      final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, 4/*expthresh => explicitThreshold = 8*/, false/*sparseon*/, HLLType.EXPLICIT);

      for(int i=0;i<9/* > explicitThreshold */;i++){
        hll.addRaw(i);
      }
      assertEquals(HLLType.FULL, hll.getType());
    }
  }

  // ************************************************************************
  // assertion helpers
  /**
   * Asserts that values in both sets are exactly equal, by comparing the
   * internal explicit storage sets directly.
   */
  private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
    final LongOpenHashSet internalSetA = hllA.explicitStorage;
    final LongOpenHashSet internalSetB = hllB.explicitStorage;

    assertTrue(internalSetA.equals(internalSetB));
  }

  /**
   * Builds a {@link HLLType#EXPLICIT} {@link HLL} instance with the specified
   * explicit threshold.
   *
   * @param explicitThreshold explicit threshold to use for the constructed
   *        {@link HLL}. This must be greater than zero.
   * @return a default-sized {@link HLLType#EXPLICIT} empty {@link HLL} instance.
   *         This will never be <code>null</code>.
   */
  private static HLL newHLL(final int explicitThreshold) {
    return new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold, arbitrary, unused*/, HLLType.EXPLICIT);
  }
}

View File

@@ -0,0 +1,341 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/**
* Tests {@link HLL} of type {@link HLLType#FULL}.
*/
public class FullHLLTest extends LuceneTestCase {
// TODO union test
/**
* Smoke test for {@link HLL#cardinality()} and the proper use of the
* small range correction.
*/
@Test
public void smallRangeSmokeTest() {
final int log2m = 11;
final int m = (1 << log2m);
final int regwidth = 5;
// only one register set
{
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
// NOTE(review): presumably constructHLLValue(log2m, ix, val) builds a raw
// hash value that lands in register 'ix' with register value 'val' --
// confirm against ProbabilisticTestUtil.
hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 0/*ix*/, 1/*val*/));
final long cardinality = hll.cardinality();
// Trivially true that small correction conditions hold: one register
// set implies zeroes exist, and estimator trivially smaller than 5m/2.
// Small range correction: m * log(m/V)
// With exactly one register set, V (the zero-register count) is m - 1.
final long expected = (long)Math.ceil(m * Math.log((double)m / (m - 1)/*# of zeroes*/));
assertEquals(cardinality, expected);
}
// all but one register set
{
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
for(int i=0; i<(m - 1); i++) {
hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i/*ix*/, 1/*val*/));
}
// Trivially true that small correction conditions hold: all but
// one register set implies a zero exists, and estimator trivially
// smaller than 5m/2 since it's alpha / ((m-1)/2)
final long cardinality = hll.cardinality();
// Small range correction: m * log(m/V)
// With all but one register set, V is exactly 1.
final long expected = (long)Math.ceil(m * Math.log((double)m / 1/*# of zeroes*/));
assertEquals(cardinality, expected);
}
}
/**
* Smoke test for {@link HLL#cardinality()} and the proper use of the
* uncorrected estimator
*/
@Test
public void normalRangeSmokeTest() {
final int log2m = 11;
final int regwidth = 5;
// regwidth = 5, so hash space is
// log2m + (2^5 - 1 - 1), so L = log2m + 30
final int l = log2m + 30;
final int m = (1 << log2m);
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
// all registers at 'medium' value
{
final int registerValue = 7/*chosen to ensure neither correction kicks in*/;
for(int i=0; i<m; i++) {
hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, registerValue));
}
final long cardinality = hll.cardinality();
// Simplified estimator when all registers take same value: alpha / (m/2^val)
final double estimator = HLLUtil.alphaMSquared(m)/((double)m/Math.pow(2, registerValue));
// Assert conditions for uncorrected range: the estimate must fall below
// the large-range threshold (2^L / 30) and above the small-range
// threshold (5m/2), so the raw estimator is used verbatim.
assertTrue(estimator <= Math.pow(2, l)/30);
assertTrue(estimator > (5 * m /(double)2));
final long expected = (long)Math.ceil(estimator);
assertEquals(cardinality, expected);
}
}
/**
 * Smoke test for {@link HLL#cardinality()} and the proper use of the large
 * range correction.
 */
@Test
public void largeRangeSmokeTest() {
  final int log2m = 12;
  final int regwidth = 5;
  // regwidth = 5 gives a hash space of log2m + (2^5 - 1 - 1) bits, so L = log2m + 30
  final int l = log2m + 30;
  final int m = (1 << log2m);
  final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);

  // drive every register to a value large enough that the large-range
  // correction must kick in
  final int registerValue = 31;
  for(int regnum=0; regnum<m; regnum++) {
    hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, regnum, registerValue));
  }
  final long cardinality = hll.cardinality();

  // all registers equal => raw estimator is alpha * m^2 / (m / 2^value)
  final double rawEstimate = HLLUtil.alphaMSquared(m) / ((double)m / Math.pow(2, registerValue));
  // sanity: the large-range condition holds
  assertTrue(rawEstimate > Math.pow(2, l) / 30);

  // large range correction: -2^L * log(1 - E/2^L)
  final double corrected = -1.0 * Math.pow(2, l) * Math.log(1.0 - rawEstimate / Math.pow(2, l));
  assertEquals(cardinality, (long)Math.ceil(corrected));
}
// ========================================================================
/**
 * Tests the bounds on a register's value for a given raw input value.
 *
 * Raw-value encoding, as exercised by the expectations below: the low
 * log2m bits of the raw value select the register index 'j'; the register
 * is set to (p - log2m + 1) where p is the bit position of the
 * least-significant set bit above those index bits (e.g. 0x12 with
 * log2m=4: j=2, next set bit is bit 4, so the register value is 1).
 */
@Test
public void registerValueTest() {
  final int log2m = 4/*small enough to make testing easy (addRaw() shifts by one byte)*/;
  // register width 4 (the minimum size)
  { // scoped locally for sanity
    final int regwidth = 4;
    final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
    // peek directly at the underlying storage to verify register contents
    final BitVector bitVector = hll.probabilisticStorage;
    // lower-bounds of the register: raw values whose first set bit is just
    // above the index bits produce register values 0..4
    hll.addRaw(0x000000000000001L/*'j'=1*/);
    assertEquals(bitVector.getRegister(1/*'j'*/), 0);
    hll.addRaw(0x0000000000000012L/*'j'=2*/);
    assertEquals(bitVector.getRegister(2/*'j'*/), 1);
    hll.addRaw(0x0000000000000023L/*'j'=3*/);
    assertEquals(bitVector.getRegister(3/*'j'*/), 2);
    hll.addRaw(0x0000000000000044L/*'j'=4*/);
    assertEquals(bitVector.getRegister(4/*'j'*/), 3);
    hll.addRaw(0x0000000000000085L/*'j'=5*/);
    assertEquals(bitVector.getRegister(5/*'j'*/), 4);
    // upper-bounds of the register: values 13, 14, 15 then saturation
    // NOTE:  bear in mind that BitVector itself does ensure that
    //        overflow of a register is prevented
    hll.addRaw(0x0000000000010006L/*'j'=6*/);
    assertEquals(bitVector.getRegister(6/*'j'*/), 13);
    hll.addRaw(0x0000000000020007L/*'j'=7*/);
    assertEquals(bitVector.getRegister(7/*'j'*/), 14);
    hll.addRaw(0x0000000000040008L/*'j'=8*/);
    assertEquals(bitVector.getRegister(8/*'j'*/), 15);
    hll.addRaw(0x0000000000080009L/*'j'=9*/);
    assertEquals(bitVector.getRegister(9/*'j'*/), 15/*overflow*/);
    // sanity checks to ensure that no other bits above the lowest-set
    // bit matters
    // NOTE:  same as case 'j = 6' above
    hll.addRaw(0x000000000003000AL/*'j'=10*/);
    assertEquals(bitVector.getRegister(10/*'j'*/), 13);
    hll.addRaw(0x000000000011000BL/*'j'=11*/);
    assertEquals(bitVector.getRegister(11/*'j'*/), 13);
  }
  // register width 5: same lower bounds, but the wider register saturates
  // at 31 instead of 15
  { // scoped locally for sanity
    final int regwidth = 5;
    final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
    final BitVector bitVector = hll.probabilisticStorage;
    // lower-bounds of the register
    hll.addRaw(0x0000000000000001L/*'j'=1*/);
    assertEquals(bitVector.getRegister(1/*'j'*/), 0);
    hll.addRaw(0x0000000000000012L/*'j'=2*/);
    assertEquals(bitVector.getRegister(2/*'j'*/), 1);
    hll.addRaw(0x0000000000000023L/*'j'=3*/);
    assertEquals(bitVector.getRegister(3/*'j'*/), 2);
    hll.addRaw(0x0000000000000044L/*'j'=4*/);
    assertEquals(bitVector.getRegister(4/*'j'*/), 3);
    hll.addRaw(0x0000000000000085L/*'j'=5*/);
    assertEquals(bitVector.getRegister(5/*'j'*/), 4);
    // upper-bounds of the register
    // NOTE:  bear in mind that BitVector itself does ensure that
    //        overflow of a register is prevented
    hll.addRaw(0x0000000100000006L/*'j'=6*/);
    assertEquals(bitVector.getRegister(6/*'j'*/), 29);
    hll.addRaw(0x0000000200000007L/*'j'=7*/);
    assertEquals(bitVector.getRegister(7/*'j'*/), 30);
    hll.addRaw(0x0000000400000008L/*'j'=8*/);
    assertEquals(bitVector.getRegister(8/*'j'*/), 31);
    hll.addRaw(0x0000000800000009L/*'j'=9*/);
    assertEquals(bitVector.getRegister(9/*'j'*/), 31/*overflow*/);
  }
}
// ========================================================================
/**
 * Tests {@link HLL#clear()}.
 */
@Test
public void clearTest() {
  final int regwidth = 5;
  final int log2m = 4/*16 registers per counter*/;
  final int m = 1 << log2m;
  final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
  final BitVector registers = hll.probabilisticStorage;

  // dirty every register ...
  for(int regnum=0; regnum<m; regnum++) {
    registers.setRegister(regnum, regnum);
  }
  // ... then clear, and verify each register is back at its default value
  hll.clear();
  for(int regnum=0; regnum<m; regnum++) {
    assertEquals(registers.getRegister(regnum), 0L/*default value of register*/);
  }
}
// ========================================================================
// Serialization
/**
 * Tests {@link HLL#toBytes(ISchemaVersion)} and {@link HLL#fromBytes(byte[])}.
 */
@Test
public void toFromBytesTest() {
  final int log2m = 11/*arbitrary*/;
  final int regwidth = 5;

  final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION;
  final HLLType type = HLLType.FULL;
  final int padding = schemaVersion.paddingBytes(type);
  final int dataByteCount = ProbabilisticTestUtil.getRequiredBytes(regwidth, (1 << log2m)/*aka 2^log2m = m*/);
  final int expectedByteCount = padding + dataByteCount;

  // an empty element should round-trip
  final HLL emptyHLL = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
  assertRoundTrips(emptyHLL, expectedByteCount, schemaVersion);

  // a partially-filled element should round-trip
  final HLL partialHLL = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
  for(int i=0; i<3; i++) {
    partialHLL.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, (i+9)));
  }
  assertRoundTrips(partialHLL, expectedByteCount, schemaVersion);

  // a fully-populated element should round-trip
  final HLL fullHLL = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.FULL);
  for(int i=0; i<(1 << log2m)/*aka 2^log2m*/; i++) {
    fullHLL.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, (i % 9) + 1));
  }
  assertRoundTrips(fullHLL, expectedByteCount, schemaVersion);
}

/**
 * Serializes the HLL, asserts the serialized length, deserializes it and
 * asserts register-wise equality with the original.
 */
private static void assertRoundTrips(final HLL hll, final int expectedByteCount, final ISchemaVersion schemaVersion) {
  final byte[] bytes = hll.toBytes(schemaVersion);
  // assert output length is correct
  assertEquals(bytes.length, expectedByteCount);
  final HLL deserialized = HLL.fromBytes(bytes);
  // assert register values correct
  assertElementsEqual(hll, deserialized);
}
// ************************************************************************
// Assertion Helpers
/**
 * Asserts that the two HLLs are register-wise equal.
 *
 * @param hllA the first HLL to compare. This cannot be <code>null</code>.
 * @param hllB the second HLL to compare. This cannot be <code>null</code>.
 */
private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
    final BitVector bitVectorA = hllA.probabilisticStorage;
    // NOTE: the original read hllA.probabilisticStorage for BOTH sides,
    //       comparing an HLL against itself and making the assertion
    //       vacuously true; it must read hllB's storage here.
    final BitVector bitVectorB = hllB.probabilisticStorage;
    final LongIterator iterA = bitVectorA.registerIterator();
    final LongIterator iterB = bitVectorB.registerIterator();
    // registers must match pairwise ...
    for(;iterA.hasNext() && iterB.hasNext();) {
        assertEquals(iterA.next(), iterB.next());
    }
    // ... and both iterators must be exhausted (same register count)
    assertFalse(iterA.hasNext());
    assertFalse(iterB.hasNext());
}
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import static org.apache.solr.util.hll.HLL.*;
/**
 * Serialization smoke-tests.
 */
public class HLLSerializationTest extends LuceneTestCase {
    /**
     * A smoke-test that covers serialization/deserialization of an HLL
     * under all possible parameters.
     */
    @Test
    @Slow
    @Nightly
    public void serializationSmokeTest() throws Exception {
        // a fixed pool of random raw values, shared across every configuration
        final Random random = new Random(randomLong());
        final int randomCount = 250;
        final List<Long> randoms = new ArrayList<Long>(randomCount);
        while (randoms.size() < randomCount) {
            randoms.add(random.nextLong());
        }

        for (final HLLType type : new HLLType[] { HLLType.EMPTY, HLLType.EXPLICIT, HLLType.SPARSE, HLLType.FULL }) {
            assertCardinality(type, randoms);
        }
    }

    // NOTE: log2m<=16 was chosen as the max log2m parameter so that the test
    //       completes in a reasonable amount of time. Not much is gained by
    //       testing larger values - there are no more known serialization
    //       related edge cases that appear as log2m gets even larger.
    // NOTE: This test completed successfully with log2m<=MAXIMUM_LOG2M_PARAM
    //       on 2014-01-30.
    private static void assertCardinality(final HLLType hllType, final Collection<Long> items)
           throws CloneNotSupportedException {
        for(int log2m=MINIMUM_LOG2M_PARAM; log2m<=16; log2m++) {
            for(int regw=MINIMUM_REGWIDTH_PARAM; regw<=MAXIMUM_REGWIDTH_PARAM; regw++) {
                for(int expthr=MINIMUM_EXPTHRESH_PARAM; expthr<=MAXIMUM_EXPTHRESH_PARAM; expthr++ ) {
                    for(final boolean sparse: new boolean[]{true, false}) {
                        // populate a reference HLL with the shared value pool
                        final HLL reference = new HLL(log2m, regw, expthr, sparse, hllType);
                        for(final Long item: items) {
                            reference.addRaw(item);
                        }

                        // round-trip through the serialized form ...
                        final HLL fromBytes = HLL.fromBytes(reference.toBytes());
                        assertEquals(fromBytes.cardinality(), reference.cardinality());
                        assertEquals(fromBytes.getType(), reference.getType());
                        assertTrue(Arrays.equals(fromBytes.toBytes(), reference.toBytes()));

                        // ... and through clone()
                        final HLL cloned = reference.clone();
                        assertEquals(cloned.cardinality(), reference.cardinality());
                        assertEquals(cloned.getType(), reference.getType());
                        assertTrue(Arrays.equals(cloned.toBytes(), reference.toBytes()));
                    }
                }
            }
        }
    }
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/**
 * Tests {@link HLLUtil} static methods.
 *
 * @author tkarnezo
 */
public class HLLUtilTest extends LuceneTestCase {
    /**
     * Tests that {@link HLLUtil#largeEstimatorCutoff(int, int)} is the same
     * as a trivial implementation, over the full (log2m, registerWidth)
     * parameter space.
     */
    @Test
    public void largeEstimatorCutoffTest() {
        for(int log2m=HLL.MINIMUM_LOG2M_PARAM; log2m<=HLL.MAXIMUM_LOG2M_PARAM; log2m++) {
            // NOTE: the original inner loop bound was MINIMUM_REGWIDTH_PARAM,
            //       which exercised only a single register width; iterate up
            //       to the maximum so every width is covered.
            for(int regWidth=HLL.MINIMUM_REGWIDTH_PARAM; regWidth<=HLL.MAXIMUM_REGWIDTH_PARAM; regWidth++) {
                final double cutoff = HLLUtil.largeEstimatorCutoff(log2m, regWidth);

                // See blog post (http://research.neustar.biz/2013/01/24/hyperloglog-googles-take-on-engineering-hll/)
                // and original paper (Fig. 3) for information on 2^L and
                // "large range correction" cutoff.
                final double expected = Math.pow(2, Math.pow(2, regWidth) - 2 + log2m) / 30.0;
                assertEquals(cutoff, expected, 0.0001);
            }
        }
    }
}

View File

@ -0,0 +1,708 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import static com.carrotsearch.randomizedtesting.RandomizedTest.*;
import static org.apache.solr.util.hll.ProbabilisticTestUtil.*;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;
/**
* Generates test files for testing other implementations of HLL
* serialization/deserialization, namely the PostgreSQL implementation.
*/
public class IntegrationTestGenerator {
// ************************************************************************
// directory to output the generated tests
private static final String OUTPUT_DIRECTORY = "/tmp/hll_test/";
// ------------------------------------------------------------------------
// configurations for HLLs, should mirror settings in PostgreSQL impl. tests
private static final int REGWIDTH = 5;
private static final int LOG2M = 11;
// NOTE: This differs from the PostgreSQL impl. parameter 'expthresh'. This
// is a literal threshold to use in the promotion hierarchy, implying
// that both EXPLICIT representation should be used and it should
// NOT be automatically computed. This is done to ensure that the
// parameters of the test are very explicitly defined.
private static final int EXPLICIT_THRESHOLD = 256;
// NOTE: This is not the PostgreSQL impl. parameter 'sparseon'. 'sparseon'
// is assumed to be true and this is a literal register-count threshold
// to use in the promotion hierarchy. This is done to ensure that the
// parameters of the test are very explicitly defined.
private static final int SPARSE_THRESHOLD = 850;
// ------------------------------------------------------------------------
// computed constants
private static final int REGISTER_COUNT = (1 << LOG2M);
private static final int REGISTER_MAX_VALUE = (1 << REGWIDTH) - 1;
// ========================================================================
// Tests
/**
 * Cumulatively adds random values to a FULL HLL through the small range
 * correction, uncorrected range, and large range correction of the HLL's
 * cardinality estimator.
 *
 * Format: cumulative add
 * Tests:
 * - FULL cardinality computation
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void fullCardinalityCorrectionTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "cardinality_correction", TestType.ADD)) {
        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.FULL);
        initLineAdd(output, hll, schemaVersion);

        // run through some values in the small range correction
        for(int i=0; i<((1 << LOG2M) - 1); i++) {
            final long rawValue = constructHLLValue(LOG2M, i, 1);
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        // run up past some values in the uncorrected range
        for(int i=0; i<(1 << LOG2M); i++) {
            final long rawValue = constructHLLValue(LOG2M, i, 7);
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        // run through some values in the large range correction
        for(int i=0; i<(1 << LOG2M); i++) {
            final long rawValue = constructHLLValue(LOG2M, i, 30);
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Cumulatively adds random values to an EMPTY HLL.
 *
 * Format: cumulative add
 * Tests:
 * - EMPTY, EXPLICIT, SPARSE, PROBABILISTIC addition
 * - EMPTY to EXPLICIT promotion
 * - EXPLICIT to SPARSE promotion
 * - SPARSE to FULL promotion
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void globalStepTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "comprehensive_promotion", TestType.ADD)) {
        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        initLineAdd(output, hll, schemaVersion);

        // enough random adds to walk the accumulator through every promotion
        for(int i=0; i<10000/*arbitrary*/; i++) {
            cumulativeAddLine(output, hll, randomLong(), schemaVersion);
        }

        output.flush();
    }
}
/**
 * Cumulatively unions "underpopulated" FULL HLLs into the
 * accumulator to verify the correct behavior from the PostgreSQL implementation.
 * The PostgreSQL implementation's representations of probabilistic HLLs should
 * depend exclusively on the chosen SPARSE-to-FULL cutoff.
 *
 * Format: cumulative union
 * Tests:
 * - EMPTY U "underpopulated" FULL => SPARSE
 * - SPARSE U "underpopulated" FULL => SPARSE
 * - SPARSE U "barely underpopulated" FULL => FULL
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void sparseFullRepresentationTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original never closed this writer at all
    try (final FileWriter output = openOutput(schemaVersion, "sparse_full_representation", TestType.UNION)) {
        final HLL emptyHLL1 = newHLL(HLLType.EMPTY);
        final HLL emptyHLL2 = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, emptyHLL1, emptyHLL2, schemaVersion);

        // NOTE: In this test the sparseReference will be the "expected" value
        //       from the C representation, since it doesn't choose representation
        //       based on original encoding, but rather on the promotion rules
        //       and the declared type of the "receiving" field.
        //       It is the manually-constructed union result.

        // "underpopulated" FULL U EMPTY => SPARSE
        final HLL fullHLL = newHLL(HLLType.FULL);
        fullHLL.addRaw(constructHLLValue(LOG2M, 0/*ix*/, 1/*val*/));

        final HLL sparseHLL = newHLL(HLLType.SPARSE);
        sparseHLL.addRaw(constructHLLValue(LOG2M, 0/*ix*/, 1/*val*/));

        output.write(stringCardinality(fullHLL) + "," + toByteA(fullHLL, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n");
        output.flush();

        // "underpopulated" FULL (small) U SPARSE (small) => SPARSE
        final HLL fullHLL2 = newHLL(HLLType.FULL);
        fullHLL2.addRaw(constructHLLValue(LOG2M, 1/*ix*/, 1/*val*/));

        sparseHLL.addRaw(constructHLLValue(LOG2M, 1/*ix*/, 1/*val*/));

        output.write(stringCardinality(fullHLL2) + "," + toByteA(fullHLL2, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n");
        output.flush();

        // "underpopulated" FULL (just on edge) U SPARSE (small) => FULL
        final HLL fullHLL3 = newHLL(HLLType.FULL);
        for(int i=2; i<(SPARSE_THRESHOLD + 1); i++) {
            fullHLL3.addRaw(constructHLLValue(LOG2M, i/*ix*/, 1/*val*/));
            sparseHLL.addRaw(constructHLLValue(LOG2M, i/*ix*/, 1/*val*/));
        }

        output.write(stringCardinality(fullHLL3) + "," + toByteA(fullHLL3, schemaVersion) + "," + stringCardinality(sparseHLL) + "," + toByteA(sparseHLL, schemaVersion) + "\n");
        output.flush();
    }
}
/**
 * Cumulatively sets successive registers to:
 *
 *     <code>(registerIndex % REGISTER_MAX_VALUE) + 1</code>
 *
 * by adding specifically constructed values to a SPARSE HLL.
 * Does not induce promotion.
 *
 * Format: cumulative add
 * Tests:
 * - SPARSE addition (predictable)
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void sparseStepTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "sparse_step", TestType.ADD)) {
        // the accumulator, starts empty sparse probabilistic
        final HLL hll = newHLL(HLLType.SPARSE);
        initLineAdd(output, hll, schemaVersion);

        // stay strictly below the promotion cutoff
        for(int i=0; i<SPARSE_THRESHOLD; i++) {
            final long rawValue = constructHLLValue(LOG2M, i, ((i % REGISTER_MAX_VALUE) + 1));
            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Cumulatively sets random registers of a SPARSE HLL to
 * random values by adding random values. Does not induce promotion.
 *
 * Format: cumulative add
 * Tests:
 * - SPARSE addition (random)
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void sparseRandomTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "sparse_random", TestType.ADD)) {
        final Random random = new Random(randomLong());

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.SPARSE);
        initLineAdd(output, hll, schemaVersion);

        // stay strictly below the promotion cutoff
        for(int i=0; i<SPARSE_THRESHOLD; i++) {
            final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT;
            final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1);
            final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue);

            cumulativeAddLine(output, hll, rawValue, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Cumulatively sets the first register (index 0) to value 2, the last
 * register (index m-1) to value 2, and then sets registers with indices in
 * the range 2 to (sparseCutoff + 2) to value 1 to trigger promotion.
 *
 * This tests for register alignment in the promotion from SPARSE
 * to FULL.
 *
 * Format: cumulative add
 * Tests:
 * - SPARSE addition
 * - SPARSE to FULL promotion
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void sparseEdgeTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "sparse_edge", TestType.ADD)) {
        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.SPARSE);
        initLineAdd(output, hll, schemaVersion);

        // pin the first and last registers (alignment sentinels)
        final long firstValue = constructHLLValue(LOG2M, 0, 2);
        cumulativeAddLine(output, hll, firstValue, schemaVersion);

        final long lastValue = constructHLLValue(LOG2M, (1 << LOG2M) - 1, 2);
        cumulativeAddLine(output, hll, lastValue, schemaVersion);

        // fill past the cutoff to force SPARSE -> FULL promotion
        for(int i=2; i<(SPARSE_THRESHOLD + 2); i++) {
            final long middleValue = constructHLLValue(LOG2M, i, 1);
            cumulativeAddLine(output, hll, middleValue, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Unions an EMPTY accumulator with EXPLICIT HLLs, each containing a
 * single random value.
 *
 * Format: cumulative union
 * Tests:
 * - EMPTY U EXPLICIT
 * - EXPLICIT U EXPLICIT
 * - EXPLICIT to SPARSE promotion
 * - SPARSE U EXPLICIT
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void explicitPromotionTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "explicit_promotion", TestType.UNION)) {
        final Random random = new Random(randomLong());

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<(EXPLICIT_THRESHOLD+500)/*should be greater than promotion cutoff*/; i++) {
            // make an EXPLICIT set and populate with cardinality 1
            final HLL explicitHLL = newHLL(HLLType.EXPLICIT);
            explicitHLL.addRaw(random.nextLong());

            cumulativeUnionLine(output, hll, explicitHLL, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Unions an EMPTY accumulator with SPARSE HLLs, each
 * having one register set.
 *
 * Format: cumulative union
 * Tests:
 * - EMPTY U SPARSE
 * - SPARSE U SPARSE
 * - SPARSE promotion
 * - SPARSE U FULL
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void sparseProbabilisticPromotionTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "sparse_promotion", TestType.UNION)) {
        final Random random = new Random(randomLong());

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<(SPARSE_THRESHOLD + 1000)/*should be greater than promotion cutoff*/; i++) {
            // make a SPARSE set and populate with cardinality 1
            final HLL sparseHLL = newHLL(HLLType.SPARSE);

            final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT;
            final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1);
            final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue);
            sparseHLL.addRaw(rawValue);

            cumulativeUnionLine(output, hll, sparseHLL, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Unions an EMPTY accumulator with EXPLICIT HLLs, each having a single
 * random value, twice in a row to verify that the set properties are
 * satisfied.
 *
 * Format: cumulative union
 * Tests:
 * - EMPTY U EXPLICIT
 * - EXPLICIT U EXPLICIT
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void explicitOverlapTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "explicit_explicit", TestType.UNION)) {
        final Random random = new Random(randomLong());

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<EXPLICIT_THRESHOLD; i++) {
            // make an EXPLICIT set and populate with cardinality 1
            final HLL explicitHLL = newHLL(HLLType.EXPLICIT);
            explicitHLL.addRaw(random.nextLong());

            // union it into the accumulator twice, to test overlap (cardinality should not change)
            cumulativeUnionLine(output, hll, explicitHLL, schemaVersion);
            cumulativeUnionLine(output, hll, explicitHLL, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Unions an EMPTY accumulator with SPARSE HLLs, each
 * having a single register set, twice in a row to verify that the set
 * properties are satisfied.
 *
 * Format: cumulative union
 * Tests:
 * - EMPTY U SPARSE
 * - SPARSE U SPARSE
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void sparseProbabilisticOverlapTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "sparse_sparse", TestType.UNION)) {
        final Random random = new Random(randomLong());

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<SPARSE_THRESHOLD; i++) {
            // make a SPARSE set and populate with cardinality 1
            final HLL sparseHLL = newHLL(HLLType.SPARSE);
            final int registerIndex = Math.abs(random.nextInt()) % REGISTER_COUNT;
            final int registerValue = ((Math.abs(random.nextInt()) % REGISTER_MAX_VALUE) + 1);
            final long rawValue = constructHLLValue(LOG2M, registerIndex, registerValue);
            sparseHLL.addRaw(rawValue);

            cumulativeUnionLine(output, hll, sparseHLL, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Unions an EMPTY accumulator with FULL HLLs, each having
 * many registers set, twice in a row to verify that the set properties are
 * satisfied.
 *
 * Format: cumulative union
 * Tests:
 * - EMPTY U FULL
 * - FULL U FULL
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void probabilisticUnionTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "probabilistic_probabilistic", TestType.UNION)) {
        final Random random = new Random(randomLong());

        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<1000/*number of rows to generate*/; i++) {
            // make a FULL set and populate with a random number of random values
            final HLL fullHLL = newHLL(HLLType.FULL);
            final int elementCount = random.nextInt(10000/*arbitrary maximum cardinality*/);
            for(int j=0;j<elementCount;j++) {
                fullHLL.addRaw(random.nextLong());
            }

            cumulativeUnionLine(output, hll, fullHLL, schemaVersion);
        }

        output.flush();
    }
}
/**
 * Unions an EMPTY accumulator with random HLLs.
 *
 * Format: cumulative union
 * Tests:
 * - hopefully all union possibilities
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 */
private static void globalUnionTest(final ISchemaVersion schemaVersion) throws IOException {
    // try-with-resources: the original leaked the FileWriter if a write threw
    try (final FileWriter output = openOutput(schemaVersion, "comprehensive", TestType.UNION)) {
        // the accumulator, starts empty
        final HLL hll = newHLL(HLLType.EMPTY);
        final HLL emptyHLL = newHLL(HLLType.EMPTY);

        cumulativeUnionLine(output, hll, emptyHLL, schemaVersion);

        for(int i=0; i<1000/*number of rows to generate*/; i++) {
            final HLL randomHLL = generateRandomHLL();
            cumulativeUnionLine(output, hll, randomHLL, schemaVersion);
        }

        output.flush();
    }
}
// ========================================================================
// Main
/**
 * Runs every generator in this class for the specified schema version,
 * writing one CSV file per test into {@link #OUTPUT_DIRECTORY}.
 *
 * @param schemaVersion the schema with which to serialize the HLLs. This
 *        cannot be <code>null</code>.
 * @throws IOException if any output file cannot be written.
 */
public static void fullSuite(final ISchemaVersion schemaVersion) throws IOException {
    fullCardinalityCorrectionTest(schemaVersion);
    globalUnionTest(schemaVersion);
    globalStepTest(schemaVersion);
    probabilisticUnionTest(schemaVersion);
    explicitPromotionTest(schemaVersion);
    explicitOverlapTest(schemaVersion);
    sparseFullRepresentationTest(schemaVersion);
    sparseStepTest(schemaVersion);
    sparseRandomTest(schemaVersion);
    sparseEdgeTest(schemaVersion);
    sparseProbabilisticPromotionTest(schemaVersion);
    sparseProbabilisticOverlapTest(schemaVersion);
}
/**
 * Entry point: generates the full suite of test files for schema version 1
 * under {@link #OUTPUT_DIRECTORY}.
 *
 * @throws IOException if any output file cannot be written.
 */
public static void main(String[] args) throws IOException {
    fullSuite(SerializationUtil.VERSION_ONE);
}
// ************************************************************************
// Helpers
/**
 * Shortcut for testing constructor, which uses the constants defined at
 * the top of the file as default parameters.
 *
 * @return a new {@link HLL} of specified type, which uses the parameters
 *         ({@link #LOG2M}, {@link #REGWIDTH}, {@link #EXPLICIT_THRESHOLD},
 *         and {@link #SPARSE_THRESHOLD}) specified above.
 */
private static HLL newHLL(final HLLType type) {
    // NOTE: the original body was 'return newHLL(type);' -- unconditional
    //       self-recursion that would throw StackOverflowError on first use.
    //       Construct the HLL directly with the file-level defaults instead.
    return new HLL(LOG2M, REGWIDTH, EXPLICIT_THRESHOLD, SPARSE_THRESHOLD, type);
}
/**
 * Returns the algorithm-specific cardinality of the specified {@link HLL}
 * as a {@link String} appropriate for comparison with the algorithm-specific
 * cardinality provided by the PostgreSQL implementation.
 *
 * @param hll the HLL whose algorithm-specific cardinality is to be printed.
 *        This cannot be <code>null</code>.
 * @return the algorithm-specific cardinality of the instance as a PostgreSQL-
 *         compatible String. This will never be <code>null</code>
 */
private static String stringCardinality(final HLL hll) {
    final HLLType type = hll.getType();
    if(type == HLLType.EMPTY) {
        return "0";
    } else if(type == HLLType.EXPLICIT) {
        /*promotion has not yet occurred*/
        return Long.toString(hll.cardinality());
    } else if(type == HLLType.SPARSE) {
        return Double.toString(hll.sparseProbabilisticAlgorithmCardinality());
    } else if(type == HLLType.FULL) {
        return Double.toString(hll.fullProbabilisticAlgorithmCardinality());
    } else {
        throw new RuntimeException("Unknown HLL type " + type);
    }
}
/**
 * Generates a random HLL and populates it with random values.
 *
 * @return the populated HLL. This will never be <code>null</code>.
 */
public static HLL generateRandomHLL() {
    // pick a type uniformly from the enum's declared values
    final int randomTypeInt = randomIntBetween(0, HLLType.values().length - 1);
    final HLLType type;
    switch(randomTypeInt) {
        case 0:
            type = HLLType.EMPTY;
            break;
        case 1:
            type = HLLType.EXPLICIT;
            break;
        case 2:
            type = HLLType.FULL;
            break;
        case 3:
            // NOTE(review): this duplicates case 0 (EMPTY). If HLLType declares
            //       only the four types used in this file, randomTypeInt never
            //       reaches 4, so SPARSE below would be unreachable and never
            //       generated -- confirm against HLLType's declared constants
            //       and consider mapping this case to SPARSE instead.
            type = HLLType.EMPTY;
            break;
        case 4:
            type = HLLType.SPARSE;
            break;
        default:
            throw new RuntimeException("Unassigned type int " + randomTypeInt);
    }

    // choose a cardinality range appropriate for the chosen representation
    // so the returned HLL actually resides in that representation
    final int cardinalityCap;
    final int cardinalityBaseline;
    switch(type) {
        case EMPTY:
            return newHLL(HLLType.EMPTY);
        case EXPLICIT:
            cardinalityCap = EXPLICIT_THRESHOLD;
            cardinalityBaseline = 1;
            break;
        case SPARSE:
            cardinalityCap = SPARSE_THRESHOLD;
            cardinalityBaseline = (EXPLICIT_THRESHOLD + 1);
            break;
        case FULL:
            cardinalityCap = 100000;
            cardinalityBaseline = (SPARSE_THRESHOLD*10);
            break;
        default:
            throw new RuntimeException("We should never be here.");
    }

    // add the baseline number of values, then a random surplus up to the cap
    final HLL hll = newHLL(HLLType.EMPTY);
    for(int i=0; i<cardinalityBaseline; i++) {
        hll.addRaw(randomLong());
    }
    for(int i=0; i<randomInt(cardinalityCap - cardinalityBaseline); i++) {
        hll.addRaw(randomLong());
    }
    return hll;
}
/**
 * Opens a {@link FileWriter} and writes out an appropriate CSV header.
 *
 * @param  schemaVersion Schema version of the output. This cannot be
 *         <code>null</code>.
 * @param  description Description string used to build the filename.
 *         This cannot be <code>null</code>.
 * @param  type {@link TestType type} of the test file to be written.
 *         This cannot be <code>null</code>.
 * @return The opened {@link FileWriter writer}. This will never be <code>null</code>.
 */
private static FileWriter openOutput(final ISchemaVersion schemaVersion, final String description, final TestType type) throws IOException {
    final String schemaVersionPrefix = "v"+ schemaVersion.schemaVersionNumber() + "_";
    final String header;
    final String filename;
    switch(type) {
        case ADD:
            header = "cardinality,raw_value,HLL\n";
            filename = schemaVersionPrefix + "cumulative_add_" + description + ".csv";
            break;
        case UNION:
            header = "cardinality,HLL,union_cardinality,union_HLL\n";
            filename = schemaVersionPrefix + "cumulative_union_" + description + ".csv";
            break;
        default:
            throw new RuntimeException("Unknown test type " + type);
    }

    final FileWriter output = new FileWriter(OUTPUT_DIRECTORY + filename);
    try {
        output.write(header);
        output.flush();
    } catch(final IOException e) {
        // don't leak the writer if the header can't be written (the original
        // left the file handle open on this failure path)
        output.close();
        throw e;
    }
    return output;
}
/**
 * Writes out a {@link TestType#ADD}-formatted test line.
 *
 * @param output The output {@link FileWriter writer}. This cannot be <code>null</code>.
 * @param hll The "accumulator" HLL instance. This cannot be <code>null</code>.
 * @param rawValue The raw value added to the HLL.
 * @param schemaVersion the schema with which to serialize the HLLs. This cannot
 *        be <code>null</code>.
 */
private static void cumulativeAddLine(final FileWriter output, final HLL hll, final long rawValue, final ISchemaVersion schemaVersion) throws IOException {
    hll.addRaw(rawValue);

    // line format: <cardinality>,<raw value>,<serialized accumulator>
    final StringBuilder line = new StringBuilder();
    line.append(stringCardinality(hll))
        .append(',')
        .append(rawValue)
        .append(',')
        .append(toByteA(hll, schemaVersion))
        .append('\n');
    output.write(line.toString());
    output.flush();
}
/**
 * Writes an initial line for a {@link TestType#ADD}-formatted test.
 *
 * @param output The output {@link FileWriter writer}. This cannot be <code>null</code>.
 * @param hll The "accumulator" HLL instance. This cannot be <code>null</code>.
 * @param schemaVersion the schema with which to serialize the HLLs. This cannot
 *        be <code>null</code>.
 */
private static void initLineAdd(final FileWriter output, final HLL hll, final ISchemaVersion schemaVersion) throws IOException {
    // cardinality and raw value are both zero on the initial line
    output.write("0,0," + toByteA(hll, schemaVersion) + "\n");
    output.flush();
}
/**
 * Writes out a {@link TestType#UNION}-formatted test line.
 *
 * @param output The output {@link FileWriter writer}. This cannot be <code>null</code>.
 * @param hll The "accumulator" HLL instance. This cannot be <code>null</code>.
 * @param increment The "increment" HLL instance which will be unioned into
 *        the accumulator. This cannot be <code>null</code>.
 * @param schemaVersion the schema with which to serialize the HLLs. This cannot
 *        be <code>null</code>.
 */
private static void cumulativeUnionLine(final FileWriter output, final HLL hll, final HLL increment, final ISchemaVersion schemaVersion) throws IOException {
    hll.union(increment);

    // cardinalities are computed after the union so the accumulator column
    // reflects the merged state
    final String incrementCardinality = stringCardinality(increment);
    final String incrementSerialized = toByteA(increment, schemaVersion);
    final String accumulatorCardinality = stringCardinality(hll);
    final String accumulatorSerialized = toByteA(hll, schemaVersion);

    output.write(incrementCardinality + "," + incrementSerialized + "," + accumulatorCardinality + "," + accumulatorSerialized + "\n");
    output.flush();
}
/**
 * Serializes a HLL to Postgres 9 'bytea' hex-format, for CSV ingest.
 *
 * @param hll the HLL to serialize. This cannot be <code>null</code>.
 * @param schemaVersion the schema with which to serialize the HLLs. This cannot
 *        be <code>null</code>.
 * @return a PostgreSQL 'bytea' string representing the HLL.
 */
private static String toByteA(final HLL hll, final ISchemaVersion schemaVersion) {
    final byte[] serialized = hll.toBytes(schemaVersion);
    // bytea hex format is "\x" followed by two hex digits per byte
    final String hex = NumberUtil.toHex(serialized, 0, serialized.length);
    return "\\x" + hex;
}
/**
* Indicates what kind of test output a test will generate.
*/
private static enum TestType {
/**
* This type of test is characterized by values being added to an
* accumulator HLL whose serialized representation (after the value is added)
* is printed to each line along with the cardinality and added value.
*/
ADD,
/**
* This type of test is characterized by HLLs being unioned into an
* accumulator HLL whose serialized representation (after the HLL is
* union'd) is printed to each line along with the cardinalities and the
* serialized representation of the HLL union'd in.
*/
UNION;
}
}

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
/**
* A collection of test utilities for constructing input values to HLLs and for
* computing their serialized size.
*/
/**
 * A collection of test utilities for constructing input values to HLLs and for
 * computing their serialized size.
 */
public class ProbabilisticTestUtil {
    // Utility class: static methods only. Private constructor prevents
    // accidental instantiation (Effective Java, Item 4).
    private ProbabilisticTestUtil() { /* no instances */ }

    /**
     * Constructs a value that when added raw to a HLL will set the register at
     * <code>registerIndex</code> to <code>registerValue</code>.
     *
     * @param log2m the log-base-2 of the number of registers in the HLL
     * @param registerIndex the index of the register to set
     * @param registerValue the value to set the register to
     * @return the value
     */
    public static long constructHLLValue(final int log2m, final int registerIndex, final int registerValue) {
        // the low log2m bits select the register; the position of the lowest
        // set bit in the remaining (substream) bits determines the register value
        final long partition = registerIndex;
        final long substreamValue = (1L << (registerValue - 1));
        return (substreamValue << log2m) | partition;
    }

    /**
     * Extracts the HLL register index from a raw value.
     *
     * @param rawValue the raw value as fed to {@code HLL#addRaw(long)}
     * @param log2m the log-base-2 of the number of registers in the HLL
     * @return the register index encoded in the low <code>log2m</code> bits
     */
    public static short getRegisterIndex(final long rawValue, final int log2m) {
        final long mBitsMask = (1 << log2m) - 1;
        final short j = (short)(rawValue & mBitsMask);
        return j;
    }

    /**
     * Extracts the HLL register value from a raw value.
     *
     * @param rawValue the raw value as fed to {@code HLL#addRaw(long)}
     * @param log2m the log-base-2 of the number of registers in the HLL
     * @return the register value: one plus the position of the least
     *         significant set bit of the substream, capped at 31, or zero if
     *         the substream is all zeroes
     */
    public static byte getRegisterValue(final long rawValue, final int log2m) {
        final long substreamValue = (rawValue >>> log2m);
        final byte p_w;
        if (substreamValue == 0L) {
            // The paper does not cover p(0x0), so the special value 0 is used.
            // 0 is the original initialization value of the registers, so by
            // doing this the HLL simply ignores it. This is acceptable
            // because the probability is 1/(2^(2^registerSizeInBits)).
            p_w = 0;
        } else {
            p_w = (byte)Math.min(1 + BitUtil.leastSignificantBit(substreamValue), 31);
        }
        return p_w;
    }

    /**
     * @param shortWordLength the width of a register word, in bits
     * @param registerCount the number of registers to be packed
     * @return the number of bytes required to pack <code>registerCount</code>
     *         registers of width <code>shortWordLength</code>.
     */
    public static int getRequiredBytes(final int shortWordLength, final int registerCount) {
        // ceil(total bits / 8); float math is exact for the small sizes used in tests
        return (int)Math.ceil((registerCount * shortWordLength)/(float)8);
    }
}

View File

@ -0,0 +1,453 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.hll;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
import com.carrotsearch.hppc.IntByteOpenHashMap;
import com.carrotsearch.hppc.cursors.IntByteCursor;
import com.carrotsearch.randomizedtesting.RandomizedTest;
/**
* Tests {@link HLL} of type {@link HLLType#SPARSE}.
*/
public class SparseHLLTest extends LuceneTestCase {
// log-base-2 of the register count (2^11 registers), shared by most tests below
private static final int log2m = 11;
/**
 * Tests {@link HLL#addRaw(long)}.
 */
@Test
public void addTest() {
{ // insert an element with register value 1 (minimum set value)
final int registerIndex = 0;
final int registerValue = 1;
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue);
final HLL hll = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
hll.addRaw(rawValue);
assertOneRegisterSet(hll, registerIndex, (byte)registerValue);
}
{ // insert an element with register value 31 (maximum set value)
final int registerIndex = 0;
final int registerValue = 31;
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue);
final HLL hll = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
hll.addRaw(rawValue);
assertOneRegisterSet(hll, registerIndex, (byte)registerValue);
}
{ // insert an element that could overflow the register (past 31)
final int registerIndex = 0;
final int registerValue = 36;
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue);
final HLL hll = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
hll.addRaw(rawValue);
// value is clamped to the register maximum (2^5 - 1 = 31)
assertOneRegisterSet(hll, (short)registerIndex, (byte)31/*register max*/);
}
{ // insert duplicate elements, observe no change
final int registerIndex = 0;
final int registerValue = 1;
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue);
final HLL hll = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
hll.addRaw(rawValue);
hll.addRaw(rawValue);
assertOneRegisterSet(hll, registerIndex, (byte)registerValue);
}
{ // insert elements that increase a register's value
final int registerIndex = 0;
final int registerValue = 1;
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue);
final HLL hll = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
hll.addRaw(rawValue);
final int registerValue2 = 2;
final long rawValue2 = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue2);
hll.addRaw(rawValue2);
assertOneRegisterSet(hll, registerIndex, (byte)registerValue2);
}
{ // insert elements that have lower register values, observe no change
final int registerIndex = 0;
final int registerValue = 2;
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue);
final HLL hll = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
hll.addRaw(rawValue);
final int registerValue2 = 1;
final long rawValue2 = ProbabilisticTestUtil.constructHLLValue(log2m, registerIndex, registerValue2);
hll.addRaw(rawValue2);
assertOneRegisterSet(hll, registerIndex, (byte)registerValue);
}
}
/**
 * Smoke test for {@link HLL#cardinality()} and the proper use of the small
 * range correction.
 */
@Test
public void smallRangeSmokeTest() {
final int log2m = 11;
final int m = (1 << log2m);
final int regwidth = 5;
// only one register set
{
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 0, 1));
final long cardinality = hll.cardinality();
// Trivially true that small correction conditions hold: one register
// set implies zeroes exist, and estimator trivially smaller than 5m/2.
// Small range correction: m * log(m/V)
final long expected = (long)Math.ceil(m * Math.log((double)m / (m - 1)/*# of zeroes*/));
assertEquals(cardinality, expected);
}
// all but one register set
{
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary*/, HLLType.SPARSE);
for(int i=0; i<(m - 1); i++) {
hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, 1));
}
// Trivially true that small correction conditions hold: all but
// one register set implies a zero exists, and estimator trivially
// smaller than 5m/2 since it's alpha / ((m-1)/2)
final long cardinality = hll.cardinality();
// Small range correction: m * log(m/V)
final long expected = (long)Math.ceil(m * Math.log((double)m / 1/*# of zeroes*/));
assertEquals(cardinality, expected);
}
}
/**
 * Smoke test for {@link HLL#cardinality()} and the proper use of the
 * uncorrected estimator.
 */
@Test
public void normalRangeSmokeTest() {
final int log2m = 11;
final int m = (1 << log2m);
final int regwidth = 5;
// regwidth = 5, so hash space is
// log2m + (2^5 - 1 - 1), so L = log2m + 30
final int l = log2m + 30;
// all registers at 'medium' value
{
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, m/*sparseThreshold*/, HLLType.SPARSE);
final int registerValue = 7/*chosen to ensure neither correction kicks in*/;
for(int i=0; i<m; i++) {
hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, registerValue));
}
final long cardinality = hll.cardinality();
// Simplified estimator when all registers take same value: alpha / (m/2^val)
final double estimator = HLLUtil.alphaMSquared(m)/((double)m/Math.pow(2, registerValue));
// Assert conditions for uncorrected range: estimator must lie between
// the small-range bound (5m/2) and the large-range bound (2^L/30)
assertTrue(estimator <= Math.pow(2,l)/30);
assertTrue(estimator > (5 * m /(double)2));
final long expected = (long)Math.ceil(estimator);
assertEquals(cardinality, expected);
}
}
/**
 * Smoke test for {@link HLL#cardinality()} and the proper use of the large
 * range correction.
 */
@Test
public void largeRangeSmokeTest() {
final int log2m = 11;
final int m = (1 << log2m);
final int regwidth = 5;
// regwidth = 5, so hash space is
// log2m + (2^5 - 1 - 1), so L = log2m + 30
final int l = log2m + 30;
// all registers at large value
{
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, m/*sparseThreshold*/, HLLType.SPARSE);
final int registerValue = 31/*chosen to ensure large correction kicks in*/;
for(int i=0; i<m; i++) {
hll.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, registerValue));
}
final long cardinality = hll.cardinality();
// Simplified estimator when all registers take same value: alpha / (m/2^val)
final double estimator = HLLUtil.alphaMSquared(m)/((double)m/Math.pow(2, registerValue));
// Assert conditions for large range
assertTrue(estimator > Math.pow(2, l)/30);
// Large range correction: -2^L * log(1 - E/2^L)
final long expected = (long)Math.ceil(-1.0 * Math.pow(2, l) * Math.log(1.0 - estimator/Math.pow(2, l)));
assertEquals(cardinality, expected);
}
}
/**
 * Tests {@link HLL#union(HLL)}.
 */
@Test
public void unionTest() {
final int log2m = 11/*arbitrary*/;
final int sparseThreshold = 256/*arbitrary*/;
{ // two empty multisets should union to an empty set
final HLL hllA = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
final HLL hllB = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
hllA.union(hllB);
assertEquals(hllA.getType(), HLLType.SPARSE/*unchanged*/);
assertEquals(hllA.cardinality(), 0L);
}
{ // two disjoint multisets should union properly
final HLL hllA = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
hllA.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 1, 1));
final HLL hllB = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
hllB.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 2, 1));
hllA.union(hllB);
assertEquals(hllA.getType(), HLLType.SPARSE/*unchanged*/);
assertEquals(hllA.cardinality(), 3L/*precomputed*/);
assertRegisterPresent(hllA, 1, (byte)1);
assertRegisterPresent(hllA, 2, (byte)1);
}
{ // two exactly overlapping multisets should union properly
final HLL hllA = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
hllA.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 1, 10));
final HLL hllB = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
hllB.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, 1, 13));
hllA.union(hllB);
assertEquals(hllA.getType(), HLLType.SPARSE/*unchanged*/);
assertEquals(hllA.cardinality(), 2L/*precomputed*/);
assertOneRegisterSet(hllA, 1, (byte)13/*max(10,13)*/);
}
{ // overlapping multisets should union properly
final HLL hllA = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
final HLL hllB = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
// register index = 3
final long rawValueA = ProbabilisticTestUtil.constructHLLValue(log2m, 3, 11);
// register index = 4
final long rawValueB = ProbabilisticTestUtil.constructHLLValue(log2m, 4, 13);
final long rawValueBPrime = ProbabilisticTestUtil.constructHLLValue(log2m, 4, 21);
// register index = 5
final long rawValueC = ProbabilisticTestUtil.constructHLLValue(log2m, 5, 14);
hllA.addRaw(rawValueA);
hllA.addRaw(rawValueB);
hllB.addRaw(rawValueBPrime);
hllB.addRaw(rawValueC);
hllA.union(hllB);
// union should have three registers set, with partition B set to the
// max of the two registers
assertRegisterPresent(hllA, 3, (byte)11);
assertRegisterPresent(hllA, 4, (byte)21/*max(21,13)*/);
assertRegisterPresent(hllA, 5, (byte)14);
}
{ // too-large unions should promote
final HLL hllA = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
final HLL hllB = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
// fill up sets to maxCapacity
for(int i=0; i<sparseThreshold; i++) {
hllA.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, i, 1));
hllB.addRaw(ProbabilisticTestUtil.constructHLLValue(log2m, (i + sparseThreshold)/*non-overlapping*/, 1));
}
hllA.union(hllB);
assertEquals(hllA.getType(), HLLType.FULL);
}
}
/**
 * Tests {@link HLL#clear()}.
 */
@Test
public void clearTest() {
final HLL hll = new HLL(log2m, 5/*regwidth*/, 128/*explicitThreshold, arbitrary, unused*/, 256/*sparseThreshold, arbitrary, unused*/, HLLType.SPARSE);
hll.addRaw(1L);
hll.clear();
assertEquals(hll.cardinality(), 0L);
}
/**
 * Tests {@link HLL#toBytes(ISchemaVersion)} and
 * {@link HLL#fromBytes(byte[])}.
 */
@Test
public void toFromBytesTest() {
final int log2m = 11/*arbitrary*/;
final int regwidth = 5/*arbitrary*/;
final int sparseThreshold = 256/*arbitrary*/;
final int shortWordLength = 16/*log2m + regwidth = 11 + 5*/;
final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION;
final HLLType type = HLLType.SPARSE;
final int padding = schemaVersion.paddingBytes(type);
{// Should work on an empty element
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
final byte[] bytes = hll.toBytes(schemaVersion);
// output should just be padding since no registers are used
assertEquals(bytes.length, padding);
final HLL inHLL = HLL.fromBytes(bytes);
// assert register values correct
assertElementsEqual(hll, inHLL);
}
{// Should work on a partially filled element
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
for(int i=0; i<3; i++) {
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, i, (i+9));
hll.addRaw(rawValue);
}
final byte[] bytes = hll.toBytes(schemaVersion);
assertEquals(bytes.length, padding + ProbabilisticTestUtil.getRequiredBytes(shortWordLength, 3/*registerCount*/));
final HLL inHLL = HLL.fromBytes(bytes);
// assert register values correct
assertElementsEqual(hll, inHLL);
}
{// Should work on a full set
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
for(int i=0; i<sparseThreshold; i++) {
final long rawValue = ProbabilisticTestUtil.constructHLLValue(log2m, i, (i % 9) + 1);
hll.addRaw(rawValue);
}
final byte[] bytes = hll.toBytes(schemaVersion);
// 'short words' should be 11 bits + 5 bits = 16 bits long
assertEquals(bytes.length, padding + ProbabilisticTestUtil.getRequiredBytes(shortWordLength, sparseThreshold));
final HLL inHLL = HLL.fromBytes(bytes);
// assert register values correct
assertElementsEqual(hll, inHLL);
}
}
/**
 * Smoke tests the multisets by adding random values.
 */
@Test
public void randomValuesTest() {
final int log2m = 11/*arbitrary*/;
final int regwidth = 5/*arbitrary*/;
final int sparseThreshold = 256/*arbitrary*/;
for(int run=0; run<100; run++) {
final HLL hll = new HLL(log2m, regwidth, 128/*explicitThreshold, arbitrary, unused*/, sparseThreshold, HLLType.SPARSE);
// shadow map of expected (index -> max register value) pairs
// NOTE(review): relies on IntByteOpenHashMap.get returning 0 for an
// absent key so unset registers compare against 0 — confirm with HPPC docs
final IntByteOpenHashMap map = new IntByteOpenHashMap();
for(int i=0; i<sparseThreshold; i++) {
final long rawValue = RandomizedTest.randomLong();
final short registerIndex = ProbabilisticTestUtil.getRegisterIndex(rawValue, log2m);
final byte registerValue = ProbabilisticTestUtil.getRegisterValue(rawValue, log2m);
if(map.get(registerIndex) < registerValue) {
map.put(registerIndex, registerValue);
}
hll.addRaw(rawValue);
}
for (IntByteCursor c : map) {
final byte expectedRegisterValue = map.get(c.key);
assertRegisterPresent(hll, c.key, expectedRegisterValue);
}
}
}
//*************************************************************************
// assertion helpers
/**
 * Asserts that the register at the specified index is set to the specified
 * value.
 */
private static void assertRegisterPresent(final HLL hll,
final int registerIndex,
final int registerValue) {
final IntByteOpenHashMap sparseProbabilisticStorage = hll.sparseProbabilisticStorage;
assertEquals(sparseProbabilisticStorage.get(registerIndex), registerValue);
}
/**
 * Asserts that only the specified register is set and has the specified value.
 */
private static void assertOneRegisterSet(final HLL hll,
final int registerIndex,
final byte registerValue) {
final IntByteOpenHashMap sparseProbabilisticStorage = hll.sparseProbabilisticStorage;
assertEquals(sparseProbabilisticStorage.size(), 1);
assertEquals(sparseProbabilisticStorage.get(registerIndex), registerValue);
}
/**
 * Asserts that all registers in the two {@link HLL} instances are identical.
 */
private static void assertElementsEqual(final HLL hllA, final HLL hllB) {
final IntByteOpenHashMap sparseProbabilisticStorageA = hllA.sparseProbabilisticStorage;
final IntByteOpenHashMap sparseProbabilisticStorageB = hllB.sparseProbabilisticStorage;
assertEquals(sparseProbabilisticStorageA.size(), sparseProbabilisticStorageB.size());
for (IntByteCursor c : sparseProbabilisticStorageA) {
assertEquals(sparseProbabilisticStorageA.get(c.key),
sparseProbabilisticStorageB.get(c.key));
}
}
}