mirror of https://github.com/apache/druid.git
Update outdated RLE paper and improve some code refactoring (#4286)
* Update outdated RLE paper and improve some code refactoring
* Roll back CONCISE's abbreviation
This commit is contained in:
parent
8ca7f9410e
commit
cdd521fb23
|
@ -126,8 +126,7 @@ public abstract class AbstractIntSet implements IntSet
|
||||||
public boolean containsAny(IntSet c)
|
public boolean containsAny(IntSet c)
|
||||||
{
|
{
|
||||||
IntIterator itr = c.iterator();
|
IntIterator itr = c.iterator();
|
||||||
boolean res = true;
|
while (itr.hasNext()) {
|
||||||
while (res && itr.hasNext()) {
|
|
||||||
if (contains(itr.next())) {
|
if (contains(itr.next())) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,9 +41,9 @@ import java.util.SortedSet;
|
||||||
* This class is an instance of {@link IntSet} internally represented by
|
* This class is an instance of {@link IntSet} internally represented by
|
||||||
* compressed bitmaps though a RLE (Run-Length Encoding) compression algorithm.
|
* compressed bitmaps though a RLE (Run-Length Encoding) compression algorithm.
|
||||||
* See <a
|
* See <a
|
||||||
* href="http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf">http
|
* href="http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf">
|
||||||
* ://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf</a> for more
|
* http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf</a>
|
||||||
* details.
|
* for more details.
|
||||||
* <p/>
|
* <p/>
|
||||||
* Notice that the iterator by {@link #iterator()} is <i>fail-fast</i>,
|
* Notice that the iterator by {@link #iterator()} is <i>fail-fast</i>,
|
||||||
* similar to most {@link Collection}-derived classes. If the set is
|
* similar to most {@link Collection}-derived classes. If the set is
|
||||||
|
@ -139,8 +139,8 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
||||||
* than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations
|
* than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations
|
||||||
* is at most <tt>12 + 9 * ceil(lg(32))</tt>.
|
* is at most <tt>12 + 9 * ceil(lg(32))</tt>.
|
||||||
* <p/>
|
* <p/>
|
||||||
* See <a
|
* See <a href="http://graphics.stanford.edu/~seander/bithacks.html">
|
||||||
* href="http://graphics.stanford.edu/~seander/bithacks.html">http://graphics.stanford.edu/~seander/bithacks.html</a>
|
* http://graphics.stanford.edu/~seander/bithacks.html</a>
|
||||||
*
|
*
|
||||||
* @param n number to divide
|
* @param n number to divide
|
||||||
*
|
*
|
||||||
|
@ -546,10 +546,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
||||||
*/
|
*/
|
||||||
private void appendLiteral(int word)
|
private void appendLiteral(int word)
|
||||||
{
|
{
|
||||||
// when we have a zero sequence of the maximum lenght (that is,
|
// when we have a zero sequence of the maximum length (that is,
|
||||||
// 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
|
// 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
|
||||||
// that we try to append a zero literal because the result of the given operation must be an
|
// that we try to append a zero literal because the result of the given operation must be an
|
||||||
// empty set. Whitout the following test, we would have increased the
|
// empty set. Without the following test, we would have increased the
|
||||||
// counter of the zero sequence, thus obtaining 0x02000000 that
|
// counter of the zero sequence, thus obtaining 0x02000000 that
|
||||||
// represents a sequence with the first bit set!
|
// represents a sequence with the first bit set!
|
||||||
if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) {
|
if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) {
|
||||||
|
@ -1380,10 +1380,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
||||||
// -- If the literal is made up of all zeros, it definitely
|
// -- If the literal is made up of all zeros, it definitely
|
||||||
// cannot be part of a sequence (otherwise it would not have
|
// cannot be part of a sequence (otherwise it would not have
|
||||||
// been created). Thus, we can create a 1-bit literal word
|
// been created). Thus, we can create a 1-bit literal word
|
||||||
// -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding
|
// -- If there are MAX_LITERAL_LENGTH - 2 set bits, by adding
|
||||||
// the new one we potentially allow for a 1's sequence
|
// the new one we potentially allow for a 1's sequence
|
||||||
// together with the successive word
|
// together with the successive word
|
||||||
// -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding
|
// -- If there are MAX_LITERAL_LENGTH - 1 set bits, by adding
|
||||||
// the new one we potentially allow for a 1's sequence
|
// the new one we potentially allow for a 1's sequence
|
||||||
// together with the successive and/or the preceding words
|
// together with the successive and/or the preceding words
|
||||||
if (!simulateWAH) {
|
if (!simulateWAH) {
|
||||||
|
@ -1616,10 +1616,7 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
||||||
// completely "covers" the second operator
|
// completely "covers" the second operator
|
||||||
if (isSequenceWithNoBits(this.words[0])
|
if (isSequenceWithNoBits(this.words[0])
|
||||||
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
||||||
if (isZeroSequence(this.words[0])) {
|
return !isZeroSequence(this.words[0]);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
if (isSequenceWithNoBits(other.words[0])
|
if (isSequenceWithNoBits(other.words[0])
|
||||||
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
||||||
|
@ -1702,17 +1699,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
||||||
// disjoint sets
|
// disjoint sets
|
||||||
if (isSequenceWithNoBits(this.words[0])
|
if (isSequenceWithNoBits(this.words[0])
|
||||||
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
||||||
if (isZeroSequence(this.words[0])) {
|
return !isZeroSequence(this.words[0]);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
if (isSequenceWithNoBits(other.words[0])
|
if (isSequenceWithNoBits(other.words[0])
|
||||||
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
||||||
if (isZeroSequence(other.words[0])) {
|
return !isZeroSequence(other.words[0]);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// scan "this" and "other"
|
// scan "this" and "other"
|
||||||
|
@ -1791,17 +1782,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
||||||
// disjoint sets
|
// disjoint sets
|
||||||
if (isSequenceWithNoBits(this.words[0])
|
if (isSequenceWithNoBits(this.words[0])
|
||||||
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
||||||
if (isZeroSequence(this.words[0])) {
|
return !isZeroSequence(this.words[0]);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
if (isSequenceWithNoBits(other.words[0])
|
if (isSequenceWithNoBits(other.words[0])
|
||||||
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
||||||
if (isZeroSequence(other.words[0])) {
|
return !isZeroSequence(other.words[0]);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// resulting size
|
// resulting size
|
||||||
|
|
|
@ -330,7 +330,7 @@ public class ConciseSetUtils
|
||||||
|
|
||||||
public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
|
public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
|
||||||
{
|
{
|
||||||
return lastWord &= ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit));
|
return lastWord & (ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int onesUntil(int bit)
|
public static int onesUntil(int bit)
|
||||||
|
|
|
@ -39,7 +39,7 @@ public class ImmutableConciseSet
|
||||||
{
|
{
|
||||||
// Comparison is first by index, then one fills < literals < zero fills
|
// Comparison is first by index, then one fills < literals < zero fills
|
||||||
// one fills are sorted by length (longer one fills have priority)
|
// one fills are sorted by length (longer one fills have priority)
|
||||||
// similarily, shorter zero fills have priority
|
// similarly, shorter zero fills have priority
|
||||||
@Override
|
@Override
|
||||||
public int compare(WordIterator i1, WordIterator i2)
|
public int compare(WordIterator i1, WordIterator i2)
|
||||||
{
|
{
|
||||||
|
@ -82,7 +82,7 @@ public class ImmutableConciseSet
|
||||||
{
|
{
|
||||||
// Comparison is first by index, then zero fills < literals < one fills
|
// Comparison is first by index, then zero fills < literals < one fills
|
||||||
// zero fills are sorted by length (longer zero fills have priority)
|
// zero fills are sorted by length (longer zero fills have priority)
|
||||||
// similarily, shorter one fills have priority
|
// similarly, shorter one fills have priority
|
||||||
@Override
|
@Override
|
||||||
public int compare(WordIterator i1, WordIterator i2)
|
public int compare(WordIterator i1, WordIterator i2)
|
||||||
{
|
{
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.junit.runners.Parameterized;
|
||||||
import java.nio.IntBuffer;
|
import java.nio.IntBuffer;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
@ -1182,7 +1183,7 @@ public class ImmutableConciseSetTest
|
||||||
{
|
{
|
||||||
final int[] ints1 = {33, 100000};
|
final int[] ints1 = {33, 100000};
|
||||||
final int[] ints2 = {34, 100000};
|
final int[] ints2 = {34, 100000};
|
||||||
List<Integer> expected = Arrays.asList(100000);
|
List<Integer> expected = Collections.singletonList(100000);
|
||||||
|
|
||||||
ConciseSet set1 = new ConciseSet();
|
ConciseSet set1 = new ConciseSet();
|
||||||
for (int i : ints1) {
|
for (int i : ints1) {
|
||||||
|
@ -1655,7 +1656,7 @@ public class ImmutableConciseSetTest
|
||||||
@Test
|
@Test
|
||||||
public void testIntersectionTerminates() throws Exception
|
public void testIntersectionTerminates() throws Exception
|
||||||
{
|
{
|
||||||
verifyIntersection(Arrays.<Integer>asList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet()));
|
verifyIntersection(Collections.emptyList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyIntersection(ConciseSet set1, ConciseSet set2)
|
private void verifyIntersection(ConciseSet set1, ConciseSet set2)
|
||||||
|
|
Loading…
Reference in New Issue