mirror of https://github.com/apache/druid.git
Update outdated RLE paper and improve some code refactoring (#4286)
* Update outdated RLE paper and improve some code refactoring * Roll back CONCISE's abbreviation
This commit is contained in:
parent
8ca7f9410e
commit
cdd521fb23
|
@ -126,8 +126,7 @@ public abstract class AbstractIntSet implements IntSet
|
|||
public boolean containsAny(IntSet c)
|
||||
{
|
||||
IntIterator itr = c.iterator();
|
||||
boolean res = true;
|
||||
while (res && itr.hasNext()) {
|
||||
while (itr.hasNext()) {
|
||||
if (contains(itr.next())) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -41,9 +41,9 @@ import java.util.SortedSet;
|
|||
* This class is an instance of {@link IntSet} internally represented by
|
||||
* compressed bitmaps through an RLE (Run-Length Encoding) compression algorithm.
|
||||
* See <a
|
||||
* href="http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf">http
|
||||
* ://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf</a> for more
|
||||
* details.
|
||||
* href="http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf">
|
||||
* http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf</a>
|
||||
* for more details.
|
||||
* <p/>
|
||||
* Notice that the iterator returned by {@link #iterator()} is <i>fail-fast</i>,
|
||||
* similar to most {@link Collection}-derived classes. If the set is
|
||||
|
@ -139,8 +139,8 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
|||
* than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations
|
||||
* is at most <tt>12 + 9 * ceil(lg(32))</tt>.
|
||||
* <p/>
|
||||
* See <a
|
||||
* href="http://graphics.stanford.edu/~seander/bithacks.html">http://graphics.stanford.edu/~seander/bithacks.html</a>
|
||||
* See <a href="http://graphics.stanford.edu/~seander/bithacks.html">
|
||||
* http://graphics.stanford.edu/~seander/bithacks.html</a>
|
||||
*
|
||||
* @param n number to divide
|
||||
*
|
||||
|
@ -546,10 +546,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
|||
*/
|
||||
private void appendLiteral(int word)
|
||||
{
|
||||
// when we have a zero sequence of the maximum lenght (that is,
|
||||
// when we have a zero sequence of the maximum length (that is,
|
||||
// 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
|
||||
// that we try to append a zero literal because the result of the given operation must be an
|
||||
// empty set. Whitout the following test, we would have increased the
|
||||
// empty set. Without the following test, we would have increased the
|
||||
// counter of the zero sequence, thus obtaining 0x02000000 that
|
||||
// represents a sequence with the first bit set!
|
||||
if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) {
|
||||
|
@ -1380,10 +1380,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
|||
// -- If the literal is made up of all zeros, it definitely
|
||||
// cannot be part of a sequence (otherwise it would not have
|
||||
// been created). Thus, we can create a 1-bit literal word
|
||||
// -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding
|
||||
// -- If there are MAX_LITERAL_LENGTH - 2 set bits, by adding
|
||||
// the new one we potentially allow for a 1's sequence
|
||||
// together with the successive word
|
||||
// -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding
|
||||
// -- If there are MAX_LITERAL_LENGTH - 1 set bits, by adding
|
||||
// the new one we potentially allow for a 1's sequence
|
||||
// together with the successive and/or the preceding words
|
||||
if (!simulateWAH) {
|
||||
|
@ -1616,10 +1616,7 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
|||
// completely "covers" the second operator
|
||||
if (isSequenceWithNoBits(this.words[0])
|
||||
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
||||
if (isZeroSequence(this.words[0])) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return !isZeroSequence(this.words[0]);
|
||||
}
|
||||
if (isSequenceWithNoBits(other.words[0])
|
||||
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
||||
|
@ -1702,17 +1699,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
|||
// disjoint sets
|
||||
if (isSequenceWithNoBits(this.words[0])
|
||||
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
||||
if (isZeroSequence(this.words[0])) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return !isZeroSequence(this.words[0]);
|
||||
}
|
||||
if (isSequenceWithNoBits(other.words[0])
|
||||
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
||||
if (isZeroSequence(other.words[0])) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return !isZeroSequence(other.words[0]);
|
||||
}
|
||||
|
||||
// scan "this" and "other"
|
||||
|
@ -1791,17 +1782,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
|
|||
// disjoint sets
|
||||
if (isSequenceWithNoBits(this.words[0])
|
||||
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
|
||||
if (isZeroSequence(this.words[0])) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return !isZeroSequence(this.words[0]);
|
||||
}
|
||||
if (isSequenceWithNoBits(other.words[0])
|
||||
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
|
||||
if (isZeroSequence(other.words[0])) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return !isZeroSequence(other.words[0]);
|
||||
}
|
||||
|
||||
// resulting size
|
||||
|
|
|
@ -330,7 +330,7 @@ public class ConciseSetUtils
|
|||
|
||||
public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
|
||||
{
|
||||
return lastWord &= ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit));
|
||||
return lastWord & (ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)));
|
||||
}
|
||||
|
||||
public static int onesUntil(int bit)
|
||||
|
|
|
@ -39,7 +39,7 @@ public class ImmutableConciseSet
|
|||
{
|
||||
// Comparison is first by index, then one fills < literals < zero fills
|
||||
// one fills are sorted by length (longer one fills have priority)
|
||||
// similarily, shorter zero fills have priority
|
||||
// similarly, shorter zero fills have priority
|
||||
@Override
|
||||
public int compare(WordIterator i1, WordIterator i2)
|
||||
{
|
||||
|
@ -82,7 +82,7 @@ public class ImmutableConciseSet
|
|||
{
|
||||
// Comparison is first by index, then zero fills < literals < one fills
|
||||
// zero fills are sorted by length (longer zero fills have priority)
|
||||
// similarily, shorter one fills have priority
|
||||
// similarly, shorter one fills have priority
|
||||
@Override
|
||||
public int compare(WordIterator i1, WordIterator i2)
|
||||
{
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.junit.runners.Parameterized;
|
|||
import java.nio.IntBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
@ -1182,7 +1183,7 @@ public class ImmutableConciseSetTest
|
|||
{
|
||||
final int[] ints1 = {33, 100000};
|
||||
final int[] ints2 = {34, 100000};
|
||||
List<Integer> expected = Arrays.asList(100000);
|
||||
List<Integer> expected = Collections.singletonList(100000);
|
||||
|
||||
ConciseSet set1 = new ConciseSet();
|
||||
for (int i : ints1) {
|
||||
|
@ -1655,7 +1656,7 @@ public class ImmutableConciseSetTest
|
|||
@Test
|
||||
public void testIntersectionTerminates() throws Exception
|
||||
{
|
||||
verifyIntersection(Arrays.<Integer>asList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet()));
|
||||
verifyIntersection(Collections.emptyList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet()));
|
||||
}
|
||||
|
||||
private void verifyIntersection(ConciseSet set1, ConciseSet set2)
|
||||
|
|
Loading…
Reference in New Issue