Update outdated RLE paper link and apply some code cleanups (#4286)

* Update outdated RLE paper link and apply some code cleanups

* Roll back CONCISE's abbreviation
This commit is contained in:
Benedict Jin 2017-05-19 03:26:24 +08:00 committed by Fangjin Yang
parent 8ca7f9410e
commit cdd521fb23
5 changed files with 21 additions and 36 deletions

View File

@ -126,8 +126,7 @@ public abstract class AbstractIntSet implements IntSet
public boolean containsAny(IntSet c) public boolean containsAny(IntSet c)
{ {
IntIterator itr = c.iterator(); IntIterator itr = c.iterator();
boolean res = true; while (itr.hasNext()) {
while (res && itr.hasNext()) {
if (contains(itr.next())) { if (contains(itr.next())) {
return true; return true;
} }

View File

@ -41,9 +41,9 @@ import java.util.SortedSet;
* This class is an instance of {@link IntSet} internally represented by * This class is an instance of {@link IntSet} internally represented by
* compressed bitmaps though a RLE (Run-Length Encoding) compression algorithm. * compressed bitmaps though a RLE (Run-Length Encoding) compression algorithm.
* See <a * See <a
* href="http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf">http * href="http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf">
* ://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf</a> for more * http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf</a>
* details. * for more details.
* <p/> * <p/>
* Notice that the iterator by {@link #iterator()} is <i>fail-fast</i>, * Notice that the iterator by {@link #iterator()} is <i>fail-fast</i>,
* similar to most {@link Collection}-derived classes. If the set is * similar to most {@link Collection}-derived classes. If the set is
@ -139,8 +139,8 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
* than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations * than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations
* is at most <tt>12 + 9 * ceil(lg(32))</tt>. * is at most <tt>12 + 9 * ceil(lg(32))</tt>.
* <p/> * <p/>
* See <a * See <a href="http://graphics.stanford.edu/~seander/bithacks.html">
* href="http://graphics.stanford.edu/~seander/bithacks.html">http://graphics.stanford.edu/~seander/bithacks.html</a> * http://graphics.stanford.edu/~seander/bithacks.html</a>
* *
* @param n number to divide * @param n number to divide
* *
@ -546,10 +546,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
*/ */
private void appendLiteral(int word) private void appendLiteral(int word)
{ {
// when we have a zero sequence of the maximum lenght (that is, // when we have a zero sequence of the maximum length (that is,
// 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen // 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
// that we try to append a zero literal because the result of the given operation must be an // that we try to append a zero literal because the result of the given operation must be an
// empty set. Whitout the following test, we would have increased the // empty set. Without the following test, we would have increased the
// counter of the zero sequence, thus obtaining 0x02000000 that // counter of the zero sequence, thus obtaining 0x02000000 that
// represents a sequence with the first bit set! // represents a sequence with the first bit set!
if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) { if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) {
@ -1380,10 +1380,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// -- If the literal is made up of all zeros, it definitely // -- If the literal is made up of all zeros, it definitely
// cannot be part of a sequence (otherwise it would not have // cannot be part of a sequence (otherwise it would not have
// been created). Thus, we can create a 1-bit literal word // been created). Thus, we can create a 1-bit literal word
// -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding // -- If there are MAX_LITERAL_LENGTH - 2 set bits, by adding
// the new one we potentially allow for a 1's sequence // the new one we potentially allow for a 1's sequence
// together with the successive word // together with the successive word
// -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding // -- If there are MAX_LITERAL_LENGTH - 1 set bits, by adding
// the new one we potentially allow for a 1's sequence // the new one we potentially allow for a 1's sequence
// together with the successive and/or the preceding words // together with the successive and/or the preceding words
if (!simulateWAH) { if (!simulateWAH) {
@ -1616,10 +1616,7 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// completely "covers" the second operator // completely "covers" the second operator
if (isSequenceWithNoBits(this.words[0]) if (isSequenceWithNoBits(this.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
if (isZeroSequence(this.words[0])) { return !isZeroSequence(this.words[0]);
return false;
}
return true;
} }
if (isSequenceWithNoBits(other.words[0]) if (isSequenceWithNoBits(other.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
@ -1702,17 +1699,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// disjoint sets // disjoint sets
if (isSequenceWithNoBits(this.words[0]) if (isSequenceWithNoBits(this.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
if (isZeroSequence(this.words[0])) { return !isZeroSequence(this.words[0]);
return false;
}
return true;
} }
if (isSequenceWithNoBits(other.words[0]) if (isSequenceWithNoBits(other.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
if (isZeroSequence(other.words[0])) { return !isZeroSequence(other.words[0]);
return false;
}
return true;
} }
// scan "this" and "other" // scan "this" and "other"
@ -1791,17 +1782,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// disjoint sets // disjoint sets
if (isSequenceWithNoBits(this.words[0]) if (isSequenceWithNoBits(this.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
if (isZeroSequence(this.words[0])) { return !isZeroSequence(this.words[0]);
return false;
}
return true;
} }
if (isSequenceWithNoBits(other.words[0]) if (isSequenceWithNoBits(other.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
if (isZeroSequence(other.words[0])) { return !isZeroSequence(other.words[0]);
return false;
}
return true;
} }
// resulting size // resulting size

View File

@ -330,7 +330,7 @@ public class ConciseSetUtils
public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit) public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
{ {
return lastWord &= ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)); return lastWord & (ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)));
} }
public static int onesUntil(int bit) public static int onesUntil(int bit)

View File

@ -39,7 +39,7 @@ public class ImmutableConciseSet
{ {
// Comparison is first by index, then one fills < literals < zero fills // Comparison is first by index, then one fills < literals < zero fills
// one fills are sorted by length (longer one fills have priority) // one fills are sorted by length (longer one fills have priority)
// similarily, shorter zero fills have priority // similarly, shorter zero fills have priority
@Override @Override
public int compare(WordIterator i1, WordIterator i2) public int compare(WordIterator i1, WordIterator i2)
{ {
@ -82,7 +82,7 @@ public class ImmutableConciseSet
{ {
// Comparison is first by index, then zero fills < literals < one fills // Comparison is first by index, then zero fills < literals < one fills
// zero fills are sorted by length (longer zero fills have priority) // zero fills are sorted by length (longer zero fills have priority)
// similarily, shorter one fills have priority // similarly, shorter one fills have priority
@Override @Override
public int compare(WordIterator i1, WordIterator i2) public int compare(WordIterator i1, WordIterator i2)
{ {

View File

@ -25,6 +25,7 @@ import org.junit.runners.Parameterized;
import java.nio.IntBuffer; import java.nio.IntBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
@ -1182,7 +1183,7 @@ public class ImmutableConciseSetTest
{ {
final int[] ints1 = {33, 100000}; final int[] ints1 = {33, 100000};
final int[] ints2 = {34, 100000}; final int[] ints2 = {34, 100000};
List<Integer> expected = Arrays.asList(100000); List<Integer> expected = Collections.singletonList(100000);
ConciseSet set1 = new ConciseSet(); ConciseSet set1 = new ConciseSet();
for (int i : ints1) { for (int i : ints1) {
@ -1655,7 +1656,7 @@ public class ImmutableConciseSetTest
@Test @Test
public void testIntersectionTerminates() throws Exception public void testIntersectionTerminates() throws Exception
{ {
verifyIntersection(Arrays.<Integer>asList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet())); verifyIntersection(Collections.emptyList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet()));
} }
private void verifyIntersection(ConciseSet set1, ConciseSet set2) private void verifyIntersection(ConciseSet set1, ConciseSet set2)