Update outdated RLE paper and improve some code refactoring (#4286)

* Update outdated RLE paper and improve some code refactoring

* Roll back CONCISE's abbreviation
This commit is contained in:
Benedict Jin 2017-05-19 03:26:24 +08:00 committed by Fangjin Yang
parent 8ca7f9410e
commit cdd521fb23
5 changed files with 21 additions and 36 deletions

View File

@ -126,8 +126,7 @@ public abstract class AbstractIntSet implements IntSet
public boolean containsAny(IntSet c)
{
IntIterator itr = c.iterator();
boolean res = true;
while (res && itr.hasNext()) {
while (itr.hasNext()) {
if (contains(itr.next())) {
return true;
}

View File

@ -41,9 +41,9 @@ import java.util.SortedSet;
* This class is an instance of {@link IntSet} internally represented by
* compressed bitmaps through an RLE (Run-Length Encoding) compression algorithm.
* See <a
* href="http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf">http
* ://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf</a> for more
* details.
* href="http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf">
* http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf</a>
* for more details.
* <p/>
* Notice that the iterator by {@link #iterator()} is <i>fail-fast</i>,
* similar to most {@link Collection}-derived classes. If the set is
@ -139,8 +139,8 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
* than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations
* is at most <tt>12 + 9 * ceil(lg(32))</tt>.
* <p/>
* See <a
* href="http://graphics.stanford.edu/~seander/bithacks.html">http://graphics.stanford.edu/~seander/bithacks.html</a>
* See <a href="http://graphics.stanford.edu/~seander/bithacks.html">
* http://graphics.stanford.edu/~seander/bithacks.html</a>
*
* @param n number to divide
*
@ -546,10 +546,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
*/
private void appendLiteral(int word)
{
// when we have a zero sequence of the maximum lenght (that is,
// when we have a zero sequence of the maximum length (that is,
// 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
// that we try to append a zero literal because the result of the given operation must be an
// empty set. Whitout the following test, we would have increased the
// empty set. Without the following test, we would have increased the
// counter of the zero sequence, thus obtaining 0x02000000 that
// represents a sequence with the first bit set!
if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) {
@ -1380,10 +1380,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// -- If the literal is made up of all zeros, it definitely
// cannot be part of a sequence (otherwise it would not have
// been created). Thus, we can create a 1-bit literal word
// -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding
// -- If there are MAX_LITERAL_LENGTH - 2 set bits, by adding
// the new one we potentially allow for a 1's sequence
// together with the successive word
// -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding
// -- If there are MAX_LITERAL_LENGTH - 1 set bits, by adding
// the new one we potentially allow for a 1's sequence
// together with the successive and/or the preceding words
if (!simulateWAH) {
@ -1616,10 +1616,7 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// completely "covers" the second operator
if (isSequenceWithNoBits(this.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
if (isZeroSequence(this.words[0])) {
return false;
}
return true;
return !isZeroSequence(this.words[0]);
}
if (isSequenceWithNoBits(other.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
@ -1702,17 +1699,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// disjoint sets
if (isSequenceWithNoBits(this.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
if (isZeroSequence(this.words[0])) {
return false;
}
return true;
return !isZeroSequence(this.words[0]);
}
if (isSequenceWithNoBits(other.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
if (isZeroSequence(other.words[0])) {
return false;
}
return true;
return !isZeroSequence(other.words[0]);
}
// scan "this" and "other"
@ -1791,17 +1782,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable
// disjoint sets
if (isSequenceWithNoBits(this.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
if (isZeroSequence(this.words[0])) {
return false;
}
return true;
return !isZeroSequence(this.words[0]);
}
if (isSequenceWithNoBits(other.words[0])
&& maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
if (isZeroSequence(other.words[0])) {
return false;
}
return true;
return !isZeroSequence(other.words[0]);
}
// resulting size

View File

@ -330,7 +330,7 @@ public class ConciseSetUtils
/**
 * Masks {@code lastWord} so that only the bits selected by
 * {@code ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit))} survive.
 * <p/>
 * For {@code lastSetBit} in the range 0..31 the shifted term has its
 * {@code lastSetBit + 1} lowest bits set, so every bit above position
 * {@code lastSetBit} is cleared unless it is also set in
 * {@code ALL_ZEROS_LITERAL} (defined elsewhere in this class; presumably the
 * literal-flag bit -- confirm against its declaration).
 *
 * @param lastWord   the word to mask
 * @param lastSetBit zero-based index (from the least significant bit) of the
 *                   highest bit to keep
 *
 * @return {@code lastWord} with the bits outside the mask cleared
 */
public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
{
// NOTE(review): the two return lines below are the before/after forms of the
// same statement. "&=" binds more loosely than "|", so the first form already
// evaluates to lastWord & (ALL_ZEROS_LITERAL | mask); the second form states
// that explicitly and avoids assigning to the parameter.
return lastWord &= ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit));
return lastWord & (ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)));
}
public static int onesUntil(int bit)

View File

@ -39,7 +39,7 @@ public class ImmutableConciseSet
{
// Comparison is first by index, then one fills < literals < zero fills
// one fills are sorted by length (longer one fills have priority)
// similarily, shorter zero fills have priority
// similarly, shorter zero fills have priority
@Override
public int compare(WordIterator i1, WordIterator i2)
{
@ -82,7 +82,7 @@ public class ImmutableConciseSet
{
// Comparison is first by index, then zero fills < literals < one fills
// zero fills are sorted by length (longer zero fills have priority)
// similarily, shorter one fills have priority
// similarly, shorter one fills have priority
@Override
public int compare(WordIterator i1, WordIterator i2)
{

View File

@ -25,6 +25,7 @@ import org.junit.runners.Parameterized;
import java.nio.IntBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
@ -1182,7 +1183,7 @@ public class ImmutableConciseSetTest
{
final int[] ints1 = {33, 100000};
final int[] ints2 = {34, 100000};
List<Integer> expected = Arrays.asList(100000);
List<Integer> expected = Collections.singletonList(100000);
ConciseSet set1 = new ConciseSet();
for (int i : ints1) {
@ -1655,7 +1656,7 @@ public class ImmutableConciseSetTest
/**
 * Passes an empty expected list and two freshly constructed
 * {@link ImmutableConciseSet} instances to {@code verifyIntersection}.
 * Judging by the test name, this guards against the intersection of empty
 * sets failing to terminate -- the checked behavior lives in
 * {@code verifyIntersection}, which is not visible here.
 */
@Test
public void testIntersectionTerminates() throws Exception
{
// NOTE(review): the two calls below are the before/after forms of the same
// statement; they differ only in how the empty expected list is built
// (Arrays.<Integer>asList() versus Collections.emptyList()).
verifyIntersection(Arrays.<Integer>asList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet()));
verifyIntersection(Collections.emptyList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet()));
}
private void verifyIntersection(ConciseSet set1, ConciseSet set2)