From cdd521fb232b06f081993fa5d900024eaf510b69 Mon Sep 17 00:00:00 2001 From: Benedict Jin <1571805553@qq.com> Date: Fri, 19 May 2017 03:26:24 +0800 Subject: [PATCH] Update outdated RLE paper and improve some code refactoring (#4286) * Update outdated RLE paper and improve some code refactoring * Roll back CONCISE's abbreviation --- .../extendedset/intset/AbstractIntSet.java | 3 +- .../druid/extendedset/intset/ConciseSet.java | 43 ++++++------------- .../extendedset/intset/ConciseSetUtils.java | 2 +- .../intset/ImmutableConciseSet.java | 4 +- .../intset/ImmutableConciseSetTest.java | 5 ++- 5 files changed, 21 insertions(+), 36 deletions(-) diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java index 93066c918e7..9593b8ac180 100755 --- a/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java +++ b/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java @@ -126,8 +126,7 @@ public abstract class AbstractIntSet implements IntSet public boolean containsAny(IntSet c) { IntIterator itr = c.iterator(); - boolean res = true; - while (res && itr.hasNext()) { + while (itr.hasNext()) { if (contains(itr.next())) { return true; } diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java index 208f02affc6..fde4dde986f 100755 --- a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java +++ b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java @@ -41,9 +41,9 @@ import java.util.SortedSet; * This class is an instance of {@link IntSet} internally represented by * compressed bitmaps though a RLE (Run-Length Encoding) compression algorithm. * See http - * ://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf for more - * details. 
+ * href="http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf"> + * http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf</a> + * for more details. *
* Notice that the iterator by {@link #iterator()} is fail-fast, * similar to most {@link Collection}-derived classes. If the set is @@ -139,8 +139,8 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable * than a power of 2 takes at most O(lg(32)) time. The number of operations * is at most 12 + 9 * ceil(lg(32)). * - * See http://graphics.stanford.edu/~seander/bithacks.html + * See + * http://graphics.stanford.edu/~seander/bithacks.html * * @param n number to divide * @@ -546,10 +546,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable */ private void appendLiteral(int word) { - // when we have a zero sequence of the maximum lenght (that is, + // when we have a zero sequence of the maximum length (that is, // 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen // that we try to append a zero literal because the result of the given operation must be an - // empty set. Whitout the following test, we would have increased the + // empty set. Without the following test, we would have increased the // counter of the zero sequence, thus obtaining 0x02000000 that // represents a sequence with the first bit set! if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) { @@ -1380,10 +1380,10 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable // -- If the literal is made up of all zeros, it definitely // cannot be part of a sequence (otherwise it would not have // been created). 
Thus, we can create a 1-bit literal word - // -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding + // -- If there are MAX_LITERAL_LENGTH - 2 set bits, by adding // the new one we potentially allow for a 1's sequence // together with the successive word - // -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding + // -- If there are MAX_LITERAL_LENGTH - 1 set bits, by adding // the new one we potentially allow for a 1's sequence // together with the successive and/or the preceding words if (!simulateWAH) { @@ -1616,10 +1616,7 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable // completely "covers" the second operator if (isSequenceWithNoBits(this.words[0]) && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { - if (isZeroSequence(this.words[0])) { - return false; - } - return true; + return !isZeroSequence(this.words[0]); } if (isSequenceWithNoBits(other.words[0]) && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { @@ -1702,17 +1699,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable // disjoint sets if (isSequenceWithNoBits(this.words[0]) && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { - if (isZeroSequence(this.words[0])) { - return false; - } - return true; + return !isZeroSequence(this.words[0]); } if (isSequenceWithNoBits(other.words[0]) && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { - if (isZeroSequence(other.words[0])) { - return false; - } - return true; + return !isZeroSequence(other.words[0]); } // scan "this" and "other" @@ -1791,17 +1782,11 @@ public class ConciseSet extends AbstractIntSet implements java.io.Serializable // disjoint sets if (isSequenceWithNoBits(this.words[0]) && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { - if (isZeroSequence(this.words[0])) { - return false; - } - return true; 
+ return !isZeroSequence(this.words[0]); } if (isSequenceWithNoBits(other.words[0]) && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { - if (isZeroSequence(other.words[0])) { - return false; - } - return true; + return !isZeroSequence(other.words[0]); } // resulting size diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java index add784c904c..6c3ece0797e 100755 --- a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java +++ b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java @@ -330,7 +330,7 @@ public class ConciseSetUtils public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit) { - return lastWord &= ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)); + return lastWord & (ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit))); } public static int onesUntil(int bit) diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java index c2a54da2b9d..34e82974a77 100755 --- a/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java +++ b/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java @@ -39,7 +39,7 @@ public class ImmutableConciseSet { // Comparison is first by index, then one fills < literals < zero fills // one fills are sorted by length (longer one fills have priority) - // similarily, shorter zero fills have priority + // similarly, shorter zero fills have priority @Override public int compare(WordIterator i1, WordIterator i2) { @@ -82,7 +82,7 @@ public class ImmutableConciseSet { // Comparison is first by index, then zero fills < literals < one fills // zero fills are sorted by length (longer zero fills have priority) - // similarily, shorter one fills have priority + // similarly, shorter one 
fills have priority @Override public int compare(WordIterator i1, WordIterator i2) { diff --git a/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java b/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java index 2438bdad4e1..b2084b5f0aa 100755 --- a/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java +++ b/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java @@ -25,6 +25,7 @@ import org.junit.runners.Parameterized; import java.nio.IntBuffer; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.NoSuchElementException; @@ -1182,7 +1183,7 @@ public class ImmutableConciseSetTest { final int[] ints1 = {33, 100000}; final int[] ints2 = {34, 100000}; - List