diff --git a/NOTICE b/NOTICE index cb8a439535b..285e571c796 100644 --- a/NOTICE +++ b/NOTICE @@ -50,3 +50,22 @@ This product contains SQL query planning code adapted from Apache Calcite * https://github.com/apache/calcite/blob/master/LICENSE (Apache License, Version 2.0) * HOMEPAGE: * https://calcite.apache.org/ + +This product contains a modified version of Metamarkets extendedset library + * LICENSE: + * https://github.com/metamx/extendedset/blob/master/LICENSE (Apache License, Version 2.0) + * HOMEPAGE: + * https://github.com/metamx/extendedset + * COMMIT TAG: + * https://github.com/metamx/extendedset/commit/c9d647d + +This product contains a modified version of Alessandro Colantonio's CONCISE +(COmpressed 'N' Composable Integer SEt) library, extending the functionality of +ConciseSet to use IntBuffers. + * (c) 2010 Alessandro Colantonio + * + * + * LICENSE: + * Apache License, Version 2.0 + * HOMEPAGE: + * https://sourceforge.net/projects/concise/ diff --git a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java index adba83188fe..37196edae8e 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java +++ b/benchmarks/src/main/java/io/druid/benchmark/BoundFilterBenchmark.java @@ -55,7 +55,7 @@ import io.druid.segment.data.Indexed; import io.druid.segment.data.RoaringBitmapSerdeFactory; import io.druid.segment.filter.BoundFilter; import io.druid.segment.serde.BitmapIndexColumnPartSupplier; -import it.uniroma3.mat.extendedset.intset.ConciseSetUtils; +import io.druid.extendedset.intset.ConciseSetUtils; @State(Scope.Benchmark) @Fork(value = 1) diff --git a/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java b/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java index feb8f0f4ba8..20625eace3e 100644 --- a/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java +++ 
b/benchmarks/src/main/java/io/druid/benchmark/ConciseComplementBenchmark.java @@ -31,7 +31,7 @@ import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.infra.Blackhole; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; @State(Scope.Benchmark) public class ConciseComplementBenchmark diff --git a/bytebuffer-collections/pom.xml b/bytebuffer-collections/pom.xml index 2f14b2f06fb..c8d4090fd1f 100755 --- a/bytebuffer-collections/pom.xml +++ b/bytebuffer-collections/pom.xml @@ -34,9 +34,9 @@ - com.metamx + io.druid extendedset - 1.3.10 + ${project.parent.version} com.google.guava diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java index 679f5331682..3efdc51d4f6 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/ConciseBitmapFactory.java @@ -22,7 +22,7 @@ package io.druid.collections.bitmap; import java.nio.ByteBuffer; import java.util.Iterator; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; /** * As the name suggests, this class instantiates bitmaps of the types diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java index 6fe730ce0f1..6888880e9a0 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseBitmap.java @@ -25,9 +25,9 @@ import org.roaringbitmap.IntIterator; import com.google.common.primitives.Ints; -import 
it.uniroma3.mat.extendedset.intset.ConciseSet; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; -import it.uniroma3.mat.extendedset.intset.IntSet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.IntSet; public class WrappedConciseBitmap implements MutableBitmap { diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java index c357cefdebd..3d636faa161 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedConciseIntIterator.java @@ -21,7 +21,7 @@ package io.druid.collections.bitmap; import org.roaringbitmap.IntIterator; -import it.uniroma3.mat.extendedset.intset.IntSet; +import io.druid.extendedset.intset.IntSet; /** */ diff --git a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java index 5fe4515b89f..9a93b391ede 100755 --- a/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java +++ b/bytebuffer-collections/src/main/java/io/druid/collections/bitmap/WrappedImmutableConciseBitmap.java @@ -24,8 +24,8 @@ import java.nio.ByteBuffer; import org.roaringbitmap.IntIterator; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; -import it.uniroma3.mat.extendedset.intset.IntSet; +import io.druid.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.IntSet; public class WrappedImmutableConciseBitmap implements ImmutableBitmap { diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java 
b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java index 3366319adaa..617e4ea6de0 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/BitmapBenchmark.java @@ -38,7 +38,7 @@ import com.carrotsearch.junitbenchmarks.BenchmarkRule; import com.carrotsearch.junitbenchmarks.Clock; import com.google.common.collect.Lists; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; @BenchmarkOptions(clock = Clock.NANO_TIME, benchmarkRounds = 50) diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java index 7ab0ce5301e..06599bc7c20 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/ConciseBitmapFactoryTest.java @@ -29,8 +29,8 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import it.uniroma3.mat.extendedset.intset.ConciseSet; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; import junit.framework.Assert; public class ConciseBitmapFactoryTest diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java index 84c170556cd..5549c85dac0 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/RangeBitmapBenchmarkTest.java @@ -29,8 
+29,8 @@ import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; import com.carrotsearch.junitbenchmarks.annotation.LabelType; import io.druid.test.annotation.Benchmark; -import it.uniroma3.mat.extendedset.intset.ConciseSet; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; @Category({Benchmark.class}) @BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) diff --git a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java index a88e3133fd5..6707a6c015a 100755 --- a/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java +++ b/bytebuffer-collections/src/test/java/io/druid/collections/bitmap/UniformBitmapBenchmarkTest.java @@ -29,8 +29,8 @@ import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart; import com.carrotsearch.junitbenchmarks.annotation.LabelType; import io.druid.test.annotation.Benchmark; -import it.uniroma3.mat.extendedset.intset.ConciseSet; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; @Category({Benchmark.class}) @BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) diff --git a/codestyle/checkstyle-suppressions.xml b/codestyle/checkstyle-suppressions.xml index e53dc96d6e2..30eca237c0b 100644 --- a/codestyle/checkstyle-suppressions.xml +++ b/codestyle/checkstyle-suppressions.xml @@ -33,4 +33,7 @@ + + + diff --git a/extendedset/pom.xml b/extendedset/pom.xml new file mode 100755 index 00000000000..eae8bc41e08 --- /dev/null +++ b/extendedset/pom.xml @@ -0,0 +1,54 @@ + + + + + 4.0.0 + + extendedset + extendedset + + Implementation of CONCISE (COmpressed 'N" Composable 
Integer SEt) bit map compression algorithm by Alessandro + Colantonio with some enhanced features - http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf + + + + io.druid + druid + 0.9.3-SNAPSHOT + + + + + com.google.guava + guava + 16.0.1 + + + + + junit + junit + 4.8.1 + test + + + + diff --git a/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java new file mode 100755 index 00000000000..c47eb79bc0d --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/AbstractExtendedSet.java @@ -0,0 +1,1432 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset; + + +import java.util.AbstractCollection; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.NoSuchElementException; + +/** + * This class provides a skeletal implementation of the {@link ExtendedSet} + * interface to minimize the effort required to implement this interface. + *

+ * The process of implementing a set by extending this class is very similar, + * for example, to that of implementing a {@link Collection} by extending + * {@link AbstractCollection}. + * + * @param the type of elements maintained by this set + * + * @author Alessandro Colantonio + * @version $Id: AbstractExtendedSet.java 157 2011-11-14 14:25:15Z cocciasik $ + */ +public abstract class AbstractExtendedSet extends AbstractSet implements ExtendedSet +{ + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet intersection(Collection other) + { + ExtendedSet clone = clone(); + clone.retainAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet union(Collection other) + { + ExtendedSet clone = clone(); + clone.addAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet difference(Collection other) + { + ExtendedSet clone = clone(); + clone.removeAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet symmetricDifference(Collection other) + { + ExtendedSet res = union(other); + res.removeAll(intersection(other)); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet complemented() + { + ExtendedSet clone = clone(); + clone.complement(); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection other) + { + return other == null || other.isEmpty() || intersectionSize(other) > 0; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + return intersectionSize(other) >= minElements; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(Collection other) + { + if (other == null || other.isEmpty() || isEmpty()) { + return 0; + } + return intersection(other).size(); + } + + /** + * {@inheritDoc} + */ + @Override + public int 
unionSize(Collection other) + { + return other == null ? size() : size() + other.size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(Collection other) + { + return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(Collection other) + { + return other == null ? size() : size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return complemented().size(); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract ExtendedSet empty(); + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet headSet(T toElement) + { + return new ExtendedSubSet(null, toElement); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet subSet(T fromElement, T toElement) + { + return new ExtendedSubSet(fromElement, toElement); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet tailSet(T fromElement) + { + return new ExtendedSubSet(fromElement, null); + } + + /** + * {@inheritDoc} + */ + @Override + public T first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return iterator().next(); + } + + /** + * {@inheritDoc} + */ + @Override + public T last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return descendingIterator().next(); + } + + /** + * {@inheritDoc} + *

+ * NOTE: When overriding this method, please note that + * Object.clone() is much slower then performing + * new and "manually" copying data! + */ + @SuppressWarnings("unchecked") + @Override + public ExtendedSet clone() + { + try { + return (ExtendedSet) super.clone(); + } + catch (CloneNotSupportedException e) { + throw new InternalError(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public abstract double bitmapCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + public abstract double collectionCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + @SuppressWarnings("unchecked") + public ExtendedIterator descendingIterator() + { + // used to compare items + Comparator tmpComp = AbstractExtendedSet.this.comparator(); + if (tmpComp == null) { + tmpComp = new Comparator() + { + @Override + public int compare(T o1, T o2) + { + return ((Comparable) o1).compareTo(o2); + } + }; + } + final Comparator comp = tmpComp; + + return new ExtendedIterator() + { + // iterator from last element + private final ListIterator itr = new ArrayList(AbstractExtendedSet.this) + .listIterator(AbstractExtendedSet.this.size()); + + @Override + public boolean hasNext() + { + return itr.hasPrevious(); + } + + @Override + public T next() + { + return itr.previous(); + } + + @Override + public void skipAllBefore(T element) + { + // iterate until the element is found + while (itr.hasPrevious()) { + int res = comp.compare(itr.previous(), element); + + // the element has not been found, thus the next call to + // itr.previous() will provide the right value + if (res < 0) { + return; + } + + // the element has been found. 
Hence, we have to get back + // to make itr.previous() provide the right value + if (res == 0) { + itr.next(); + return; + } + } + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public Iterable descending() + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return descendingIterator(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet() + { + return powerSet(1, Integer.MAX_VALUE); + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet(int min, int max) + { + if (min < 1 || max < min) { + throw new IllegalArgumentException(); + } + + // special cases + List> res = new ArrayList>(); + if (size() < min) { + return res; + } + if (size() == min) { + res.add(this.clone()); + return res; + } + if (size() == min + 1) { + for (T item : this.descending()) { + ExtendedSet set = this.clone(); + set.remove(item); + res.add(set); + } + if (max > min) { + res.add(this.clone()); + } + return res; + } + + // the first level contains only one prefix made up of all 1-subsets + List>> level = new ArrayList>>(); + level.add(new ArrayList>()); + for (T item : this) { + ExtendedSet single = this.empty(); + single.add(item); + level.get(0).add(single); + } + if (min == 1) { + res.addAll(level.get(0)); + } + + // all combinations + int l = 2; + while (!level.isEmpty() && l <= max) { + List>> newLevel = new ArrayList>>(); + for (List> prefix : level) { + for (int i = 0; i < prefix.size() - 1; i++) { + List> newPrefix = new ArrayList>(); + for (int j = i + 1; j < prefix.size(); j++) { + ExtendedSet x = prefix.get(i).clone(); + x.add(prefix.get(j).last()); + newPrefix.add(x); + if (l >= min) { + res.add(x); + } + } + if (newPrefix.size() > 1) { + newLevel.add(newPrefix); + } + } + } + level = newLevel; + l++; + } + + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize() + { + return 
isEmpty() ? 0 : (int) Math.pow(2, size()) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize(int min, int max) + { + if (min < 1 || max < min) { + throw new IllegalArgumentException(); + } + final int size = size(); + + // special cases + if (size < min) { + return 0; + } + if (size == min) { + return 1; + } + + /* + * Compute the sum of binomial coefficients ranging from (size choose + * max) to (size choose min) using dynamic programming + */ + + // trivial cases + max = Math.min(size, max); + if (max == min && (max == 0 || max == size)) { + return 1; + } + + // compute all binomial coefficients for "n" + int[] b = new int[size + 1]; + for (int i = 0; i <= size; i++) { + b[i] = 1; + } + for (int i = 1; i <= size; i++) { + for (int j = i - 1; j > 0; j--) { + b[j] += b[j - 1]; + } + } + + // sum binomial coefficients + int res = 0; + for (int i = min; i <= max; i++) { + res += b[i]; + } + return res; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public int compareTo(ExtendedSet o) + { + Iterator thisIterator = this.descendingIterator(); + Iterator otherIterator = o.descendingIterator(); + while (thisIterator.hasNext() && otherIterator.hasNext()) { + T thisItem = thisIterator.next(); + T otherItem = otherIterator.next(); + int res = ((Comparable) thisItem).compareTo(otherItem); + if (res != 0) { + return res; + } + } + return thisIterator.hasNext() ? 1 : (otherIterator.hasNext() ? 
-1 : 0); + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(T from, T to) + { + ExtendedSet toAdd = empty(); + toAdd.add(to); + toAdd.complement(); + toAdd.add(to); + + ExtendedSet toRemove = empty(); + toRemove.add(from); + toRemove.complement(); + + toAdd.removeAll(toRemove); + + this.addAll(toAdd); + } + + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public void clear(T from, T to) + { + ExtendedIterator itr = iterator(); + itr.skipAllBefore(from); + while (itr.hasNext()) { + if (((Comparable) itr.next()).compareTo(to) < 0) { + itr.remove(); + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(T e) + { + if (!add(e)) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public T get(int i) + { + int size = size(); + if (i < 0 || i >= size) { + throw new IndexOutOfBoundsException(); + } + + Iterator itr; + if (i < (size / 2)) { + itr = iterator(); + for (int j = 0; j <= i - 1; j++) { + itr.next(); + } + } else { + itr = descendingIterator(); + for (int j = size - 1; j >= i + 1; j--) { + itr.next(); + } + } + return itr.next(); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(T e) + { + Iterator itr = iterator(); + int i = 0; + while (itr.hasNext()) { + if (itr.next().equals(e)) { + return i; + } + i++; + } + return -1; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet unmodifiable() + { + return new UnmodifiableExtendedSet(); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract ExtendedIterator iterator(); + + /** + * {@inheritDoc} + */ + @Override + public double jaccardSimilarity(ExtendedSet other) + { + if (isEmpty() && other.isEmpty()) { + return 1D; + } + int inters = intersectionSize(other); + return (double) inters / (size() + other.size() - inters); + } + + /** + * {@inheritDoc} + */ + @Override + public double jaccardDistance(ExtendedSet other) + { + return 1D - jaccardSimilarity(other); + } + + /** + * {@inheritDoc} + */ + 
@Override + public double weightedJaccardSimilarity(ExtendedSet other) + { + // two empty sets are considered identical + if (isEmpty() && other.isEmpty()) { + return 1D; + } + + // weight of the elements shared by the two sets + ExtendedSet inters = intersection(other); + double intersSum = 0D; + for (T t : inters) { + intersSum += numericValue(t); + } + + // weight of the elements that belong to exactly one of the two sets + ExtendedSet symmetricDiff = symmetricDifference(other); + double symmetricDiffSum = 0D; + for (T t : symmetricDiff) { + symmetricDiffSum += numericValue(t); + } + + // NOTE: the result is NaN when both sums are zero (e.g. the only element is 0) + return intersSum / (intersSum + symmetricDiffSum); + } + + /** + * Returns the value of a set element as a {@code double}, so that the + * weighted Jaccard sums are computed the same way for every element. + * + * @param t the element to convert + * + * @return the numeric value of {@code t} + * + * @throws IllegalArgumentException if {@code t} is not an {@link Integer}, + * {@link Double}, {@link Float}, {@link Byte}, + * {@link Long} or {@link Short} + */ + private static double numericValue(Object t) + { + if (t instanceof Integer + || t instanceof Double + || t instanceof Float + || t instanceof Byte + || t instanceof Long + || t instanceof Short) { + return ((Number) t).doubleValue(); + } + throw new IllegalArgumentException("A collection of numbers is required"); + } + + /** + * {@inheritDoc} + */ + @Override + public double weightedJaccardDistance(ExtendedSet other) + { + return 1D - weightedJaccardSimilarity(other); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet convert(Object... 
e) + { + if (e == null) { + return empty(); + } + return convert(Arrays.asList(e)); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public ExtendedSet convert(Collection c) + { + ExtendedSet res = empty(); + res.addAll((Collection) c); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return toString(); + } + + /** + * Base class for {@link ExtendedSubSet} and {@link UnmodifiableExtendedSet} + */ + protected abstract class FilteredSet implements ExtendedSet + { + /** + * @return the container instance, namely the "internal" representation + */ + protected abstract ExtendedSet raw(); + + /* + * Converter methods that allow for good performance with collection + * operations by directly working on the internal representation + */ + @Override + public ExtendedSet convert(Collection c) + { + if (c instanceof AbstractExtendedSet.FilteredSet) { + // unwrap the other view and convert its internal representation; the + // result has to be returned, otherwise the unwrapping is wasted and + // the wrapper itself is converted by the statement below + return convert(((AbstractExtendedSet.FilteredSet) c).raw()); + } + return raw().convert(c); + } + + @Override + public ExtendedSet convert(Object... 
e) + { + return raw().convert(e); + } + + /* + * Methods that directly apply to container instance + */ + @Override + public ExtendedSet clone() {return AbstractExtendedSet.this.clone();} + + @Override + public ExtendedSet empty() {return AbstractExtendedSet.this.empty();} + + @Override + public Comparator comparator() {return AbstractExtendedSet.this.comparator();} + + /* + * Read-only methods + */ + @Override + public ExtendedSet unmodifiable() {return raw().unmodifiable();} + + @Override + public ExtendedIterator iterator() {return raw().iterator();} + + @Override + public ExtendedIterator descendingIterator() {return raw().descendingIterator();} + + @Override + public boolean isEmpty() {return raw().isEmpty();} + + @Override + public boolean equals(Object o) {return raw().equals(o);} + + @Override + public int hashCode() {return raw().hashCode();} + + @Override + public int compareTo(ExtendedSet o) {return raw().compareTo(o);} + + @Override + public T first() {return raw().first();} + + @Override + public T last() {return raw().last();} + + @Override + public double bitmapCompressionRatio() {return raw().bitmapCompressionRatio();} + + @Override + public double collectionCompressionRatio() {return raw().collectionCompressionRatio();} + + @Override + public List> powerSet() {return raw().powerSet();} + + @Override + public List> powerSet(int mins, int maxs) {return raw().powerSet(mins, maxs);} + + @Override + public int powerSetSize() {return raw().powerSetSize();} + + @Override + public int powerSetSize(int mins, int maxs) {return raw().powerSetSize(mins, maxs);} + + @Override + public Object[] toArray() {return raw().toArray();} + + @Override + public X[] toArray(X[] a) {return raw().toArray(a);} + + @Override + public String toString() {return raw().toString();} + + @Override + public ExtendedSet complemented() {return raw().complemented();} + + @Override + public int complementSize() {return raw().complementSize();} + + @Override + public int size() {return 
raw().size();} + + @Override + public boolean contains(Object o) {return raw().contains(o);} + + @Override + public Iterable descending() {return raw().descending();} + + @Override + public String debugInfo() {return raw().debugInfo();} + + @Override + public T get(int i) {return raw().get(i);} + + @Override + public int indexOf(T e) {return raw().indexOf(e);} + + /* + * Methods that requires a call to convert() to assure good performances + */ + @Override + public double jaccardDistance(ExtendedSet other) {return raw().jaccardDistance(convert(other));} + + @Override + public double jaccardSimilarity(ExtendedSet other) {return raw().jaccardSimilarity(convert(other));} + + @Override + public double weightedJaccardDistance(ExtendedSet other) {return raw().weightedJaccardDistance(convert(other));} + + @Override + public double weightedJaccardSimilarity(ExtendedSet other) {return raw().weightedJaccardSimilarity(convert(other));} + + @Override + public ExtendedSet difference(Collection other) {return raw().difference(convert(other));} + + @Override + public ExtendedSet symmetricDifference(Collection other) + { + return raw().symmetricDifference(convert(other)); + } + + @Override + public ExtendedSet intersection(Collection other) {return raw().intersection(convert(other));} + + @Override + public ExtendedSet union(Collection other) {return raw().union(convert(other));} + + @Override + public int intersectionSize(Collection other) {return raw().intersectionSize(convert(other));} + + @Override + public int differenceSize(Collection other) {return raw().differenceSize(convert(other));} + + @Override + public int unionSize(Collection other) {return raw().unionSize(convert(other));} + + @Override + public int symmetricDifferenceSize(Collection other) + { + return raw().symmetricDifferenceSize(convert(other)); + } + + @Override + public boolean containsAll(Collection c) {return raw().containsAll(convert(c));} + + @Override + public boolean containsAny(Collection other) 
{return raw().containsAny(convert(other));} + + @Override + public boolean containsAtLeast( + Collection other, + int minElements + ) + {return raw().containsAtLeast(convert(other), minElements);} + } + + /** + * Read-only view of the set. + *

+ * Note that it extends {@link AbstractExtendedSet} instead of implementing + * {@link ExtendedSet} because of the methods {@link #tailSet(Object)}, + * {@link #headSet(Object)}, and {@link #subSet(Object, Object)}. + */ + protected class UnmodifiableExtendedSet extends AbstractExtendedSet.FilteredSet + { + // exception message when writing operations are performed on {@link #unmodifiable()} + private final static String UNSUPPORTED_MSG = "The class is read-only!"; + + /* + * Unsupported writing methods + */ + @Override + public boolean add(T e) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean addAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean remove(Object o) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean removeAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public boolean retainAll(Collection c) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void clear() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void clear(T from, T to) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void fill(T from, T to) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void complement() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + @Override + public void flip(T e) {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + + /* + * Special purpose methods + */ + + // create new iterators where the remove() operation is not permitted + @Override + public ExtendedIterator iterator() + { + final ExtendedIterator itr = AbstractExtendedSet.this.iterator(); + return new ExtendedIterator() + { + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return itr.next();} + + @Override + public void 
skipAllBefore(T element) {itr.skipAllBefore(element);} + + @Override + public void remove() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + }; + } + + @Override + public ExtendedIterator descendingIterator() + { + final ExtendedIterator itr = AbstractExtendedSet.this.descendingIterator(); + return new ExtendedIterator() + { + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return itr.next();} + + @Override + public void skipAllBefore(T element) {itr.skipAllBefore(element);} + + @Override + public void remove() {throw new UnsupportedOperationException(UNSUPPORTED_MSG);} + }; + } + + /** + * Returns a read-only subset + */ + // TODO: There is a known bug. Indeed, this implementation does not work + // since modifications to the read-write set are not reflected to the + // read-only set. + private ExtendedSet unmodifiableSubSet(T min, T max) + { + ExtendedSet res; + ExtendedSet range = AbstractExtendedSet.this.empty(); + if (min != null && max != null) { + range.fill(min, max); + range.remove(max); + res = AbstractExtendedSet.this.intersection(range).unmodifiable(); + } else if (max != null) { + range.add(max); + range.complement(); + res = AbstractExtendedSet.this.intersection(range).unmodifiable(); + } else { + range.add(min); + range.complement(); + res = AbstractExtendedSet.this.difference(range).unmodifiable(); + } + return res; + } + + // subset operations must be read-only + @Override + public ExtendedSet headSet(T toElement) {return unmodifiableSubSet(null, toElement);} + + @Override + public ExtendedSet subSet(T fromElement, T toElement) {return unmodifiableSubSet(fromElement, toElement);} + + @Override + public ExtendedSet tailSet(T fromElement) {return unmodifiableSubSet(fromElement, null);} + + @Override + public ExtendedSet unmodifiable() + { + // useless to create another instance + return this; + } + + @Override + protected ExtendedSet raw() + { + return AbstractExtendedSet.this; + } + } + + 
/** + * Used by {@link AbstractExtendedSet#headSet(T)} , {@link AbstractExtendedSet#tailSet(T)} and {@link AbstractExtendedSet#subSet(T, T)} to offer a restricted view of the entire set + */ + protected class ExtendedSubSet extends AbstractExtendedSet.FilteredSet + { + /** + * Minimun allowed element (included) and maximum allowed element + * (excluded) + */ + private final T min; + + /** + * Minimun allowed element (included) and maximum allowed element + * (excluded) + */ + private final T max; + + /** + * When max != null, it contains all elements from {@link #min} to {@link #max} - 1. Otherwise, it contains all the elements strictly below {@link #min} + * + * @uml.property name="range" + * @uml.associationEnd + */ + private final ExtendedSet range; + /** + * Comparator for elements of type T + */ + private final Comparator localComparator; + + + + /* + * PRIVATE UTILITY METHODS + */ + + // initialize the comparator + { + final Comparator c = AbstractExtendedSet.this.comparator(); + if (c != null) { + localComparator = c; + } else { + localComparator = new Comparator() + { + @SuppressWarnings("unchecked") + @Override + public int compare(T o1, T o2) + { + return ((Comparable) o1).compareTo(o2); + } + }; + } + } + + /** + * Creates the subset + * + * @param min minimun allowed element (included) + * @param max maximum allowed element (excluded) + */ + public ExtendedSubSet(T min, T max) + { + if (min == null && max == null) { + throw new IllegalArgumentException(); + } + + if (min != null && max != null + && localComparator.compare(min, max) > 0) { + throw new IllegalArgumentException("min > max"); + } + + this.min = min; + this.max = max; + + // add all elements that are strictly less than "max" + range = AbstractExtendedSet.this.empty(); + if (min != null && max != null) { + range.fill(min, max); + range.remove(max); + } else if (max != null) { + range.add(max); + range.complement(); + } else { + range.add(min); + range.complement(); + } + } + + /** + * Checks 
if a given set is completely contained within {@link #min} and + * {@link #max} + * + * @param other given set + * + * @return true if the given set is completely contained + * within {@link #min} and {@link #max} + */ + private boolean isInRange(ExtendedSet other) + { + return other.isEmpty() || + ((max == null || localComparator.compare(other.last(), max) < 0) + && (min == null || localComparator.compare(other.first(), min) >= 0)); + } + + /** + * Checks if a given element is completely contained within {@link #min} + * and {@link #max} + * + * @param e given element + * + * @return true if the given element is completely + * contained within {@link #min} and {@link #max} + */ + @SuppressWarnings("unchecked") + private boolean isInRange(Object e) + { + return (max == null || localComparator.compare((T) e, max) < 0) + && (min == null || localComparator.compare((T) e, min) >= 0); + } + + /** + * Generates a set that represent a subview of the given set, namely + * elements from {@link #min} (included) to {@link #max} (excluded) + * + * @param toFilter given set + * + * @return the subview + */ + private ExtendedSet filter(ExtendedSet toFilter) + { + if (isInRange(toFilter)) { + return toFilter; + } + if (max != null) { + return toFilter.intersection(range); + } + return toFilter.difference(range); + } + + + @Override + protected ExtendedSet raw() + { + return filter(AbstractExtendedSet.this); + } + + + + /* + * PUBLIC METHODS + */ + + @Override + public ExtendedSet headSet(T toElement) + { + if (localComparator.compare(toElement, max) > 0) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.new ExtendedSubSet(min, toElement); + } + + @Override + public ExtendedSet subSet(T fromElement, T toElement) + { + if (localComparator.compare(fromElement, min) < 0 + || localComparator.compare(toElement, max) > 0) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.new ExtendedSubSet(fromElement, toElement); + } + + 
@Override + public ExtendedSet tailSet(T fromElement) + { + if (localComparator.compare(fromElement, min) < 0) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.new ExtendedSubSet(fromElement, max); + } + + @Override + public boolean addAll(Collection c) + { + if (c == null) { + return false; + } + ExtendedSet other = convert(c); + if (!isInRange(other)) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.addAll(other); + } + + @Override + public boolean removeAll(Collection c) + { + if (c == null) { + return false; + } + return AbstractExtendedSet.this.removeAll(filter(convert(c))); + } + + @Override + public boolean retainAll(Collection c) + { + if (c == null) { + return false; + } + ExtendedSet other = convert(c); + + if (isInRange(AbstractExtendedSet.this)) { + return AbstractExtendedSet.this.retainAll(other); + } + + int sizeBefore = AbstractExtendedSet.this.size(); + ExtendedSet res = AbstractExtendedSet.this.intersection(other); + clear(); + AbstractExtendedSet.this.addAll(res); + return AbstractExtendedSet.this.size() != sizeBefore; + } + + @Override + public boolean containsAll(Collection c) + { + if (c == null) { + return false; + } + ExtendedSet other = convert(c); + return isInRange(other) && AbstractExtendedSet.this.containsAll(other); + } + + @Override + public boolean add(T e) + { + if (!isInRange(e)) { + throw new IllegalArgumentException(); + } + return AbstractExtendedSet.this.add(e); + } + + @Override + public void clear() + { + if (isInRange(AbstractExtendedSet.this)) { + AbstractExtendedSet.this.clear(); + } else if (max != null) { + AbstractExtendedSet.this.removeAll(range); + } else { + AbstractExtendedSet.this.retainAll(range); + } + } + + @Override + public boolean contains(Object o) + { + return o != null && isInRange(o) && AbstractExtendedSet.this.contains(o); + } + + @Override + public boolean remove(Object o) + { + return o != null && isInRange(o) && 
AbstractExtendedSet.this.remove(o); + } + + @Override + public int size() + { + if (isInRange(AbstractExtendedSet.this)) { + return AbstractExtendedSet.this.size(); + } + if (max != null) { + return AbstractExtendedSet.this.intersectionSize(range); + } + return AbstractExtendedSet.this.differenceSize(range); + } + + @Override + public void complement() + { + ExtendedSet c = complemented(); + clear(); + AbstractExtendedSet.this.addAll(c); + } + + @Override + public int complementSize() + { + return complemented().size(); + } + + @Override + public ExtendedSet complemented() + { + return filter(raw().complemented()); + } + + @Override + public String debugInfo() + { + return String.format("min = %s, max = %s\nmask = %s\nelements = %s", + min.toString(), max.toString(), range.debugInfo(), AbstractExtendedSet.this.toString() + ); + } + + @Override + public void clear(T from, T to) + { + ExtendedSet toRemove = empty(); + toRemove.fill(from, to); + removeAll(toRemove); + } + + @Override + public boolean containsAny(Collection other) + { + return AbstractExtendedSet.this.containsAny(filter(convert(other))); + } + + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + return AbstractExtendedSet.this.containsAtLeast(filter(convert(other)), minElements); + } + + @Override + public Iterable descending() + { + return new Iterable() + { + @Override + public Iterator iterator() + { + return descendingIterator(); + } + }; + } + + @Override + public void fill(T from, T to) + { + if (!isInRange(from) || !isInRange(to)) { + throw new IllegalArgumentException(); + } + AbstractExtendedSet.this.fill(from, to); + } + + @Override + public void flip(T e) + { + if (!isInRange(e)) { + throw new IllegalArgumentException(); + } + AbstractExtendedSet.this.flip(e); + } + + @Override + public T get(int i) + { + int minIndex = 0; + if (min != null) { + minIndex = AbstractExtendedSet.this.indexOf(min); + } + T r = AbstractExtendedSet.this.get(minIndex + i); + if 
(!isInRange(r)) { + throw new IllegalArgumentException(); + } + return r; + } + + @Override + public int indexOf(T e) + { + if (!isInRange(e)) { + throw new IllegalArgumentException(); + } + int minIndex = 0; + if (min != null) { + minIndex = AbstractExtendedSet.this.indexOf(min); + } + return AbstractExtendedSet.this.indexOf(e) - minIndex; + } + + @Override + public ExtendedSet clone() + { + return raw(); + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java new file mode 100755 index 00000000000..beaa52368ad --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/ExtendedSet.java @@ -0,0 +1,592 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package io.druid.extendedset; + + +import io.druid.extendedset.intset.ArraySet; +import io.druid.extendedset.intset.IntSet; +import io.druid.extendedset.wrappers.GenericExtendedSet; +import io.druid.extendedset.wrappers.IndexedSet; +import io.druid.extendedset.wrappers.IntegerSet; +import io.druid.extendedset.wrappers.LongSet; +import io.druid.extendedset.wrappers.matrix.PairSet; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.SortedSet; + +/** + * An interface which extends {@link SortedSet} by adding + * intersection/union/difference and other set operations. + * + * @param the type of elements maintained by this set + * + * @author Alessandro Colantonio + * @version $Id: ExtendedSet.java 140 2011-02-07 21:30:29Z cocciasik $ + * @see AbstractExtendedSet + * @see IndexedSet + * @see GenericExtendedSet + * @see ArraySet + * @see IntegerSet + * @see LongSet + * @see PairSet + */ +public interface ExtendedSet extends SortedSet, Cloneable, Comparable> +{ + /** + * Generates the intersection set + * + * @param other {@link ExtendedSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #retainAll(java.util.Collection) + */ + public ExtendedSet intersection(Collection other); + + /** + * Generates the union set + * + * @param other {@link ExtendedSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #addAll(java.util.Collection) + */ + public ExtendedSet union(Collection other); + + /** + * Generates the difference set + * + * @param other {@link ExtendedSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #removeAll(java.util.Collection) + */ + public ExtendedSet difference(Collection other); + + /** + * Generates the symmetric difference set + * + * @param other {@link ExtendedSet} instance 
that represents the right + * operand + * + * @return the result of the operation + * + * @see #flip(Object) + */ + public ExtendedSet symmetricDifference(Collection other); + + /** + * Generates the complement set. The returned set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. + * + * @return the complement set + * + * @see ExtendedSet#complement() + */ + public ExtendedSet complemented(); + + /** + * Complements the current set. The modified set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. + * + * @see ExtendedSet#complemented() + */ + public void complement(); + + /** + * Returns true if the specified {@link Collection} instance + * contains any elements that are also contained within this + * {@link ExtendedSet} instance + * + * @param other {@link ExtendedSet} to intersect with + * + * @return a boolean indicating whether this {@link ExtendedSet} intersects + * the specified {@link ExtendedSet}. + */ + public boolean containsAny(Collection other); + + /** + * Returns true if the specified {@link Collection} instance + * contains at least minElements elements that are also + * contained within this {@link ExtendedSet} instance + * + * @param other {@link Collection} instance to intersect with + * @param minElements minimum number of elements to be contained within this + * {@link ExtendedSet} instance + * + * @return a boolean indicating whether this {@link ExtendedSet} intersects + * the specified {@link Collection}. + * + * @throws IllegalArgumentException if minElements < 1 + */ + public boolean containsAtLeast(Collection other, int minElements); + + /** + * Computes the intersection set size. + *

+ * This is faster than calling {@link #intersection(Collection)} and + * then {@link #size()} + * + * @param other {@link Collection} instance that represents the right + * operand + * + * @return the size + */ + public int intersectionSize(Collection other); + + /** + * Computes the union set size. + *

+ * This is faster than calling {@link #union(Collection)} and then + * {@link #size()} + * + * @param other {@link Collection} instance that represents the right + * operand + * + * @return the size + */ + public int unionSize(Collection other); + + /** + * Computes the symmetric difference set size. + *

+ * This is faster than calling + * {@link #symmetricDifference(Collection)} and then {@link #size()} + * + * @param other {@link Collection} instance that represents the right + * operand + * + * @return the size + */ + public int symmetricDifferenceSize(Collection other); + + /** + * Computes the difference set size. + *

+ * This is faster than calling {@link #difference(Collection)} and + * then {@link #size()} + * + * @param other {@link Collection} instance that represents the right + * operand + * + * @return the size + */ + public int differenceSize(Collection other); + + /** + * Computes the complement set size. + *

+ * This is faster than calling {@link #complemented()} and then + * {@link #size()} + * + * @return the size + */ + public int complementSize(); + + /** + * Generates an empty set + * + * @return the empty set + */ + public ExtendedSet empty(); + + /** + * See the clone() of {@link Object} + * + * @return cloned object + */ + public ExtendedSet clone(); + + /** + * Computes the compression factor of the equivalent bitmap representation + * (1 means not compressed, namely a memory footprint similar to + * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) + * + * @return the compression factor + */ + public double bitmapCompressionRatio(); + + /** + * Computes the compression factor of the equivalent integer collection (1 + * means not compressed, namely a memory footprint similar to + * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) + * + * @return the compression factor + */ + public double collectionCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator iterator(); + + /** + * Gets the descending order iterator over the elements of type + * T + * + * @return descending iterator + */ + public ExtendedIterator descendingIterator(); + + /** + * Allows to use the Java "for-each" statement in descending order + * + * @return {@link Iterable} instance to iterate items in descending + * order + */ + public Iterable descending(); + + /** + * Computes the power-set of the current set. + *

+ * It is a particular implementation of the algorithm Apriori (see: + * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + * Association Rules in Large Databases, in Proceedings of the + * 20th International Conference on Very Large Data Bases, + * p.487-499, 1994). The returned power-set does not contain the + * empty set. + *

+ * The subsets composing the powerset are returned in a list that is sorted + * according to the lexicographical order provided by the sorted set. + * + * @return the power-set + * + * @see #powerSet(int, int) + * @see #powerSetSize() + */ + public List> powerSet(); + + /** + * Computes a subset of the power-set of the current set, composed by those + * subsets that have cardinality between min and + * max. + *

+ * It is a particular implementation of the algorithm Apriori (see: + * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + * Association Rules in Large Databases, in Proceedings of the + * 20th International Conference on Very Large Data Bases, + * p.487-499, 1994). The power-set does not contain the empty set. + *

+ * The subsets composing the powerset are returned in a list that is sorted + * according to the lexicographical order provided by the sorted set. + * + * @param min minimum subset size (greater than zero) + * @param max maximum subset size + * + * @return the power-set + * + * @see #powerSet() + * @see #powerSetSize(int, int) + */ + public List> powerSet(int min, int max); + + /** + * Computes the power-set size of the current set. + *

+ * The power-set does not contain the empty set. + * + * @return the power-set size + * + * @see #powerSet() + */ + public int powerSetSize(); + + /** + * Computes the power-set size of the current set, composed by those subsets + * that have cardinality between min and max. + *

+ * The returned power-set does not contain the empty set. + * + * @param min minimum subset size (greater than zero) + * @param max maximum subset size + * + * @return the power-set size + * + * @see #powerSet(int, int) + */ + public int powerSetSize(int min, int max); + + /** + * Prints debug info about the given {@link ExtendedSet} implementation + * + * @return a string that describes the internal representation of the + * instance + */ + public String debugInfo(); + + /** + * Adds to the set all the elements between first and + * last, both included. It supposes that there is an ordering + * of the elements of type T and that the universe of all + * possible elements is known. + * + * @param from first element + * @param to last element + */ + public void fill(T from, T to); + + /** + * Removes from the set all the elements between first and + * last, both included. It supposes that there is an ordering + * of the elements of type T and that the universe of all + * possible elements is known. + * + * @param from first element + * @param to last element + */ + public void clear(T from, T to); + + /** + * Adds the element if it not existing, or removes it if existing + * + * @param e element to flip + * + * @see #symmetricDifference(Collection) + */ + public void flip(T e); + + /** + * Gets the read-only version of the current set + * + * @return the read-only version of the current set + */ + public ExtendedSet unmodifiable(); + + /** + * Gets the ith element of the set + * + * @param i position of the element in the sorted set + * + * @return the ith element of the set + * + * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to + * {@link #size()} + */ + public T get(int i); + + /** + * Provides position of element within the set. + *

+ * It returns -1 if the element does not exist within the set. + * + * @param e element of the set + * + * @return the element position + */ + public int indexOf(T e); + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet tailSet(T fromElement); + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet headSet(T toElement); + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet subSet(T fromElement, T toElement); + + /** + * Converts a given {@link Collection} instance into an instance of the + * current class. NOTE: when the collection is already an instance of + * the current class, the method returns the collection itself. + * + * @param c collection to use to generate the new instance + * + * @return the converted collection + * + * @see #convert(Object...) + */ + public ExtendedSet convert(Collection c); + + /** + * Converts a given integer array into an instance of the current class + * + * @param e objects to use to generate the new instance + * + * @return the converted collection + * + * @see #convert(Collection) + */ + public ExtendedSet convert(Object... e); + + /** + * Computes the Jaccard similarity coefficient between this set and the + * given set. + *

+ * The coefficient is defined as + * |A intersection B| / |A union B|. + * + * @param other the other set + * + * @return the Jaccard similarity coefficient + * + * @see #jaccardDistance(ExtendedSet) + */ + public double jaccardSimilarity(ExtendedSet other); + + /** + * Computes the Jaccard distance between this set and the given set. + *

+ * The coefficient is defined as + * 1 - {@link #jaccardSimilarity(ExtendedSet)}. + * + * @param other the other set + * + * @return the Jaccard distance + * + * @see #jaccardSimilarity(ExtendedSet) + */ + public double jaccardDistance(ExtendedSet other); + + /** + * Computes the weighted version of the Jaccard similarity coefficient + * between this set and the given set. + *

+ * The coefficient is defined as + * sum of min(A_i, B_i) / sum of max(A_i, B_i). + *

+ * NOTE: T must be a number, namely one of + * {@link Integer}, {@link Double}, {@link Float}, {@link Byte}, + * {@link Long}, {@link Short}. + * + * @param other the other set + * + * @return the weighted Jaccard similarity coefficient + * + * @throws IllegalArgumentException if T is not a number + * @see #weightedJaccardDistance(ExtendedSet) + */ + public double weightedJaccardSimilarity(ExtendedSet other); + + /** + * Computes the weighted version of the Jaccard distance between this set + * and the given set. + *

+ * The coefficient is defined as 1 - + * {@link #weightedJaccardSimilarity(ExtendedSet)}. + *

+ * NOTE: T must be a number, namely one of + * {@link Integer}, {@link Double}, {@link Float}, {@link Byte}, + * {@link Long}, {@link Short}. + * + * @param other the other set + * + * @return the weighted Jaccard distance + * + * @throws IllegalArgumentException if T is not a number + * @see #weightedJaccardSimilarity(ExtendedSet) + */ + public double weightedJaccardDistance(ExtendedSet other); + + /** + * Compares this object with the specified object for order. Returns a + * negative integer, zero, or a positive integer as this object is less + * than, equal to, or greater than the specified object. An {@link IntSet} + * instance A is less than another {@link IntSet} instance + * B if B-A (that is, the elements in + * B that are not contained in A) contains at + * least one element that is greater than all the elements in + * A-B. + *

+ *

+ * The implementor must ensure sgn(x.compareTo(y)) == + * -sgn(y.compareTo(x)) for all x and y. (This + * implies that x.compareTo(y) must throw an exception iff + * y.compareTo(x) throws an exception.) + *

+ *

+ * The implementor must also ensure that the relation is transitive: + * (x.compareTo(y)>0 && y.compareTo(z)>0) implies + * x.compareTo(z)>0. + *

+ *

+ * Finally, the implementor must ensure that x.compareTo(y)==0 + * implies that sgn(x.compareTo(z)) == sgn(y.compareTo(z)), for all + * z. + *

+ *

+ * It is strongly recommended, but not strictly required that + * (x.compareTo(y)==0) == (x.equals(y)). Generally speaking, any + * class that implements the Comparable interface and violates this + * condition should clearly indicate this fact. The recommended language is + * "Note: this class has a natural ordering that is inconsistent with + * equals." + *

+ *

+ * In the foregoing description, the notation sgn(expression + * ) designates the mathematical signum function, which is + * defined to return one of -1, 0, or 1 according + * to whether the value of expression is negative, zero or positive. + * + * @param o the object to be compared. + * + * @return a negative integer, zero, or a positive integer as this object is + * less than, equal to, or greater than the specified object. + * + * @throws ClassCastException if the specified object's type prevents it from being + * compared to this object. + */ + @Override + public int compareTo(ExtendedSet o); + + /** + * Extended version of the {@link Iterator} interface that allows to "skip" + * some elements of the set + * + * @param the type of elements maintained by this set + */ + public interface ExtendedIterator extends Iterator + { + /** + * Skips all the elements before the specified element, so that + * {@link Iterator#next()} gives the given element or, if it does not + * exist, the element immediately after according to the sorting + * provided by this {@link SortedSet} instance. + *

+ * If element is less than the next element, it does + * nothing + * + * @param element first element to not skip + */ + public void skipAllBefore(X element); + } +} + + diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java new file mode 100755 index 00000000000..48805215ee1 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/AbstractIntSet.java @@ -0,0 +1,744 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.intset; + + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * This class provides a skeletal implementation of the {@link IntSet} + * interface to minimize the effort required to implement this interface. 
+ * + * @author Alessandro Colantonio + * @version $Id: AbstractIntSet.java 156 2011-09-01 00:13:57Z cocciasik $ + */ +public abstract class AbstractIntSet implements IntSet +{ + /** + * {@inheritDoc} + */ + @Override + public IntSet union(IntSet other) + { + IntSet res = clone(); + res.addAll(other); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet difference(IntSet other) + { + IntSet res = clone(); + res.removeAll(other); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet intersection(IntSet other) + { + IntSet res = clone(); + res.retainAll(other); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet symmetricDifference(IntSet c) + { + IntSet res = clone(); + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + res.flip(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet complemented() + { + IntSet res = clone(); + res.complement(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + if (isEmpty()) { + return; + } + for (int e = last(); e >= 0; e--) { + flip(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + IntIterator itr = c.iterator(); + boolean res = true; + while (res && itr.hasNext()) { + res &= contains(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet c) + { + IntIterator itr = c.iterator(); + boolean res = true; + while (res && itr.hasNext()) { + if (contains(itr.next())) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + IntIterator itr = c.iterator(); + while (minElements > 0 && itr.hasNext()) { + if (contains(itr.next())) { + minElements--; + } + } + return minElements == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet c) + { + int 
res = 0; + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + if (contains(itr.next())) { + res++; + } + } + return res; + + } + + /** + * {@inheritDoc} + */ + @Override + public int unionSize(IntSet other) + { + return other == null ? size() : size() + other.size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(IntSet other) + { + return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(IntSet other) + { + return other == null ? size() : size() - intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + if (isEmpty()) { + return 0; + } + return last() - size() + 1; + } + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet empty(); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet clone(); + + /** + * {@inheritDoc} + */ + @Override + public abstract double bitmapCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + public abstract double collectionCompressionRatio(); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntIterator iterator(); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntIterator descendingIterator(); + + /** + * {@inheritDoc} + */ + @Override + public abstract String debugInfo(); + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + IntIterator itr = iterator(); + while (itr.hasNext()) { + itr.next(); + itr.remove(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + for (int e = from; e <= to; e++) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + for 
(int e = from; e <= to; e++) { + add(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) + { + if (!add(e)) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public abstract int get(int i); + + /** + * {@inheritDoc} + */ + @Override + public abstract int indexOf(int e); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet convert(int... a); + + /** + * {@inheritDoc} + */ + @Override + public abstract IntSet convert(Collection c); + + /** + * {@inheritDoc} + */ + @Override + public int first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return iterator().next(); + } + + /** + * {@inheritDoc} + */ + @Override + public abstract int last(); + + /** + * {@inheritDoc} + */ + @Override + public abstract int size(); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean isEmpty(); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean contains(int i); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean add(int i); + + /** + * {@inheritDoc} + */ + @Override + public abstract boolean remove(int i); + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr = c.iterator(); + boolean res = false; + while (itr.hasNext()) { + res |= add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr = c.iterator(); + boolean res = false; + while (itr.hasNext()) { + res |= remove(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr = iterator(); + boolean res = false; + while (itr.hasNext()) { + int e = itr.next(); + if (!c.contains(e)) { + res = true; + itr.remove(); + } + } + return res; + } + + 
/**
+   * {@inheritDoc}
+   * <p>
+   * Returns {@code null}, not an empty array, when the set is empty.
+   */
+  @Override
+  public int[] toArray()
+  {
+    if (isEmpty()) {
+      return null;
+    }
+    return toArray(new int[size()]);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * A new array is allocated when {@code a} is too short; when it is longer
+   * than the set, the unused tail is overwritten with zeros.
+   */
+  @Override
+  public int[] toArray(int[] a)
+  {
+    if (a.length < size()) {
+      a = new int[size()];
+    }
+    IntIterator itr = iterator();
+    int i = 0;
+    while (itr.hasNext()) {
+      a[i++] = itr.next();
+    }
+    for (; i < a.length; i++) {
+      a[i] = 0;
+    }
+    return a;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Formats the elements in iteration order as {@code [e1, e2, ...]};
+   * the empty set is rendered as {@code []}.
+   */
+  @Override
+  public String toString()
+  {
+    IntIterator itr = iterator();
+    if (!itr.hasNext()) {
+      return "[]";
+    }
+
+    StringBuilder sb = new StringBuilder();
+    sb.append('[');
+    for (; ; ) {
+      int e = itr.next();
+      sb.append(e);
+      if (!itr.hasNext()) {
+        return sb.append(']').toString();
+      }
+      sb.append(", ");
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Walks both sets through their descending iterators and decides on the
+   * first pair of elements that differ; when one set runs out first, the set
+   * with elements left is the greater one.
+   */
+  @Override
+  public int compareTo(IntSet o)
+  {
+    IntIterator thisIterator = this.descendingIterator();
+    IntIterator otherIterator = o.descendingIterator();
+    while (thisIterator.hasNext() && otherIterator.hasNext()) {
+      int thisItem = thisIterator.next();
+      int otherItem = otherIterator.next();
+      if (thisItem < otherItem) {
+        return -1;
+      }
+      if (thisItem > otherItem) {
+        return 1;
+      }
+    }
+    return thisIterator.hasNext() ? 1 : (otherIterator.hasNext() ? -1 : 0);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Equivalent to {@code powerSet(1, Integer.MAX_VALUE)}.
+   */
+  @Override
+  public List<? extends IntSet> powerSet()
+  {
+    return powerSet(1, Integer.MAX_VALUE);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Subsets are built level by level: level {@code k} holds the
+   * {@code k}-element subsets, grouped by common prefix, and each subset of
+   * level {@code k + 1} is a clone of a level-{@code k} subset extended with
+   * the last element of a later subset of the same group.
+   *
+   * @throws IllegalArgumentException if {@code min < 1} or {@code max < min}
+   */
+  @Override
+  public List<? extends IntSet> powerSet(int min, int max)
+  {
+    if (min < 1 || max < min) {
+      throw new IllegalArgumentException();
+    }
+
+    // special cases
+    List<IntSet> res = new ArrayList<IntSet>();
+    if (size() < min) {
+      return res;
+    }
+    if (size() == min) {
+      res.add(clone());
+      return res;
+    }
+    if (size() == min + 1) {
+      IntIterator itr = descendingIterator();
+      while (itr.hasNext()) {
+        IntSet set = clone();
+        set.remove(itr.next());
+        res.add(set);
+      }
+      if (max > min) {
+        res.add(clone());
+      }
+      return res;
+    }
+
+    // the first level contains only one prefix made up of all 1-subsets
+    List<List<IntSet>> level = new ArrayList<List<IntSet>>();
+    level.add(new ArrayList<IntSet>());
+    IntIterator itr = iterator();
+    while (itr.hasNext()) {
+      IntSet single = empty();
+      single.add(itr.next());
+      level.get(0).add(single);
+    }
+    if (min == 1) {
+      res.addAll(level.get(0));
+    }
+
+    // all combinations
+    int lvl = 2;
+    while (!level.isEmpty() && lvl <= max) {
+      List<List<IntSet>> newLevel = new ArrayList<List<IntSet>>();
+      for (List<IntSet> prefix : level) {
+        for (int i = 0; i < prefix.size() - 1; i++) {
+          List<IntSet> newPrefix = new ArrayList<IntSet>();
+          for (int j = i + 1; j < prefix.size(); j++) {
+            IntSet x = prefix.get(i).clone();
+            x.add(prefix.get(j).last());
+            newPrefix.add(x);
+            if (lvl >= min) {
+              res.add(x);
+            }
+          }
+          if (newPrefix.size() > 1) {
+            newLevel.add(newPrefix);
+          }
+        }
+      }
+      level = newLevel;
+      lvl++;
+    }
+
+    return res;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Computed as {@code 2^size() - 1} in floating point and narrowed to
+   * {@code int}; 0 for the empty set.
+   */
+  @Override
+  public int powerSetSize()
+  {
+    return isEmpty() ? 0 : (int) Math.pow(2, size()) - 1;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Sums the binomial coefficients {@code C(size, i)} for {@code i} in
+   * {@code [min, min(max, size)]}.
+   *
+   * @throws IllegalArgumentException if {@code min < 1} or {@code max < min}
+   */
+  @Override
+  public int powerSetSize(int min, int max)
+  {
+    if (min < 1 || max < min) {
+      throw new IllegalArgumentException();
+    }
+    final int size = size();
+
+    // special cases
+    if (size < min) {
+      return 0;
+    }
+    if (size == min) {
+      return 1;
+    }
+
+    /*
+     * Compute the sum of binomial coefficients ranging from (size choose
+     * max) to (size choose min) using dynamic programming
+     */
+
+    // trivial cases
+    max = Math.min(size, max);
+    if (max == min && (max == 0 || max == size)) {
+      return 1;
+    }
+
+    // compute all binomial coefficients for "n"
+    int[] b = new int[size + 1];
+    for (int i = 0; i <= size; i++) {
+      b[i] = 1;
+    }
+    for (int i = 1; i <= size; i++) {
+      for (int j = i - 1; j > 0; j--) {
+        b[j] += b[j - 1];
+      }
+    }
+
+    // sum binomial coefficients
+    int res = 0;
+    for (int i = min; i <= max; i++) {
+      res += b[i];
+    }
+    return res;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Intersection size divided by union size; two empty sets have
+   * similarity 1.
+   */
+  @Override
+  public double jaccardSimilarity(IntSet other)
+  {
+    if (isEmpty() && other.isEmpty()) {
+      return 1D;
+    }
+    int inters = intersectionSize(other);
+    return (double) inters / (size() + other.size() - inters);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Defined as {@code 1 - jaccardSimilarity(other)}.
+   */
+  @Override
+  public double jaccardDistance(IntSet other)
+  {
+    return 1D - jaccardSimilarity(other);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Like {@link #jaccardSimilarity(IntSet)}, but every element contributes
+   * its own integer value instead of 1: the sum of the intersection divided
+   * by the sum of the intersection plus the sum of the symmetric difference.
+   */
+  @Override
+  public double weightedJaccardSimilarity(IntSet other)
+  {
+    if (isEmpty() && other.isEmpty()) {
+      return 1D;
+    }
+    IntIterator itr = intersection(other).iterator();
+    double intersectionSum = 0D;
+    while (itr.hasNext()) {
+      intersectionSum += itr.next();
+    }
+
+    itr = symmetricDifference(other).iterator();
+    double symmetricDifferenceSum = 0D;
+    while (itr.hasNext()) {
+      symmetricDifferenceSum += itr.next();
+    }
+
+    return intersectionSum / (intersectionSum + symmetricDifferenceSum);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Defined as {@code 1 - weightedJaccardSimilarity(other)}.
+   */
+  @Override
+  public double weightedJaccardDistance(IntSet other)
+  {
+    return 1D - weightedJaccardSimilarity(other);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Two {@link IntSet}s are equal when they have the same size and their
+   * iterators yield the same sequence of elements.
+   */
+  @Override
+  public boolean equals(Object obj)
+  {
+    // special cases
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof IntSet)) {
+      return false;
+    }
+    if (size() != ((IntSet) obj).size()) {
+      return false;
+    }
+
+    // compare all the integrals, according to their natural order
+    IntIterator itr1 = iterator();
+    IntIterator itr2 = ((IntSet) obj).iterator();
+    while (itr1.hasNext()) {
+      if (itr1.next() != itr2.next()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Folds every element, in iteration order, into {@code h = 31 * h + e}
+   * starting from 1; the empty set hashes to 0.
+   */
+  @Override
+  public int hashCode()
+  {
+    if (isEmpty()) {
+      return 0;
+    }
+    int h = 1;
+    IntIterator itr = iterator();
+    // every element must take part in the hash
+    while (itr.hasNext()) {
+      h = (h << 5) - h + itr.next();
+    }
+    return h;
+  }
+}
diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java
new file mode 100755
index 00000000000..aee867240a9
--- /dev/null
+++ b/extendedset/src/main/java/io/druid/extendedset/intset/ArraySet.java
@@ -0,0 +1,1157 @@
+/*
+ * (c) 2010 Alessandro Colantonio
+ *
+ *
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + + +package io.druid.extendedset.intset; + + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.SortedSet; + +/** + * {@link IntSet}-based class internally managed by a sorted array of + * ints. + * + * @author Alessandro Colantonio + * @version $Id: ArraySet.java 156 2011-09-01 00:13:57Z cocciasik $ + */ +public class ArraySet extends AbstractIntSet +{ + /** + * elements of the set + */ + private int[] elements; + + /** + * set cardinality + */ + private int size; + + /** + * Empty-set constructor + */ + public ArraySet() + { + size = 0; + elements = null; + } + + /** + * Replace the content of the current instance with the content of another + * instance + * + * @param other + */ + private void replaceWith(ArraySet other) + { + size = other.size; + elements = other.elements; + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return size() / Math.ceil(elements[size - 1] / 32D); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return isEmpty() ? 
0D : 1D; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet empty() + { + return new ArraySet(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + return new IntIterator() + { + int next = 0; + + @Override + public void skipAllBefore(int e) + { + if (e <= elements[next]) { + return; + } + next = Arrays.binarySearch(elements, next + 1, size, e); + if (next < 0) { + next = -(next + 1); + } + } + + @Override + public boolean hasNext() + { + return next < size; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return elements[next++]; + } + + @Override + public void remove() + { + next--; + size--; + System.arraycopy(elements, next + 1, elements, next, size - next); + compact(); + } + + @Override + public IntIterator clone() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() + { + return new IntIterator() + { + int next = size - 1; + + @Override + public void skipAllBefore(int e) + { + if (e >= elements[next]) { + return; + } + next = Arrays.binarySearch(elements, 0, next, e); + if (next < 0) { + next = -(next + 1) - 1; + } + } + + @Override + public boolean hasNext() + { + return next >= 0; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return elements[next--]; + } + + @Override + public void remove() + { + next++; + size--; + System.arraycopy(elements, next + 1, elements, next, size - next); + compact(); + } + + @Override + public IntIterator clone() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! 
+ ArraySet c = empty(); + if (!isEmpty()) { + c.elements = Arrays.copyOf(elements, elements.length); + c.size = size; + } + return c; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return toString(); + } + + /** + * Assures that the size of {@link #elements} is sufficient to contain + * {@link #size} elements. + */ + private void ensureCapacity() + { + int capacity = elements == null ? 0 : elements.length; + if (capacity >= size) { + return; + } + capacity = Math.max(capacity << 1, size); + + if (elements == null) { + // nothing to copy + elements = new int[capacity]; + return; + } + elements = Arrays.copyOf(elements, capacity); + } + + /** + * Removes unused allocated words at the end of {@link #words} only when they + * are more than twice of the needed space + */ + private void compact() + { + if (size == 0) { + elements = null; + return; + } + if (elements != null && (size << 1) < elements.length) { + elements = Arrays.copyOf(elements, size); + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int element) + { + // append + if (isEmpty() || elements[size - 1] < element) { + size++; + ensureCapacity(); + elements[size - 1] = element; + return true; + } + + // insert + int pos = Arrays.binarySearch(elements, 0, size, element); + if (pos >= 0) { + return false; + } + + size++; + ensureCapacity(); + pos = -(pos + 1); + System.arraycopy(elements, pos, elements, pos + 1, size - pos - 1); + elements[pos] = element; + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(int element) + { + if (element < 0) { + return false; + } + + int pos = Arrays.binarySearch(elements, 0, size, element); + if (pos < 0) { + return false; + } + + size--; + System.arraycopy(elements, pos + 1, elements, pos, size - pos); + compact(); + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int element) + { + // first + if (isEmpty()) { + size++; + ensureCapacity(); + elements[size 
- 1] = element; + return; + } + + int pos = Arrays.binarySearch(elements, 0, size, element); + + // add + if (pos < 0) { + size++; + ensureCapacity(); + pos = -(pos + 1); + System.arraycopy(elements, pos, elements, pos + 1, size - pos - 1); + elements[pos] = element; + return; + } + + // remove + size--; + System.arraycopy(elements, pos + 1, elements, pos, size - pos); + compact(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int element) + { + if (isEmpty()) { + return false; + } + return Arrays.binarySearch(elements, 0, size, element) >= 0; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final ArraySet o = convert(c); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return false; + } + thisIndex++; + } + if (thisElements[thisIndex] > otherElements[otherIndex]) { + return false; + } + } + return otherIndex == otherSize - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet other) + { + if (other == null || other.isEmpty() || other == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final ArraySet o = convert(other); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex 
== otherSize - 1) { + return false; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return false; + } + thisIndex++; + } + } + return true; + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet other, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + if ((size >= 0 && size < minElements) || other == null || other.isEmpty() || isEmpty()) { + return false; + } + if (this == other) { + return size() >= minElements; + } + + final ArraySet o = convert(other); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int res = 0; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + return false; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return false; + } + thisIndex++; + } + } + res++; + if (res >= minElements) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + ArraySet res = union(c); + boolean r = !equals(res); + replaceWith(res); + return r; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + ArraySet res = intersection(c); + boolean r = !equals(res); + replaceWith(res); + return r; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + ArraySet res = difference(c); + boolean r = !equals(res); + 
replaceWith(res); + return r; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + if (isEmpty()) { + return 0; + } + final int[] thisElements = elements; // faster + int h = 1; + for (int i = 0; i < size; i++) { + h = (h << 5) - h + thisElements[i]; + } + return h; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof ArraySet)) { + return super.equals(obj); + } + final ArraySet other = (ArraySet) obj; + if (size != other.size) { + return false; + } + final int[] thisElements = elements; // faster + final int[] otherElements = other.elements; // faster + for (int i = 0; i < size; i++) { + if (thisElements[i] != otherElements[i]) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return size; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return size == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + elements = null; + size = 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return elements[0]; + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return elements[size - 1]; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return 0; + } + if (this == other) { + return size(); + } + + final ArraySet o = convert(other); + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int res = 0; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) 
{ + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + return res; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + return res; + } + thisIndex++; + } + } + res++; + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet intersection(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (this == other) { + return clone(); + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[Math.min(size, otherSize)]; + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + if (thisIndex == size - 1) { + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + thisIndex++; + } + } + resElements[resSize++] = thisElements[thisIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet union(IntSet other) + { + if (this == other || other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty()) { + ArraySet cloned = convert(other); + if (cloned 
== other) { + cloned = cloned.clone(); + } + return cloned; + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[size + otherSize]; +mainLoop: + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + resElements[resSize++] = otherElements[otherIndex]; + if (otherIndex == otherSize - 1) { + resElements[resSize++] = thisElements[thisIndex]; + break mainLoop; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + resElements[resSize++] = thisElements[thisIndex]; + if (thisIndex == size - 1) { + resElements[resSize++] = otherElements[otherIndex]; + break mainLoop; + } + thisIndex++; + } + } + resElements[resSize++] = thisElements[thisIndex]; + } + while (thisIndex < size - 1) { + resElements[resSize++] = thisElements[++thisIndex]; + } + while (otherIndex < otherSize - 1) { + resElements[resSize++] = otherElements[++otherIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet difference(IntSet other) + { + if (isEmpty() || this == other) { + return empty(); + } + if (other == null || other.isEmpty()) { + return clone(); + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[size]; +mainLoop: + while (thisIndex < (size - 1) && otherIndex < 
(otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + if (otherIndex == otherSize - 1) { + resElements[resSize++] = thisElements[thisIndex]; + break mainLoop; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + resElements[resSize++] = thisElements[thisIndex]; + if (thisIndex == size - 1) { + break mainLoop; + } + thisIndex++; + } + } + } + while (thisIndex < size - 1) { + resElements[resSize++] = thisElements[++thisIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet symmetricDifference(IntSet other) + { + if (this == other || other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty()) { + return convert(other).clone(); + } + + final ArraySet o = convert(other); + int otherSize = o.size; + int thisIndex = -1; + int otherIndex = -1; + int resSize = 0; + final int[] thisElements = elements; // faster + final int[] otherElements = o.elements; // faster + final int[] resElements = new int[size + otherSize]; +mainLoop: + while (thisIndex < (size - 1) && otherIndex < (otherSize - 1)) { + thisIndex++; + otherIndex++; + while (thisElements[thisIndex] != otherElements[otherIndex]) { + while (thisElements[thisIndex] > otherElements[otherIndex]) { + resElements[resSize++] = otherElements[otherIndex]; + if (otherIndex == otherSize - 1) { + resElements[resSize++] = thisElements[thisIndex]; + break mainLoop; + } + otherIndex++; + } + if (thisElements[thisIndex] == otherElements[otherIndex]) { + break; + } + while (thisElements[thisIndex] < otherElements[otherIndex]) { + resElements[resSize++] = thisElements[thisIndex]; + if (thisIndex == size - 1) { + resElements[resSize++] = 
otherElements[otherIndex]; + break mainLoop; + } + thisIndex++; + } + } + } + while (thisIndex < size - 1) { + resElements[resSize++] = thisElements[++thisIndex]; + } + while (otherIndex < otherSize - 1) { + resElements[resSize++] = otherElements[++otherIndex]; + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + res.compact(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + if (isEmpty()) { + return; + } + + IntIterator thisItr = clone().iterator(); // avoid concurrency + elements = new int[complementSize()]; + final int[] thisElements = elements; // faster + size = 0; + int u = -1; + while (thisItr.hasNext()) { + int c = thisItr.next(); + while (++u < c) { + thisElements[size++] = u; + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + if (from == to) { + add(from); + return; + } + + int[] thisElements = elements; // faster + + if (isEmpty()) { + size = to - from + 1; + ensureCapacity(); + thisElements = elements; + for (int i = 0; i < size; i++) { + thisElements[i] = from++; + } + return; + } + + // increase capacity, if necessary + int posFrom = Arrays.binarySearch(thisElements, 0, size, from); + boolean fromMissing = posFrom < 0; + if (fromMissing) { + posFrom = -posFrom - 1; + } + + int posTo = Arrays.binarySearch(thisElements, posFrom, size, to); + boolean toMissing = posTo < 0; + if (toMissing) { + posTo = -posTo - 1; + } + + int delta = 0; + if (toMissing || (fromMissing && (posFrom == posTo + 1))) { + delta = 1; + } + + int gap = to - from; + delta += gap - (posTo - posFrom); + if (delta > 0) { + size += delta; + ensureCapacity(); + thisElements = elements; + System.arraycopy(thisElements, posTo, thisElements, posTo + delta, size - delta - posTo); + posTo = posFrom + gap; + + // set values + for (int i = posFrom; i <= posTo; i++) { + 
thisElements[i] = from++; + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) + { + if (isEmpty()) { + return; + } + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + if (from == to) { + remove(from); + return; + } + + int posFrom = Arrays.binarySearch(elements, 0, size, from); + if (posFrom < 0) { + posFrom = -posFrom - 1; + } + if (posFrom >= size) { + return; + } + int posTo = Arrays.binarySearch(elements, posFrom, size, to); + if (posTo >= 0) { + posTo++; + } else { + posTo = -posTo - 1; + } + if (posFrom == posTo) { + return; + } + System.arraycopy(elements, posTo, elements, posFrom, size - posTo); + size -= posTo - posFrom; + } + + /** + * Convert a generic {@link IntSet} instance to an {@link ArraySet} instance + * + * @param c + * + * @return + */ + private ArraySet convert(IntSet c) + { + if (c instanceof ArraySet) { + return (ArraySet) c; + } + + int[] resElements = new int[c.size()]; + int resSize = 0; + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + resElements[resSize++] = itr.next(); + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet convert(int... 
a) + { + int[] resElements = null; + int resSize = 0; + int last = -1; + if (a != null) { + resElements = new int[a.length]; + a = Arrays.copyOf(a, a.length); + Arrays.sort(a); + if (a[0] < 0) { + throw new ArrayIndexOutOfBoundsException(Integer.toString(a[0])); + } + for (int i : a) { + if (last != i) { + resElements[resSize++] = last = i; + } + } + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet convert(Collection c) + { + Collection sorted; + int[] resElements = null; + int resSize = 0; + int last = -1; + if (c != null) { + resElements = new int[c.size()]; + if (c instanceof SortedSet && ((SortedSet) c).comparator() == null) { + sorted = c; + } else { + sorted = new ArrayList(c); + Collections.sort((List) sorted); + int first = ((ArrayList) sorted).get(0).intValue(); + if (first < 0) { + throw new ArrayIndexOutOfBoundsException(Integer.toString(first)); + } + } + for (int i : sorted) { + if (last != i) { + resElements[resSize++] = last = i; + } + } + } + + ArraySet res = empty(); + res.elements = resElements; + res.size = resSize; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ArraySet complemented() + { + ArraySet res = clone(); + res.complement(); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) + { + if (i < 0 || i >= size) { + throw new IndexOutOfBoundsException(Integer.toString(i)); + } + return elements[i]; + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + int pos = Arrays.binarySearch(elements, 0, size, e); + if (pos < 0) { + return -1; + } + return pos; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java new file mode 100755 index 
00000000000..b3f3d87bc0c --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSet.java @@ -0,0 +1,3178 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.intset; + + +import io.druid.extendedset.utilities.BitCount; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.ConcurrentModificationException; +import java.util.Formatter; +import java.util.List; +import java.util.Locale; +import java.util.NoSuchElementException; +import java.util.SortedSet; + +/** + * This is CONCISE: COmpressed 'N' Composable Integer SEt. + *

 + * This class is an instance of {@link IntSet} internally represented by + * compressed bitmaps through an RLE (Run-Length Encoding) compression algorithm. + * See http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf for more + * details. + *

+ * Notice that the iterator by {@link #iterator()} is fail-fast, + * similar to most {@link Collection}-derived classes. If the set is + * structurally modified at any time after the iterator is created, the iterator + * will throw a {@link ConcurrentModificationException}. Thus, in the face of + * concurrent modification, the iterator fails quickly and cleanly, rather than + * risking arbitrary, non-deterministic behavior at an undetermined time in the + * future. The iterator throws a {@link ConcurrentModificationException} on a + * best-effort basis. Therefore, it would be wrong to write a program that + * depended on this exception for its correctness: the fail-fast behavior of + * iterators should be used only to detect bugs. + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class ConciseSet extends AbstractIntSet implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 560068054685367266L; + /** + * true if the class must simulate the behavior of WAH + */ + private final boolean simulateWAH; + /** + * User for fail-fast iterator. It counts the number of operations + * that do modify {@link #words} + */ + protected transient volatile int modCount = 0; + /** + * This is the compressed bitmap, that is a collection of words. For each + * word: + *

    + *
  • 1* (0x80000000) means that it is a 31-bit literal. + *
  • 00* (0x00000000) indicates a sequence made up of at + * most one set bit in the first 31 bits, and followed by blocks of 31 0's. + * The following 5 bits (00xxxxx*) indicates which is the set bit ( + * 00000 = no set bit, 00001 = LSB, 11111 = MSB), + * while the remaining 25 bits indicate the number of following 0's blocks. + *
  • 01* (0x40000000) indicates a sequence made up of at + * most one unset bit in the first 31 bits, and followed by blocks of + * 31 1's. (see the 00* case above). + *
+ *

+   * Note that literal words 0xFFFFFFFF and 0x80000000 are allowed, thus
+   * zero-length sequences (i.e., such that getSequenceCount() == 0) cannot
+   * exist.
+   */
+  private int[] words;
+  /**
+   * Most significant set bit within the uncompressed bit string.
+   */
+  private transient int last;
+  /**
+   * Cached cardinality of the bit-set. Defined for efficient {@link #size()}
+   * calls. When -1, the cache is invalid.
+   */
+  private transient int size;
+  /**
+   * Index of the last word in {@link #words}
+   */
+  private transient int lastWordIndex;
+
+  /**
+   * Creates an empty integer set
+   */
+  public ConciseSet()
+  {
+    this(false);
+  }
+
+  /**
+   * Creates an empty integer set
+   *
+   * @param simulateWAH true if the class must simulate the behavior of
+   *                    WAH
+   */
+  public ConciseSet(boolean simulateWAH)
+  {
+    this.simulateWAH = simulateWAH;
+    reset();
+  }
+
+  /**
+   * Creates a set on top of an existing array of compressed words. The array
+   * reference is stored as-is (it is not copied), the cached size is marked
+   * invalid (-1) and {@link #last} is recomputed through updateLast().
+   * NOTE(review): isEmpty() is evaluated before lastWordIndex is assigned;
+   * its definition is not visible here, so confirm it only inspects "words".
+   *
+   * @param words       compressed words backing the new set
+   * @param simulateWAH true if the class must simulate the behavior of
+   *                    WAH
+   */
+  public ConciseSet(int[] words, boolean simulateWAH)
+  {
+    this.words = words;
+    this.lastWordIndex = isEmpty() ? -1 : words.length - 1;
+    this.size = -1;
+    updateLast();
+    this.simulateWAH = simulateWAH;
+  }
+
+  /**
+   * Calculates the modulus division by 31 in a faster way than using n % 31
+   *

+ * This method of finding modulus division by an integer that is one less + * than a power of 2 takes at most O(lg(32)) time. The number of operations + * is at most 12 + 9 * ceil(lg(32)). + *

+ * See http://graphics.stanford.edu/~seander/bithacks.html + * + * @param n number to divide + * + * @return n % 31 + */ + private static int maxLiteralLengthModulus(int n) + { + int m = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F); + m = (m >>> 15) + (m & 0x00007FFF); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + if (m <= 31) { + return m == 31 ? 0 : m; + } + m = (m >>> 5) + (m & 0x0000001F); + return m == 31 ? 0 : m; + } + + /** + * Calculates the multiplication by 31 in a faster way than using n * 31 + * + * @param n number to multiply + * + * @return n * 31 + */ + private static int maxLiteralLengthMultiplication(int n) + { + return (n << 5) - n; + } + + /** + * Calculates the division by 31 + * + * @param n number to divide + * + * @return n / 31 + */ + private static int maxLiteralLengthDivision(int n) + { + return n / 31; + } + + /** + * Checks whether a word is a literal one + * + * @param word word to check + * + * @return true if the given word is a literal word + */ + private static boolean isLiteral(int word) + { + // "word" must be 1* + // NOTE: this is faster than "return (word & 0x80000000) == 0x80000000" + return (word & 0x80000000) != 0; + } + + /** + * Checks whether a word contains a sequence of 1's + * + * @param word word to check + * + * @return true if the given word is a sequence of 1's + */ + private static boolean isOneSequence(int word) + { + // "word" must be 01* + return (word & 0xC0000000) == ConciseSetUtils.SEQUENCE_BIT; + } + + /** + * Checks whether a word contains a sequence of 0's + * + * @param word word to check + * + * @return true if the given word is a sequence of 0's + */ + private static boolean isZeroSequence(int word) + { + // "word" must 
be 00* + return (word & 0xC0000000) == 0; + } + + /** + * Checks whether a word contains a sequence of 0's with no set bit, or 1's + * with no unset bit. + *

+ * NOTE: when {@link #simulateWAH} is true, it is + * equivalent to (and as fast as) !{@link #isLiteral(int)} + * + * @param word word to check + * + * @return true if the given word is a sequence of 0's or 1's + * but with no (un)set bit + */ + private static boolean isSequenceWithNoBits(int word) + { + // "word" must be 0?00000* + return (word & 0xBE000000) == 0x00000000; + } + + /** + * Gets the number of blocks of 1's or 0's stored in a sequence word + * + * @param word word to check + * + * @return the number of blocks that follow the first block of 31 bits + */ + private static int getSequenceCount(int word) + { + // get the 25 LSB bits + return word & 0x01FFFFFF; + } + + /** + * Clears the (un)set bit in a sequence + * + * @param word word to check + * + * @return the sequence corresponding to the given sequence and with no + * (un)set bits + */ + private static int getSequenceWithNoBits(int word) + { + // clear 29 to 25 LSB bits + return (word & 0xC1FFFFFF); + } + + /** + * Gets the position of the flipped bit within a sequence word. If the + * sequence has no set/unset bit, returns -1. + *

+ * Note that the parameter must a sequence word, otherwise the + * result is meaningless. + * + * @param word sequence word to check + * + * @return the position of the set bit, from 0 to 31. If the sequence has no + * set/unset bit, returns -1. + */ + private static int getFlippedBit(int word) + { + // get bits from 30 to 26 + // NOTE: "-1" is required since 00000 represents no bits and 00001 the LSB bit set + return ((word >>> 25) & 0x0000001F) - 1; + } + + /** + * Gets the number of set bits within the literal word + * + * @param word literal word + * + * @return the number of set bits within the literal word + */ + private static int getLiteralBitCount(int word) + { + return BitCount.count(getLiteralBits(word)); + } + + /** + * Gets the bits contained within the literal word + * + * @param word literal word + * + * @return the literal word with the most significant bit cleared + */ + private static int getLiteralBits(int word) + { + return ConciseSetUtils.ALL_ONES_WITHOUT_MSB & word; + } + + /** + * Returns true when the given 31-bit literal string (namely, + * with MSB set) contains only one set bit + * + * @param literal literal word (namely, with MSB unset) + * + * @return true when the given literal contains only one set + * bit + */ + private static boolean containsOnlyOneBit(int literal) + { + return (literal & (literal - 1)) == 0; + } + + /** + * Generates the 32-bit binary representation of a given word (debug only) + * + * @param word word to represent + * + * @return 32-character string that represents the given word + */ + private static String toBinaryString(int word) + { + String lsb = Integer.toBinaryString(word); + StringBuilder pad = new StringBuilder(); + for (int i = lsb.length(); i < 32; i++) { + pad.append('0'); + } + return pad.append(lsb).toString(); + } + + /** + * Resets to an empty set + * + * @see #ConciseSet() + * {@link #clear()} + */ + private void reset() + { + modCount++; + words = null; + last = -1; + size = 0; + lastWordIndex = 
-1; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet clone() + { + if (isEmpty()) { + return empty(); + } + + // NOTE: do not use super.clone() since it is 10 times slower! + ConciseSet res = empty(); + res.last = last; + res.lastWordIndex = lastWordIndex; + res.modCount = 0; + res.size = size; + res.words = Arrays.copyOf(words, lastWordIndex + 1); + return res; + } + + /** + * Gets the literal word that represents the first 31 bits of the given the + * word (i.e. the first block of a sequence word, or the bits of a literal word). + *

+ * If the word is a literal, it returns the unmodified word. In case of a + * sequence, it returns a literal that represents the first 31 bits of the + * given sequence word. + * + * @param word word to check + * + * @return the literal contained within the given word, with the most + * significant bit set to 1. + */ + private /*static*/ int getLiteral(int word) + { + if (isLiteral(word)) { + return word; + } + + if (simulateWAH) { + return isZeroSequence(word) ? ConciseSetUtils.ALL_ZEROS_LITERAL : ConciseSetUtils.ALL_ONES_LITERAL; + } + + // get bits from 30 to 26 and use them to set the corresponding bit + // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent + // NOTE: ">>> 1" is required since 00000 represents no bits and 00001 the LSB bit set + int literal = (1 << (word >>> 25)) >>> 1; + return isZeroSequence(word) + ? (ConciseSetUtils.ALL_ZEROS_LITERAL | literal) + : (ConciseSetUtils.ALL_ONES_LITERAL & ~literal); + } + + /** + * Clears bits from MSB (excluded, since it indicates the word type) to the + * specified bit (excluded). Last word is supposed to be a literal one. + * + * @param lastSetBit leftmost bit to preserve + */ + private void clearBitsAfterInLastWord(int lastSetBit) + { + words[lastWordIndex] &= ConciseSetUtils.ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)); + } + + /** + * Assures that the length of {@link #words} is sufficient to contain + * the given index. + */ + private void ensureCapacity(int index) + { + int capacity = words == null ? 
0 : words.length; + if (capacity > index) { + return; + } + capacity = Math.max(capacity << 1, index + 1); + + if (words == null) { + // nothing to copy + words = new int[capacity]; + return; + } + words = Arrays.copyOf(words, capacity); + } + + /** + * Removes unused allocated words at the end of {@link #words} only when they + * are more than twice of the needed space + */ + private void compact() + { + if (words != null && ((lastWordIndex + 1) << 1) < words.length) { + words = Arrays.copyOf(words, lastWordIndex + 1); + } + } + + /** + * Sets the bit at the given absolute position within the uncompressed bit + * string. The bit must be appendable, that is it must represent an + * integer that is strictly greater than the maximum integer in the set. + * Note that the parameter range check is performed by the public method + * {@link #add(Integer)} and not in this method. + *

+ * NOTE: This method assumes that the last element of {@link #words} + * (i.e. getLastWord()) must be one of the + * following: + *

    + *
  • a literal word with at least one set bit; + *
  • a sequence of ones. + *
+ * Hence, the last word in {@link #words} cannot be: + *
    + *
  • a literal word containing only zeros; + *
  • a sequence of zeros. + *
+   *
+   * @param i the absolute position of the bit to set (i.e., the integer to add)
+   */
+  private void append(int i)
+  {
+    // special case of empty set
+    if (isEmpty()) {
+      // number of whole 31-bit blocks of zeros that precede the block holding bit i
+      int zeroBlocks = maxLiteralLengthDivision(i);
+      if (zeroBlocks == 0) {
+        words = new int[1];
+        lastWordIndex = 0;
+      } else if (zeroBlocks == 1) {
+        // a single block of zeros is stored as an all-zeros literal
+        words = new int[2];
+        lastWordIndex = 1;
+        words[0] = ConciseSetUtils.ALL_ZEROS_LITERAL;
+      } else {
+        // several blocks of zeros are stored as a zero sequence (counter = blocks - 1)
+        words = new int[2];
+        lastWordIndex = 1;
+        words[0] = zeroBlocks - 1;
+      }
+      last = i;
+      size = 1;
+      words[lastWordIndex] = ConciseSetUtils.ALL_ZEROS_LITERAL | (1 << maxLiteralLengthModulus(i));
+      return;
+    }
+
+    // position of the next bit to set within the current literal
+    int bit = maxLiteralLengthModulus(last) + i - last;
+
+    // if we are outside the current literal, add zeros in
+    // between the current word and the new 1-bit literal word
+    if (bit >= ConciseSetUtils.MAX_LITERAL_LENGTH) {
+      int zeroBlocks = maxLiteralLengthDivision(bit) - 1;
+      bit = maxLiteralLengthModulus(bit);
+      if (zeroBlocks == 0) {
+        ensureCapacity(lastWordIndex + 1);
+      } else {
+        ensureCapacity(lastWordIndex + 2);
+        appendFill(zeroBlocks, 0);
+      }
+      appendLiteral(ConciseSetUtils.ALL_ZEROS_LITERAL | 1 << bit);
+    } else {
+      words[lastWordIndex] |= 1 << bit;
+      if (words[lastWordIndex] == ConciseSetUtils.ALL_ONES_LITERAL) {
+        // the literal became all ones: step back and re-append it so that
+        // appendLiteral() can merge it with the preceding word
+        lastWordIndex--;
+        appendLiteral(ConciseSetUtils.ALL_ONES_LITERAL);
+      }
+    }
+
+    // update other info
+    last = i;
+    if (size >= 0) {
+      size++;
+    }
+  }
+
+  /**
+   * Append a literal word after the last word. All-zeros and all-ones
+   * literals are merged into the preceding word whenever that word can
+   * absorb them as (part of) a sequence.
+   *
+   * @param word the new literal word. Note that the leftmost bit must
+   *             be set to 1.
+   */
+  private void appendLiteral(int word)
+  {
+    // when we have a zero sequence of the maximum length (that is,
+    // 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
+    // that we try to append a zero literal because the result of the given operation must be an
+    // empty set. Without the following test, we would have increased the
+    // counter of the zero sequence, thus obtaining 0x02000000 that
+    // represents a sequence with the first bit set!
+    if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) {
+      return;
+    }
+
+    // first addition
+    if (lastWordIndex < 0) {
+      words[lastWordIndex = 0] = word;
+      return;
+    }
+
+    final int lastWord = words[lastWordIndex];
+    if (word == ConciseSetUtils.ALL_ZEROS_LITERAL) {
+      if (lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) {
+        // two all-zeros blocks: a zero sequence with counter 1
+        words[lastWordIndex] = 1;
+      } else if (isZeroSequence(lastWord)) {
+        words[lastWordIndex]++;
+      } else if (!simulateWAH && containsOnlyOneBit(getLiteralBits(lastWord))) {
+        // 1-bit literal followed by zeros: zero sequence with a flipped bit
+        words[lastWordIndex] = 1 | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25);
+      } else {
+        words[++lastWordIndex] = word;
+      }
+    } else if (word == ConciseSetUtils.ALL_ONES_LITERAL) {
+      if (lastWord == ConciseSetUtils.ALL_ONES_LITERAL) {
+        // two all-ones blocks: a one sequence with counter 1
+        words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1;
+      } else if (isOneSequence(lastWord)) {
+        words[lastWordIndex]++;
+      } else if (!simulateWAH && containsOnlyOneBit(~lastWord)) {
+        // literal with one unset bit followed by ones: one sequence with a flipped bit
+        words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1 | ((1 + Integer.numberOfTrailingZeros(~lastWord))
+                                                                   << 25);
+      } else {
+        words[++lastWordIndex] = word;
+      }
+    } else {
+      words[++lastWordIndex] = word;
+    }
+  }
+
+  /**
+   * Append a sequence word after the last word
+   *
+   * @param length   sequence length, in 31-bit blocks (must be positive)
+   * @param fillType sequence word with a count that equals 0; only its
+   *                 sequence-type bit is used
+   */
+  private void appendFill(int length, int fillType)
+  {
+    assert length > 0;
+    assert lastWordIndex >= -1;
+
+    // keep only the bit that distinguishes a one sequence from a zero sequence
+    fillType &= ConciseSetUtils.SEQUENCE_BIT;
+
+    // it is actually a literal...
+    if (length == 1) {
+      appendLiteral(fillType == 0 ? ConciseSetUtils.ALL_ZEROS_LITERAL : ConciseSetUtils.ALL_ONES_LITERAL);
+      return;
+    }
+
+    // empty set
+    if (lastWordIndex < 0) {
+      words[lastWordIndex = 0] = fillType | (length - 1);
+      return;
+    }
+
+    final int lastWord = words[lastWordIndex];
+    if (isLiteral(lastWord)) {
+      if (fillType == 0 && lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) {
+        // the all-zeros literal becomes the first block of the sequence
+        words[lastWordIndex] = length;
+      } else if (fillType == ConciseSetUtils.SEQUENCE_BIT && lastWord == ConciseSetUtils.ALL_ONES_LITERAL) {
+        // the all-ones literal becomes the first block of the sequence
+        words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length;
+      } else if (!simulateWAH) {
+        if (fillType == 0 && containsOnlyOneBit(getLiteralBits(lastWord))) {
+          // fold the 1-bit literal into the zero sequence as its flipped bit
+          words[lastWordIndex] = length | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25);
+        } else if (fillType == ConciseSetUtils.SEQUENCE_BIT && containsOnlyOneBit(~lastWord)) {
+          // fold the literal with one unset bit into the one sequence as its flipped bit
+          words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length | ((1 + Integer.numberOfTrailingZeros(~lastWord))
+                                                                         << 25);
+        } else {
+          words[++lastWordIndex] = fillType | (length - 1);
+        }
+      } else {
+        words[++lastWordIndex] = fillType | (length - 1);
+      }
+    } else {
+      if ((lastWord & 0xC0000000) == fillType) {
+        // same kind of sequence: just extend its counter
+        words[lastWordIndex] += length;
+      } else {
+        words[++lastWordIndex] = fillType | (length - 1);
+      }
+    }
+  }
+
+  /**
+   * Recalculate a fresh value for {@link ConciseSet#last} by summing the
+   * number of bits covered by every word and then subtracting the unused
+   * high bits of the last word
+   */
+  private void updateLast()
+  {
+    if (isEmpty()) {
+      last = -1;
+      return;
+    }
+
+    last = 0;
+    for (int i = 0; i <= lastWordIndex; i++) {
+      int w = words[i];
+      if (isLiteral(w)) {
+        last += ConciseSetUtils.MAX_LITERAL_LENGTH;
+      } else {
+        last += maxLiteralLengthMultiplication(getSequenceCount(w) + 1);
+      }
+    }
+
+    int w = words[lastWordIndex];
+    if (isLiteral(w)) {
+      last -= Integer.numberOfLeadingZeros(getLiteralBits(w));
+    } else {
+      // "last" currently counts bits; the index of the last one is one less
+      last--;
+    }
+  }
+
+  /**
+   * Performs the given operation over the bit-sets
+   *
+   * @param other    {@link ConciseSet} instance that represents the right
+   *                 operand
+   * @param operator operator
+   *
+   * @return the result of the operation
+   */
+
private ConciseSet performOperation(ConciseSet other, Operator operator) + { + // non-empty arguments + if (this.isEmpty() || other.isEmpty()) { + return operator.combineEmptySets(this, other); + } + + // if the two operands are disjoint, the operation is faster + ConciseSet res = operator.combineDisjointSets(this, other); + if (res != null) { + return res; + } + + // Allocate a sufficient number of words to contain all possible results. + // NOTE: since lastWordIndex is the index of the last used word in "words", + // we require "+2" to have the actual maximum required space. + // In any case, we do not allocate more than the maximum space required + // for the uncompressed representation. + // Another "+1" is required to allows for the addition of the last word + // before compacting. + res = empty(); + res.words = new int[1 + Math.min( + this.lastWordIndex + other.lastWordIndex + 2, + maxLiteralLengthDivision(Math.max(this.last, other.last)) << (simulateWAH ? 1 : 0) + )]; + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while (true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + res.appendFill(minCount, operator.combineLiterals(thisItr.word, otherItr.word)); + if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || + { + break; + } + } else { + res.appendLiteral(operator.combineLiterals(thisItr.toLiteral(), otherItr.word)); + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } else if (!otherItr.isLiteral) { + res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.toLiteral())); + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + break; + } + } else { + res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.word)); + if (!thisItr.prepareNext() | 
!otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } + + // invalidate the size + res.size = -1; + boolean invalidLast = true; + + // if one bit string is greater than the other one, we add the remaining + // bits depending on the given operation. + switch (operator) { + case AND: + break; + case OR: + res.last = Math.max(this.last, other.last); + invalidLast = false; + invalidLast |= thisItr.flush(res); + invalidLast |= otherItr.flush(res); + break; + case XOR: + if (this.last != other.last) { + res.last = Math.max(this.last, other.last); + invalidLast = false; + } + invalidLast |= thisItr.flush(res); + invalidLast |= otherItr.flush(res); + break; + case ANDNOT: + if (this.last > other.last) { + res.last = this.last; + invalidLast = false; + } + invalidLast |= thisItr.flush(res); + break; + } + + // remove trailing zeros + res.trimZeros(); + if (res.isEmpty()) { + return res; + } + + // compute the greatest element + if (invalidLast) { + res.updateLast(); + } + + // compact the memory + res.compact(); + + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet o) + { + // special cases + if (isEmpty() || o == null || o.isEmpty()) { + return 0; + } + if (this == o) { + return size(); + } + + final ConciseSet other = convert(o); + + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(this.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { + if (isZeroSequence(this.words[0])) { + return 0; + } + return other.size(); + } + if (isSequenceWithNoBits(other.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { + if (isZeroSequence(other.words[0])) { + return 0; + } + return this.size(); + } + + int res = 0; + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while 
(true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { + res += maxLiteralLengthMultiplication(minCount); + } + if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || + { + break; + } + } else { + res += getLiteralBitCount(thisItr.toLiteral() & otherItr.word); + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } else if (!otherItr.isLiteral) { + res += getLiteralBitCount(thisItr.word & otherItr.toLiteral()); + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + break; + } + } else { + res += getLiteralBitCount(thisItr.word & otherItr.word); + if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" + { + break; + } + } + } + + return res; + } + + /** + * {@inheritDoc} + */ + public ByteBuffer toByteBuffer() + { + ByteBuffer buffer = ByteBuffer.allocate((lastWordIndex + 1) * 4); + buffer.asIntBuffer().put(Arrays.copyOf(words, lastWordIndex + 1)); + return buffer; + } + + /** + * {@inheritDoc} + */ + public int[] getWords() + { + if (words == null) { + return new int[]{}; + } + return Arrays.copyOf(words, lastWordIndex + 1); + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) + { + if (i < 0) { + throw new IndexOutOfBoundsException(); + } + + // initialize data + int firstSetBitInWord = 0; + int position = i; + int setBitsInCurrentWord = 0; + for (int j = 0; j <= lastWordIndex; j++) { + int w = words[j]; + if (isLiteral(w)) { + // number of bits in the current word + setBitsInCurrentWord = getLiteralBitCount(w); + + // check if the desired bit is in the current word + if (position < setBitsInCurrentWord) { + int currSetBitInWord = -1; + for (; position >= 0; position--) { + currSetBitInWord = Integer.numberOfTrailingZeros(w & (0xFFFFFFFF << 
(currSetBitInWord + 1))); + } + return firstSetBitInWord + currSetBitInWord; + } + + // skip the 31-bit block + firstSetBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + // number of involved bits (31 * blocks) + int sequenceLength = maxLiteralLengthMultiplication(getSequenceCount(w) + 1); + + // check the sequence type + if (isOneSequence(w)) { + if (simulateWAH || isSequenceWithNoBits(w)) { + setBitsInCurrentWord = sequenceLength; + if (position < setBitsInCurrentWord) { + return firstSetBitInWord + position; + } + } else { + setBitsInCurrentWord = sequenceLength - 1; + if (position < setBitsInCurrentWord) + // check whether the desired set bit is after the + // flipped bit (or after the first block) + { + return firstSetBitInWord + position + (position < getFlippedBit(w) ? 0 : 1); + } + } + } else { + if (simulateWAH || isSequenceWithNoBits(w)) { + setBitsInCurrentWord = 0; + } else { + setBitsInCurrentWord = 1; + if (position == 0) { + return firstSetBitInWord + getFlippedBit(w); + } + } + } + + // skip the 31-bit blocks + firstSetBitInWord += sequenceLength; + } + + // update the number of found set bits + position -= setBitsInCurrentWord; + } + + throw new IndexOutOfBoundsException(Integer.toString(i)); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + if (isEmpty()) { + return -1; + } + + // returned value + int index = 0; + + int blockIndex = maxLiteralLengthDivision(e); + int bitPosition = maxLiteralLengthModulus(e); + for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { + int w = words[i]; + if (isLiteral(w)) { + // check if the current literal word is the "right" one + if (blockIndex == 0) { + if ((w & (1 << bitPosition)) == 0) { + return -1; + } + return index + BitCount.count(w & ~(0xFFFFFFFF << bitPosition)); + } + blockIndex--; + index += getLiteralBitCount(w); + } else { + if (simulateWAH) 
{ + if (isOneSequence(w) && blockIndex <= getSequenceCount(w)) { + return index + maxLiteralLengthMultiplication(blockIndex) + bitPosition; + } + } else { + // if we are at the beginning of a sequence, and it is + // a set bit, the bit already exists + if (blockIndex == 0) { + int l = getLiteral(w); + if ((l & (1 << bitPosition)) == 0) { + return -1; + } + return index + BitCount.count(l & ~(0xFFFFFFFF << bitPosition)); + } + + // if we are in the middle of a sequence of 1's, the bit already exist + if (blockIndex > 0 + && blockIndex <= getSequenceCount(w) + && isOneSequence(w)) { + return index + maxLiteralLengthMultiplication(blockIndex) + bitPosition - (isSequenceWithNoBits(w) ? 0 : 1); + } + } + + // next word + int blocks = getSequenceCount(w) + 1; + blockIndex -= blocks; + if (isZeroSequence(w)) { + if (!simulateWAH && !isSequenceWithNoBits(w)) { + index++; + } + } else { + index += maxLiteralLengthMultiplication(blocks); + if (!simulateWAH && !isSequenceWithNoBits(w)) { + index--; + } + } + } + } + + // not found + return -1; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet intersection(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (other == this) { + return clone(); + } + return performOperation(convert(other), Operator.AND); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet union(IntSet other) + { + if (other == null || other.isEmpty() || other == this) { + return clone(); + } + return performOperation(convert(other), Operator.OR); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet difference(IntSet other) + { + if (other == this) { + return empty(); + } + if (other == null || other.isEmpty()) { + return clone(); + } + return performOperation(convert(other), Operator.ANDNOT); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet symmetricDifference(IntSet other) + { + if (other == this) { + return empty(); + } + if (other == null || 
other.isEmpty()) { + return clone(); + } + return performOperation(convert(other), Operator.XOR); + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet complemented() + { + ConciseSet cloned = clone(); + cloned.complement(); + return cloned; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + modCount++; + + if (isEmpty()) { + return; + } + + if (last == ConciseSetUtils.MIN_ALLOWED_SET_BIT) { + clear(); + return; + } + + // update size + if (size >= 0) { + size = last - size + 1; + } + + // complement each word + for (int i = 0; i <= lastWordIndex; i++) { + int w = words[i]; + if (isLiteral(w)) + // negate the bits and set the most significant bit to 1 + { + words[i] = ConciseSetUtils.ALL_ZEROS_LITERAL | ~w; + } else + // switch the sequence type + { + words[i] ^= ConciseSetUtils.SEQUENCE_BIT; + } + } + + // do not complement after the last element + if (isLiteral(words[lastWordIndex])) { + clearBitsAfterInLastWord(maxLiteralLengthModulus(last)); + } + + // remove trailing zeros + trimZeros(); + if (isEmpty()) { + return; + } + + // calculate the maximal element + last = 0; + int w = 0; + for (int i = 0; i <= lastWordIndex; i++) { + w = words[i]; + if (isLiteral(w)) { + last += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + last += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); + } + } + + // manage the last word (that must be a literal or a sequence of 1's) + if (isLiteral(w)) { + last -= Integer.numberOfLeadingZeros(getLiteralBits(w)); + } else { + last--; + } + } + + /** + * Removes trailing zeros + */ + private void trimZeros() + { + // loop over ALL_ZEROS_LITERAL words + int w; + do { + w = words[lastWordIndex]; + if (w == ConciseSetUtils.ALL_ZEROS_LITERAL) { + lastWordIndex--; + } else if (isZeroSequence(w)) { + if (simulateWAH || isSequenceWithNoBits(w)) { + lastWordIndex--; + } else { + // convert the sequence in a 1-bit literal word + words[lastWordIndex] = getLiteral(w); + return; + } + } else { + // 
one sequence or literal + return; + } + if (lastWordIndex < 0) { + reset(); + return; + } + } while (true); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + if (isEmpty()) { + return new IntIterator() + { + @Override + public void skipAllBefore(int element) {/*empty*/} + + @Override + public boolean hasNext() {return false;} + + @Override + public int next() {throw new NoSuchElementException();} + + @Override + public void remove() {throw new UnsupportedOperationException();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + return new BitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() + { + if (isEmpty()) { + return new IntIterator() + { + @Override + public void skipAllBefore(int element) {/*empty*/} + + @Override + public boolean hasNext() {return false;} + + @Override + public int next() {throw new NoSuchElementException();} + + @Override + public void remove() {throw new UnsupportedOperationException();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + return new ReverseBitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + reset(); + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return last; + } + + /** + * Convert a given collection to a {@link ConciseSet} instance + */ + private ConciseSet convert(IntSet c) + { + if (c instanceof ConciseSet && simulateWAH == ((ConciseSet) c).simulateWAH) { + return (ConciseSet) c; + } + if (c == null) { + return empty(); + } + + ConciseSet res = empty(); + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + res.add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet convert(int... 
a) + { + ConciseSet res = empty(); + if (a != null) { + a = Arrays.copyOf(a, a.length); + Arrays.sort(a); + for (int i : a) { + if (res.last != i) { + res.add(i); + } + } + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet convert(Collection c) + { + ConciseSet res = empty(); + Collection sorted; + if (c != null) { + if (c instanceof SortedSet && ((SortedSet) c).comparator() == null) { + sorted = c; + } else { + sorted = new ArrayList(c); + Collections.sort((List) sorted); + } + for (int i : sorted) { + if (res.last != i) { + res.add(i); + } + } + } + return res; + } + + /** + * Replace the current instance with another {@link ConciseSet} instance. It + * also returns true if the given set is actually different + * from the current one + * + * @param other {@link ConciseSet} instance to use to replace the current one + * + * @return true if the given set is different from the current + * set + */ + private boolean replaceWith(ConciseSet other) + { + if (this == other) { + return false; + } + + boolean isSimilar = (this.lastWordIndex == other.lastWordIndex) + && (this.last == other.last); + for (int i = 0; isSimilar && (i <= lastWordIndex); i++) { + isSimilar &= this.words[i] == other.words[i]; + } + + if (isSimilar) { + if (other.size >= 0) { + this.size = other.size; + } + return false; + } + + this.words = other.words; + this.size = other.size; + this.last = other.last; + this.lastWordIndex = other.lastWordIndex; + this.modCount++; + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int e) + { + modCount++; + + // range check + if (e < ConciseSetUtils.MIN_ALLOWED_SET_BIT || e > ConciseSetUtils.MAX_ALLOWED_INTEGER) { + throw new IndexOutOfBoundsException(String.valueOf(e)); + } + + // the element can be simply appended + if (e > last) { + append(e); + return true; + } + + if (e == last) { + return false; + } + + // check if the element can be put in a literal word + int blockIndex = 
maxLiteralLengthDivision(e); + int bitPosition = maxLiteralLengthModulus(e); + for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { + int w = words[i]; + if (isLiteral(w)) { + // check if the current literal word is the "right" one + if (blockIndex == 0) { + // bit already set + if ((w & (1 << bitPosition)) != 0) { + return false; + } + + // By adding the bit we potentially create a sequence: + // -- If the literal is made up of all zeros, it definitely + // cannot be part of a sequence (otherwise it would not have + // been created). Thus, we can create a 1-bit literal word + // -- If there are MAX_LITERAL_LENGHT - 2 set bits, by adding + // the new one we potentially allow for a 1's sequence + // together with the successive word + // -- If there are MAX_LITERAL_LENGHT - 1 set bits, by adding + // the new one we potentially allow for a 1's sequence + // together with the successive and/or the preceding words + if (!simulateWAH) { + int bitCount = getLiteralBitCount(w); + if (bitCount >= ConciseSetUtils.MAX_LITERAL_LENGTH - 2) { + break; + } + } else { + if (containsOnlyOneBit(~w) || w == ConciseSetUtils.ALL_ONES_LITERAL) { + break; + } + } + + // set the bit + words[i] |= 1 << bitPosition; + if (size >= 0) { + size++; + } + return true; + } + + blockIndex--; + } else { + if (simulateWAH) { + if (isOneSequence(w) && blockIndex <= getSequenceCount(w)) { + return false; + } + } else { + // if we are at the beginning of a sequence, and it is + // a set bit, the bit already exists + if (blockIndex == 0 + && (getLiteral(w) & (1 << bitPosition)) != 0) { + return false; + } + + // if we are in the middle of a sequence of 1's, the bit already exist + if (blockIndex > 0 + && blockIndex <= getSequenceCount(w) + && isOneSequence(w)) { + return false; + } + } + + // next word + blockIndex -= getSequenceCount(w) + 1; + } + } + + // the bit is in the middle of a sequence or it may cause a literal to + // become a sequence, thus the "easiest" way to add it is by ORing + 
return replaceWith(performOperation(convert(e), Operator.OR)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(int o) + { + modCount++; + + if (isEmpty()) { + return false; + } + + // the element cannot exist + if (o > last) { + return false; + } + + // check if the element can be removed from a literal word + int blockIndex = maxLiteralLengthDivision(o); + int bitPosition = maxLiteralLengthModulus(o); + for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { + final int w = words[i]; + if (isLiteral(w)) { + // check if the current literal word is the "right" one + if (blockIndex == 0) { + // the bit is already unset + if ((w & (1 << bitPosition)) == 0) { + return false; + } + + // By removing the bit we potentially create a sequence: + // -- If the literal is made up of all ones, it definitely + // cannot be part of a sequence (otherwise it would not have + // been created). Thus, we can create a 30-bit literal word + // -- If there are 2 set bits, by removing the specified + // one we potentially allow for a 1's sequence together with + // the successive word + // -- If there is 1 set bit, by removing the new one we + // potentially allow for a 0's sequence + // together with the successive and/or the preceding words + if (!simulateWAH) { + int bitCount = getLiteralBitCount(w); + if (bitCount <= 2) { + break; + } + } else { + final int l = getLiteralBits(w); + if (l == 0 || containsOnlyOneBit(l)) { + break; + } + } + + // unset the bit + words[i] &= ~(1 << bitPosition); + if (size >= 0) { + size--; + } + + // if the bit is the maximal element, update it + if (o == last) { + last -= maxLiteralLengthModulus(last) - (ConciseSetUtils.MAX_LITERAL_LENGTH + - Integer.numberOfLeadingZeros(getLiteralBits(words[i]))); + } + return true; + } + + blockIndex--; + } else { + if (simulateWAH) { + if (isZeroSequence(w) && blockIndex <= getSequenceCount(w)) { + return false; + } + } else { + // if we are at the beginning of a sequence, and it is + // 
an unset bit, the bit does not exist + if (blockIndex == 0 + && (getLiteral(w) & (1 << bitPosition)) == 0) { + return false; + } + + // if we are in the middle of a sequence of 0's, the bit does not exist + if (blockIndex > 0 + && blockIndex <= getSequenceCount(w) + && isZeroSequence(w)) { + return false; + } + } + + // next word + blockIndex -= getSequenceCount(w) + 1; + } + } + + // the bit is in the middle of a sequence or it may cause a literal to + // become a sequence, thus the "easiest" way to remove it by ANDNOTing + return replaceWith(performOperation(convert(o), Operator.ANDNOT)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int o) + { + if (isEmpty() || o > last || o < 0) { + return false; + } + + // check if the element is within a literal word + int block = maxLiteralLengthDivision(o); + int bit = maxLiteralLengthModulus(o); + for (int i = 0; i <= lastWordIndex; i++) { + final int w = words[i]; + final int t = w & 0xC0000000; // the first two bits... 
/**
 * {@inheritDoc}
 * <p>
 * Answers from cached metadata where possible (maximum element, cached size,
 * a leading fill with no flipped bit that spans the other operand) and
 * otherwise walks both word arrays in lock-step, block by block.
 *
 * @param c candidate subset; {@code null}, an empty set and {@code this}
 *          itself are all reported as contained
 *
 * @return {@code true} if no block of {@code c} holds a bit that is missing
 * from the corresponding block of this set
 */
@Override
public boolean containsAll(IntSet c)
{
  if (c == null || c.isEmpty() || c == this) {
    return true;
  }
  if (isEmpty()) {
    return false;
  }

  final ConciseSet other = convert(c);
  // an element above our maximum cannot be contained
  if (other.last > last) {
    return false;
  }
  // only meaningful when our own size is cached (size < 0 means "unknown")
  if (size >= 0 && other.size > size) {
    return false;
  }
  if (other.size == 1) {
    return contains(other.last);
  }

  // check whether the first operator starts with a sequence that
  // completely "covers" the second operator
  if (isSequenceWithNoBits(this.words[0])
      && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) {
    // a leading 0-fill holds none of other's elements, a leading 1-fill holds all of them
    if (isZeroSequence(this.words[0])) {
      return false;
    }
    return true;
  }
  // symmetric case: other starts with a fill spanning every bit of this set
  if (isSequenceWithNoBits(other.words[0])
      && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) {
    return false;
  }

  // scan "this" and "other"
  WordIterator thisItr = new WordIterator();
  WordIterator otherItr = other.new WordIterator();
  while (true) {
    if (!thisItr.isLiteral) {
      if (!otherItr.isLiteral) {
        // fill vs fill: compare the common run of blocks in one step
        int minCount = Math.min(thisItr.count, otherItr.count);
        // SEQUENCE_BIT clear on this / set on other: presumably 0-fill vs 1-fill,
        // i.e. other has elements that this set lacks
        if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word) == 0
            && (ConciseSetUtils.SEQUENCE_BIT & otherItr.word) != 0) {
          return false;
        }
        // other exhausted first: everything it held was found
        if (!otherItr.prepareNext(minCount)) {
          return true;
        }
        // this exhausted while other still has words
        if (!thisItr.prepareNext(minCount)) {
          return false;
        }
      } else {
        // fill (this) vs literal (other): expand one block of the fill
        if ((thisItr.toLiteral() & otherItr.word) != otherItr.word) {
          return false;
        }
        // one block of the fill is consumed; the copied word is decremented
        // together with the iterator's block counter (prepareNext(1) below)
        thisItr.word--;
        if (!otherItr.prepareNext()) {
          return true;
        }
        if (!thisItr.prepareNext(1)) {
          return false;
        }
      }
    } else if (!otherItr.isLiteral) {
      // literal (this) vs fill (other): expand one block of other's fill
      int o = otherItr.toLiteral();
      if ((thisItr.word & otherItr.toLiteral()) != o) {
        return false;
      }
      otherItr.word--;
      if (!otherItr.prepareNext(1)) {
        return true;
      }
      if (!thisItr.prepareNext()) {
        return false;
      }
    } else {
      // literal vs literal: every bit of other's block must be present in ours
      if ((thisItr.word & otherItr.word) != otherItr.word) {
        return false;
      }
      if (!otherItr.prepareNext()) {
        return true;
      }
      if (!thisItr.prepareNext()) {
        return false;
      }
    }
  }
}
+ } else { + if ((thisItr.toLiteral() & otherItr.word) != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return true; + } + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } else if (!otherItr.isLiteral) { + if ((thisItr.word & otherItr.toLiteral()) != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return true; + } + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + return false; + } + } else { + if ((thisItr.word & otherItr.word) != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return true; + } + if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + if ((size >= 0 && size < minElements) || c == null || c.isEmpty() || isEmpty()) { + return false; + } + if (this == c) { + return size() >= minElements; + } + + // convert the other set in order to perform a more complex intersection + ConciseSet other = convert(c); + if (other.size >= 0 && other.size < minElements) { + return false; + } + if (minElements == 1 && other.size == 1) { + return contains(other.last); + } + if (minElements == 1 && size == 1) { + return other.contains(last); + } + + // disjoint sets + if (isSequenceWithNoBits(this.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(this.words[0]) + 1) > other.last) { + if (isZeroSequence(this.words[0])) { + return false; + } + return true; + } + if (isSequenceWithNoBits(other.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(other.words[0]) + 1) > this.last) { + if (isZeroSequence(other.words[0])) { + return false; + } + return true; + } + + // resulting size + int res = 0; + + // scan "this" and "other" + WordIterator thisItr = new WordIterator(); + WordIterator otherItr = other.new WordIterator(); + while 
(true) { + if (!thisItr.isLiteral) { + if (!otherItr.isLiteral) { + int minCount = Math.min(thisItr.count, otherItr.count); + if ((ConciseSetUtils.SEQUENCE_BIT & thisItr.word & otherItr.word) != 0) { + res += maxLiteralLengthMultiplication(minCount); + if (res >= minElements) { + return true; + } + } + if (!thisItr.prepareNext(minCount) | !otherItr.prepareNext(minCount)) // NOT || + { + return false; + } + } else { + res += getLiteralBitCount(thisItr.toLiteral() & otherItr.word); + if (res >= minElements) { + return true; + } + thisItr.word--; + if (!thisItr.prepareNext(1) | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } else if (!otherItr.isLiteral) { + res += getLiteralBitCount(thisItr.word & otherItr.toLiteral()); + if (res >= minElements) { + return true; + } + otherItr.word--; + if (!thisItr.prepareNext() | !otherItr.prepareNext(1)) // do NOT use "||" + { + return false; + } + } else { + res += getLiteralBitCount(thisItr.word & otherItr.word); + if (res >= minElements) { + return true; + } + if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" + { + return false; + } + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return words == null; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + modCount++; + + if (isEmpty() || c == this) { + return false; + } + if (c == null || c.isEmpty()) { + clear(); + return true; + } + + ConciseSet other = convert(c); + if (other.size == 1) { + if (contains(other.last)) { + if (size == 1) { + return false; + } + return replaceWith(convert(other.last)); + } + clear(); + return true; + } + + return replaceWith(performOperation(other, Operator.AND)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + modCount++; + if (c == null || c.isEmpty() || this == c) { + return false; + } + + ConciseSet other = convert(c); + if (other.size == 1) { + return add(other.last); + } + + 
return replaceWith(performOperation(convert(c), Operator.OR)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + modCount++; + + if (c == null || c.isEmpty() || isEmpty()) { + return false; + } + if (c == this) { + clear(); + return true; + } + + ConciseSet other = convert(c); + if (other.size == 1) { + return remove(other.last); + } + + return replaceWith(performOperation(convert(c), Operator.ANDNOT)); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + if (size < 0) { + size = 0; + for (int i = 0; i <= lastWordIndex; i++) { + int w = words[i]; + if (isLiteral(w)) { + size += getLiteralBitCount(w); + } else { + if (isZeroSequence(w)) { + if (!isSequenceWithNoBits(w)) { + size++; + } + } else { + size += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); + if (!isSequenceWithNoBits(w)) { + size--; + } + } + } + } + } + return size; + } + + /** + * {@inheritDoc} + */ + @Override + public ConciseSet empty() + { + return new ConciseSet(simulateWAH); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + int h = 1; + for (int i = 0; i <= lastWordIndex; i++) { + h = (h << 5) - h + words[i]; + } + return h; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof ConciseSet)) { + return super.equals(obj); + } + + final ConciseSet other = (ConciseSet) obj; + if (simulateWAH != other.simulateWAH) { + return super.equals(obj); + } + + if (size() != other.size()) { + return false; + } + if (isEmpty()) { + return true; + } + if (last != other.last) { + return false; + } + for (int i = 0; i <= lastWordIndex; i++) { + if (words[i] != other.words[i]) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(IntSet o) + { + // empty set cases + if (this.isEmpty() && o.isEmpty()) { + return 0; + } + if (this.isEmpty()) { + return -1; + } + if 
(o.isEmpty()) { + return 1; + } + + final ConciseSet other = convert(o); + + // the word at the end must be the same + int res = this.last - other.last; + if (res != 0) { + return res < 0 ? -1 : 1; + } + + // scan words from MSB to LSB + int thisIndex = this.lastWordIndex; + int otherIndex = other.lastWordIndex; + int thisWord = this.words[thisIndex]; + int otherWord = other.words[otherIndex]; + while (thisIndex >= 0 && otherIndex >= 0) { + if (!isLiteral(thisWord)) { + if (!isLiteral(otherWord)) { + // compare two sequences + // note that they are made up of at least two blocks, and we + // start comparing from the end, that is at blocks with no + // (un)set bits + if (isZeroSequence(thisWord)) { + if (isOneSequence(otherWord)) + // zeros < ones + { + return -1; + } + // compare two sequences of zeros + res = getSequenceCount(otherWord) - getSequenceCount(thisWord); + if (res != 0) { + return res < 0 ? -1 : 1; + } + } else { + if (isZeroSequence(otherWord)) + // ones > zeros + { + return 1; + } + // compare two sequences of ones + res = getSequenceCount(thisWord) - getSequenceCount(otherWord); + if (res != 0) { + return res < 0 ? 
-1 : 1; + } + } + // if the sequences are the same (both zeros or both ones) + // and have the same length, compare the first blocks in the + // next loop since such blocks might contain (un)set bits + thisWord = getLiteral(thisWord); + otherWord = getLiteral(otherWord); + } else { + // zeros < literal --> -1 + // ones > literal --> +1 + // note that the sequence is made up of at least two blocks, + // and we start comparing from the end, that is at a block + // with no (un)set bits + if (isZeroSequence(thisWord)) { + if (otherWord != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return -1; + } + } else { + if (otherWord != ConciseSetUtils.ALL_ONES_LITERAL) { + return 1; + } + } + if (getSequenceCount(thisWord) == 1) { + thisWord = getLiteral(thisWord); + } else { + thisWord--; + } + if (--otherIndex >= 0) { + otherWord = other.words[otherIndex]; + } + } + } else if (!isLiteral(otherWord)) { + // literal > zeros --> +1 + // literal < ones --> -1 + // note that the sequence is made up of at least two blocks, + // and we start comparing from the end, that is at a block + // with no (un)set bits + if (isZeroSequence(otherWord)) { + if (thisWord != ConciseSetUtils.ALL_ZEROS_LITERAL) { + return 1; + } + } else { + if (thisWord != ConciseSetUtils.ALL_ONES_LITERAL) { + return -1; + } + } + if (--thisIndex >= 0) { + thisWord = this.words[thisIndex]; + } + if (getSequenceCount(otherWord) == 1) { + otherWord = getLiteral(otherWord); + } else { + otherWord--; + } + } else { + res = thisWord - otherWord; // equals getLiteralBits(thisWord) - getLiteralBits(otherWord) + if (res != 0) { + return res < 0 ? -1 : 1; + } + if (--thisIndex >= 0) { + thisWord = this.words[thisIndex]; + } + if (--otherIndex >= 0) { + otherWord = other.words[otherIndex]; + } + } + } + return thisIndex >= 0 ? 1 : (otherIndex >= 0 ? 
-1 : 0); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) + { + ConciseSet toRemove = empty(); + toRemove.fill(from, to); + this.removeAll(toRemove); + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) + { + ConciseSet toAdd = empty(); + toAdd.add(to); + toAdd.complement(); + toAdd.add(to); + + ConciseSet toRemove = empty(); + toRemove.add(from); + toRemove.complement(); + + toAdd.removeAll(toRemove); + + this.addAll(toAdd); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) + { + if (!add(e)) { + remove(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return (lastWordIndex + 1) / Math.ceil((1 + last) / 32D); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return (double) (lastWordIndex + 1) / size(); + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + final StringBuilder s = new StringBuilder("INTERNAL REPRESENTATION:\n"); + final Formatter f = new Formatter(s, Locale.ENGLISH); + + if (isEmpty()) { + return s.append("null\n").toString(); + } + + f.format("Elements: %s\n", toString()); + + // elements + int firstBitInWord = 0; + for (int i = 0; i <= lastWordIndex; i++) { + // raw representation of words[i] + f.format("words[%d] = ", i); + String ws = toBinaryString(words[i]); + if (isLiteral(words[i])) { + s.append(ws.substring(0, 1)); + s.append("--"); + s.append(ws.substring(1)); + } else { + s.append(ws.substring(0, 2)); + s.append('-'); + if (simulateWAH) { + s.append("xxxxx"); + } else { + s.append(ws.substring(2, 7)); + } + s.append('-'); + s.append(ws.substring(7)); + } + s.append(" --> "); + + // decode words[i] + if (isLiteral(words[i])) { + // literal + s.append("literal: "); + s.append(toBinaryString(words[i]).substring(1)); + f.format(" ---> [from %d to %d] ", 
firstBitInWord, firstBitInWord + ConciseSetUtils.MAX_LITERAL_LENGTH - 1); + firstBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + // sequence + if (isOneSequence(words[i])) { + s.append('1'); + } else { + s.append('0'); + } + s.append(" block: "); + s.append(toBinaryString(getLiteralBits(getLiteral(words[i]))).substring(1)); + if (!simulateWAH) { + s.append(" (bit="); + int bit = (words[i] & 0x3E000000) >>> 25; + if (bit == 0) { + s.append("none"); + } else { + s.append(String.format("%4d", bit - 1)); + } + s.append(')'); + } + int count = getSequenceCount(words[i]); + f.format( + " followed by %d blocks (%d bits)", + getSequenceCount(words[i]), + maxLiteralLengthMultiplication(count) + ); + f.format( + " ---> [from %d to %d] ", + firstBitInWord, + firstBitInWord + (count + 1) * ConciseSetUtils.MAX_LITERAL_LENGTH - 1 + ); + firstBitInWord += (count + 1) * ConciseSetUtils.MAX_LITERAL_LENGTH; + } + s.append('\n'); + } + + // object attributes + f.format("simulateWAH: %b\n", simulateWAH); + f.format("last: %d\n", last); + f.format("size: %s\n", (size == -1 ? 
"invalid" : Integer.toString(size))); + f.format("words.length: %d\n", words.length); + f.format("lastWordIndex: %d\n", lastWordIndex); + + // compression + f.format("bitmap compression: %.2f%%\n", 100D * bitmapCompressionRatio()); + f.format("collection compression: %.2f%%\n", 100D * collectionCompressionRatio()); + + return s.toString(); + } + + /** + * Save the state of the instance to a stream + */ + private void writeObject(ObjectOutputStream s) throws IOException + { + if (words != null && lastWordIndex < words.length - 1) + // compact before serializing + { + words = Arrays.copyOf(words, lastWordIndex + 1); + } + s.defaultWriteObject(); + } + + /** + * Reconstruct the instance from a stream + */ + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException + { + s.defaultReadObject(); + if (words == null) { + reset(); + return; + } + lastWordIndex = words.length - 1; + updateLast(); + size = -1; + } + + /** + * Possible operations + */ + private enum Operator + { + /** + * @uml.property name="aND" + * @uml.associationEnd + */ + AND { + @Override + public int combineLiterals(int literal1, int literal2) + { + return literal1 & literal2; + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + return op1.empty(); + } + + /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ + private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op2 is completely hidden by op1 + if (isZeroSequence(op1.words[0])) { + return op1.empty(); + } + // op2 is left unchanged, but the rest of op1 is hidden + return op2.clone(); + } + return null; + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + 
{ + ConciseSet res = oneWayCombineDisjointSets(op1, op2); + if (res == null) { + res = oneWayCombineDisjointSets(op2, op1); + } + return res; + } + }, + + /** + * @uml.property name="oR" + * @uml.associationEnd + */ + OR { + @Override + public int combineLiterals(int literal1, int literal2) + { + return literal1 | literal2; + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + if (!op1.isEmpty()) { + return op1.clone(); + } + if (!op2.isEmpty()) { + return op2.clone(); + } + return op1.empty(); + } + + /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ + private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op2 is completely hidden by op1 + if (isOneSequence(op1.words[0])) { + return op1.clone(); + } + // op2 is left unchanged, but the rest of op1 must be appended... + + // ... first, allocate sufficient space for the result + ConciseSet res = op1.empty(); + res.words = new int[op1.lastWordIndex + op2.lastWordIndex + 3]; + res.lastWordIndex = op2.lastWordIndex; + + // ... then, copy op2 + System.arraycopy(op2.words, 0, res.words, 0, op2.lastWordIndex + 1); + + // ... 
finally, append op1 + WordIterator wordIterator = op1.new WordIterator(); + wordIterator.prepareNext(maxLiteralLengthDivision(op2.last) + 1); + wordIterator.flush(res); + if (op1.size < 0 || op2.size < 0) { + res.size = -1; + } else { + res.size = op1.size + op2.size; + } + res.last = op1.last; + res.compact(); + return res; + } + return null; + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + { + ConciseSet res = oneWayCombineDisjointSets(op1, op2); + if (res == null) { + res = oneWayCombineDisjointSets(op2, op1); + } + return res; + } + }, + + /** + * @uml.property name="xOR" + * @uml.associationEnd + */ + XOR { + @Override + public int combineLiterals(int literal1, int literal2) + { + return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 ^ literal2); + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + if (!op1.isEmpty()) { + return op1.clone(); + } + if (!op2.isEmpty()) { + return op2.clone(); + } + return op1.empty(); + } + + /** Used to implement {@link #combineDisjointSets(ConciseSet, ConciseSet)} */ + private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op2 is left unchanged by op1 + if (isZeroSequence(op1.words[0])) { + return OR.combineDisjointSets(op1, op2); + } + // op2 must be complemented, then op1 must be appended + // it is better to perform it normally... 
+ return null; + } + return null; + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + { + ConciseSet res = oneWayCombineDisjointSets(op1, op2); + if (res == null) { + res = oneWayCombineDisjointSets(op2, op1); + } + return res; + } + }, + + /** + * @uml.property name="aNDNOT" + * @uml.associationEnd + */ + ANDNOT { + @Override + public int combineLiterals(int literal1, int literal2) + { + return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 & (~literal2)); + } + + @Override + public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2) + { + if (!op1.isEmpty()) { + return op1.clone(); + } + return op1.empty(); + } + + @Override + public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2) + { + // check whether the first operator starts with a sequence that + // completely "covers" the second operator + if (isSequenceWithNoBits(op1.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op1.words[0]) + 1) > op2.last) { + // op1 is left unchanged by op2 + if (isZeroSequence(op1.words[0])) { + return op1.clone(); + } + // op2 must be complemented, then op1 must be appended + // it is better to perform it normally... + return null; + } + // check whether the second operator starts with a sequence that + // completely "covers" the first operator + if (isSequenceWithNoBits(op2.words[0]) + && maxLiteralLengthMultiplication(getSequenceCount(op2.words[0]) + 1) > op1.last) { + // op1 is left unchanged by op2 + if (isZeroSequence(op2.words[0])) { + return op1.clone(); + } + // op1 is cleared by op2 + return op1.empty(); + } + return null; + } + },; + + /** + * Performs the operation on the given literals + * + * @param literal1 left operand + * @param literal2 right operand + * + * @return literal representing the result of the specified operation + */ + public abstract int combineLiterals(int literal1, int literal2); + + /** + * Performs the operation when one or both operands are empty set + *

+ * NOTE: the caller MUST assure that one or both the operands + * are empty!!! + * + * @param op1 left operand + * @param op2 right operand + * + * @return null if both operands are non-empty + */ + public abstract ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2); + + /** + * Performs the operation in the special case of "disjoint" sets, namely + * when the first (or the second) operand starts with a sequence (it + * does not matter if 0's or 1's) that completely covers all the bits of + * the second (or the first) operand. + * + * @param op1 left operand + * @param op2 right operand + * + * @return null if operands are non-disjoint + */ + public abstract ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2); + } + + /** + * Iterator over the bits of a single literal/fill word + */ + private interface WordExpander + { + public boolean hasNext(); + + public boolean hasPrevious(); + + public int next(); + + public int previous(); + + public void skipAllAfter(int i); + + public void skipAllBefore(int i); + + public void reset(int offset, int word, boolean fromBeginning); + } + + /** + * Iterates over words, from the rightmost (LSB) to the leftmost (MSB). + *

+ * When {@link ConciseSet#simulateWAH} is false, mixed + * sequences are "broken" into a literal (i.e., the first block is coded + * with a literal in {@link #word}) and a "pure" sequence (i.e., the + * remaining blocks are coded with a sequence with no bits in {@link #word}) + */ + private class WordIterator + { + /** + * copy of the current word + */ + int word; + + /** + * current word index + */ + int index; + + /** + * true if {@link #word} is a literal + */ + boolean isLiteral; + + /** + * number of blocks in the current word (1 for literals, > 1 for sequences) + */ + int count; + + /** + * Initialize data + */ + WordIterator() + { + isLiteral = false; + index = -1; + prepareNext(); + } + + /** + * @return true if there is no current word + */ + boolean exhausted() + { + return index > lastWordIndex; + } + + /** + * Prepare the next value for {@link #word} after skipping a given + * number of 31-bit blocks in the current sequence. + *

+ * NOTE: it works only when the current word is within a + * sequence, namely a literal cannot be skipped. Moreover, the number of + * blocks to skip must be less than the remaining blocks in the current + * sequence. + * + * @param c number of 31-bit "blocks" to skip + * + * @return false if the next word does not exists + */ + boolean prepareNext(int c) + { + assert c <= count; + count -= c; + if (count == 0) { + return prepareNext(); + } + return true; + } + + /** + * Prepare the next value for {@link #word} + * + * @return false if the next word does not exists + */ + boolean prepareNext() + { + if (!simulateWAH && isLiteral && count > 1) { + count--; + isLiteral = false; + word = getSequenceWithNoBits(words[index]) - 1; + return true; + } + + index++; + if (index > lastWordIndex) { + return false; + } + word = words[index]; + isLiteral = isLiteral(word); + if (!isLiteral) { + count = getSequenceCount(word) + 1; + if (!simulateWAH && !isSequenceWithNoBits(word)) { + isLiteral = true; + int bit = (1 << (word >>> 25)) >>> 1; + word = isZeroSequence(word) + ? (ConciseSetUtils.ALL_ZEROS_LITERAL | bit) + : (ConciseSetUtils.ALL_ONES_LITERAL & ~bit); + } + } else { + count = 1; + } + return true; + } + + /** + * @return the literal word corresponding to each block contained in the + * current sequence word. Not to be used with literal words! 
+ */ + int toLiteral() + { + assert !isLiteral; + return ConciseSetUtils.ALL_ZEROS_LITERAL | ((word << 1) >> ConciseSetUtils.MAX_LITERAL_LENGTH); + } + + /** + * Copies all the remaining words in the given set + * + * @param s set where the words must be copied + * + * @return false if there are no words to copy + */ + private boolean flush(ConciseSet s) + { + // nothing to flush + if (exhausted()) { + return false; + } + + // try to "compress" the first few words + do { + if (isLiteral) { + s.appendLiteral(word); + } else { + s.appendFill(count, word); + } + } while (prepareNext() && s.words[s.lastWordIndex] != word); + + // copy remaining words "as-is" + int delta = lastWordIndex - index + 1; + System.arraycopy(words, index, s.words, s.lastWordIndex + 1, delta); + s.lastWordIndex += delta; + s.last = last; + return true; + } + } + + /* + * DEBUGGING METHODS + */ + + /** + * Iterator over the bits of literal and zero-fill words + */ + private class LiteralAndZeroFillExpander implements WordExpander + { + final int[] buffer = new int[ConciseSetUtils.MAX_LITERAL_LENGTH]; + int len = 0; + int current = 0; + + @Override + public boolean hasNext() + { + return current < len; + } + + @Override + public boolean hasPrevious() + { + return current > 0; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return buffer[current++]; + } + + @Override + public int previous() + { + if (!hasPrevious()) { + throw new NoSuchElementException(); + } + return buffer[--current]; + } + + @Override + public void skipAllAfter(int i) + { + while (hasPrevious() && buffer[current - 1] > i) { + current--; + } + } + + @Override + public void skipAllBefore(int i) + { + while (hasNext() && buffer[current] < i) { + current++; + } + } + + @Override + public void reset(int offset, int word, boolean fromBeginning) + { + if (isLiteral(word)) { + len = 0; + for (int i = 0; i < ConciseSetUtils.MAX_LITERAL_LENGTH; i++) { + if ((word & (1 << i)) != 0) 
{ + buffer[len++] = offset + i; + } + } + current = fromBeginning ? 0 : len; + } else { + if (isZeroSequence(word)) { + if (simulateWAH || isSequenceWithNoBits(word)) { + len = 0; + current = 0; + } else { + len = 1; + buffer[0] = offset + ((0x3FFFFFFF & word) >>> 25) - 1; + current = fromBeginning ? 0 : 1; + } + } else { + throw new RuntimeException("sequence of ones!"); + } + } + } + } + + /** + * Iterator over the bits of one-fill words + */ + private class OneFillExpander implements WordExpander + { + int firstInt = 1; + int lastInt = -1; + int current = 0; + int exception = -1; + + @Override + public boolean hasNext() + { + return current < lastInt; + } + + @Override + public boolean hasPrevious() + { + return current > firstInt; + } + + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + current++; + if (!simulateWAH && current == exception) { + current++; + } + return current; + } + + @Override + public int previous() + { + if (!hasPrevious()) { + throw new NoSuchElementException(); + } + current--; + if (!simulateWAH && current == exception) { + current--; + } + return current; + } + + @Override + public void skipAllAfter(int i) + { + if (i >= current) { + return; + } + current = i + 1; + } + + @Override + public void skipAllBefore(int i) + { + if (i <= current) { + return; + } + current = i - 1; + } + + @Override + public void reset(int offset, int word, boolean fromBeginning) + { + if (!isOneSequence(word)) { + throw new RuntimeException("NOT a sequence of ones!"); + } + firstInt = offset; + lastInt = offset + maxLiteralLengthMultiplication(getSequenceCount(word) + 1) - 1; + if (!simulateWAH) { + exception = offset + ((0x3FFFFFFF & word) >>> 25) - 1; + if (exception == firstInt) { + firstInt++; + } + if (exception == lastInt) { + lastInt--; + } + } + current = fromBeginning ? 
(firstInt - 1) : (lastInt + 1); + } + } + + /** + * Iterator for all the integers of a {@link ConciseSet} instance + */ + private class BitIterator implements IntIterator + { + /** + * @uml.property name="litExp" + * @uml.associationEnd + */ + final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander(); + /** + * @uml.property name="oneExp" + * @uml.associationEnd + */ + final OneFillExpander oneExp = new OneFillExpander(); + /** + * @uml.property name="exp" + * @uml.associationEnd + */ + WordExpander exp; + int nextIndex = 0; + int nextOffset = 0; + + private BitIterator() + { + nextWord(); + } + + private void nextWord() + { + final int word = words[nextIndex++]; + exp = isOneSequence(word) ? oneExp : litExp; + exp.reset(nextOffset, word, true); + + // prepare next offset + if (isLiteral(word)) { + nextOffset += ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + nextOffset += maxLiteralLengthMultiplication(getSequenceCount(word) + 1); + } + } + + @Override + public boolean hasNext() + { + return nextIndex <= lastWordIndex || exp.hasNext(); + } + + @Override + public int next() + { + while (!exp.hasNext()) { + if (nextIndex > lastWordIndex) { + throw new NoSuchElementException(); + } + nextWord(); + } + return exp.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public void skipAllBefore(int element) + { + while (true) { + exp.skipAllBefore(element); + if (exp.hasNext() || nextIndex > lastWordIndex) { + return; + } + nextWord(); + } + } + + @Override + public IntIterator clone() + { + BitIterator retVal = new BitIterator(); + retVal.exp = exp; + retVal.nextIndex = nextIndex; + retVal.nextOffset = nextOffset; + return retVal; + } + } + + /** + * @author alessandrocolantonio + */ + private class ReverseBitIterator implements IntIterator + { + /** + * @uml.property name="litExp" + * @uml.associationEnd + */ + final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander(); + 
/** + * @uml.property name="oneExp" + * @uml.associationEnd + */ + final OneFillExpander oneExp = new OneFillExpander(); + /** + * @uml.property name="exp" + * @uml.associationEnd + */ + WordExpander exp; + int nextIndex = lastWordIndex; + int nextOffset = maxLiteralLengthMultiplication(maxLiteralLengthDivision(last) + 1); + int firstIndex; // first non-zero block + + ReverseBitIterator() + { + // identify the first non-zero block + if ((isSequenceWithNoBits(words[0]) && isZeroSequence(words[0])) || (isLiteral(words[0]) + && words[0] + == ConciseSetUtils.ALL_ZEROS_LITERAL)) { + firstIndex = 1; + } else { + firstIndex = 0; + } + previousWord(); + } + + void previousWord() + { + final int word = words[nextIndex--]; + exp = isOneSequence(word) ? oneExp : litExp; + if (isLiteral(word)) { + nextOffset -= ConciseSetUtils.MAX_LITERAL_LENGTH; + } else { + nextOffset -= maxLiteralLengthMultiplication(getSequenceCount(word) + 1); + } + exp.reset(nextOffset, word, false); + } + + @Override + public boolean hasNext() + { + return nextIndex >= firstIndex || exp.hasPrevious(); + } + + @Override + public int next() + { + while (!exp.hasPrevious()) { + if (nextIndex < firstIndex) { + throw new NoSuchElementException(); + } + previousWord(); + } + return exp.previous(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + + @Override + public void skipAllBefore(int element) + { + while (true) { + exp.skipAllAfter(element); + if (exp.hasPrevious() || nextIndex < firstIndex) { + return; + } + previousWord(); + } + } + + @Override + public IntIterator clone() + { + ReverseBitIterator retVal = new ReverseBitIterator(); + retVal.exp = exp; + retVal.nextIndex = nextIndex; + retVal.nextOffset = nextOffset; + retVal.firstIndex = firstIndex; + return retVal; + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java b/extendedset/src/main/java/io/druid/extendedset/intset/ConciseSetUtils.java new file mode 
/**
 * Static helpers, plus two word expanders, for the 32-bit words of a
 * CONCISE-encoded integer set.
 * <p>
 * As decoded by the predicates of this class, a word is one of:
 * <ul>
 * <li><b>literal</b>: bit 31 is {@code 1}; bits 0-30 are a block of 31 bits;</li>
 * <li><b>zero sequence</b>: bits 31-30 are {@code 00};</li>
 * <li><b>one sequence</b>: bits 31-30 are {@code 01}.</li>
 * </ul>
 * In a sequence word, bits 25-29 hold the position <i>plus one</i> of a single
 * flipped bit ({@code 00000} means "no flipped bit") and bits 0-24 hold the
 * number of 31-bit blocks that follow the first one.
 */
public class ConciseSetUtils
{
  /**
   * The highest representable integer.
   * <p>
   * Its value is computed as follows. The number of bits required to
   * represent the longest sequence of 0's or 1's is
   * <tt>ceil(log<sub>2</sub>(({@link Integer#MAX_VALUE} - 31) / 31)) = 27</tt>.
   * Indeed, at least one literal exists, and the other bits may all be 0's or
   * 1's, that is <tt>{@link Integer#MAX_VALUE} - 31</tt>. If we use:
   * <ul>
   * <li>2 bits for the sequence type;</li>
   * <li>5 bits to indicate which bit is set;</li>
   * </ul>
   * then <tt>32 - 5 - 2 = 25</tt> is the number of available bits to
   * represent the maximum sequence of 0's and 1's. Thus, the maximal bit that
   * can be set is represented by a number of 0's equal to
   * <tt>31 * (1 &lt;&lt; 25)</tt>, followed by a literal with 30 0's and the
   * MSB (31<sup>st</sup> bit) equal to 1.
   */
  public static final int MAX_ALLOWED_INTEGER = 31 * (1 << 25) + 30; // 1040187422

  /**
   * The lowest representable integer.
   */
  public static final int MIN_ALLOWED_SET_BIT = 0;

  /**
   * Maximum number of representable bits within a literal
   */
  public static final int MAX_LITERAL_LENGTH = 31;

  /**
   * Literal that represents all bits set to 1 (and MSB = 1)
   */
  public static final int ALL_ONES_LITERAL = 0xFFFFFFFF;

  /**
   * Literal that represents all bits set to 0 (and MSB = 1)
   */
  public static final int ALL_ZEROS_LITERAL = 0x80000000;

  /**
   * All bits set to 1 and MSB = 0
   */
  public static final int ALL_ONES_WITHOUT_MSB = 0x7FFFFFFF;

  /**
   * Sequence bit
   */
  public static final int SEQUENCE_BIT = 0x40000000;

  /**
   * Calculates the modulus division by 31 in a faster way than using <code>n % 31</code>
   * <p>
   * This method of finding modulus division by an integer that is one less
   * than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations
   * is at most <tt>12 + 9 * ceil(lg(32))</tt>.
   * <p>
   * See <a href="http://graphics.stanford.edu/~seander/bithacks.html">http://graphics.stanford.edu/~seander/bithacks.html</a>
   *
   * @param n number to divide
   *
   * @return <code>n % 31</code>
   */
  public static int maxLiteralLengthModulus(int n)
  {
    // Add the even and the odd base-32 digits pairwise (32 == 1 mod 31, so
    // summing base-32 digits preserves the remainder).
    int m = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F);
    m = (m >>> 15) + (m & 0x00007FFF);
    // Keep folding 5 bits at a time; at most five rounds, as in the unrolled form.
    for (int round = 0; round < 5 && m > 31; round++) {
      m = (m >>> 5) + (m & 0x0000001F);
    }
    return m == 31 ? 0 : m;
  }

  /**
   * Calculates the multiplication by 31 in a faster way than using <code>n * 31</code>
   *
   * @param n number to multiply
   *
   * @return <code>n * 31</code>
   */
  public static int maxLiteralLengthMultiplication(int n)
  {
    return (n << 5) - n;
  }

  /**
   * Calculates the division by 31
   *
   * @param n number to divide
   *
   * @return <code>n / 31</code>
   */
  public static int maxLiteralLengthDivision(int n)
  {
    return n / 31;
  }

  /**
   * Checks whether a word is a literal one
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a literal word
   */
  public static boolean isLiteral(int word)
  {
    // "word" must be 1*
    // NOTE: this is faster than "return (word & 0x80000000) == 0x80000000"
    return (word & 0x80000000) != 0;
  }

  /**
   * Checks whether a word contains a sequence of 1's
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 1's
   */
  public static boolean isOneSequence(int word)
  {
    // "word" must be 01*
    return (word & 0xC0000000) == SEQUENCE_BIT;
  }

  /**
   * Checks whether a word contains a sequence of 0's
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 0's
   */
  public static boolean isZeroSequence(int word)
  {
    // "word" must be 00*
    return (word & 0xC0000000) == 0;
  }

  /**
   * Checks whether a word contains a sequence of 0's with no set bit, or 1's
   * with no unset bit, i.e. a sequence word whose flipped-bit field (bits
   * 25-29) is zero.
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 0's or 1's
   * but with no (un)set bit
   */
  public static boolean isSequenceWithNoBits(int word)
  {
    // "word" must be 0?00000*
    return (word & 0xBE000000) == 0x00000000;
  }

  /**
   * Gets the number of blocks of 1's or 0's stored in a sequence word
   *
   * @param word word to check
   *
   * @return the number of blocks that follow the first block of 31 bits
   */
  public static int getSequenceCount(int word)
  {
    // get the 25 LSB bits
    return word & 0x01FFFFFF;
  }

  /**
   * Gets the total number of 31-bit blocks covered by a sequence word, the
   * first block included.
   *
   * @param word sequence word to check
   *
   * @return <code>{@link #getSequenceCount(int)} + 1</code>
   */
  public static int getSequenceNumWords(int word)
  {
    return getSequenceCount(word) + 1;
  }

  /**
   * Clears the (un)set bit in a sequence
   *
   * @param word word to check
   *
   * @return the sequence corresponding to the given sequence and with no
   * (un)set bits
   */
  public static int getSequenceWithNoBits(int word)
  {
    // clear 29 to 25 LSB bits
    return (word & 0xC1FFFFFF);
  }

  /**
   * Gets the literal word that represents the first 31 bits of the given
   * word (i.e. the first block of a sequence word, or the bits of a literal word).
   * <p>
   * If the word is a literal, it returns the unmodified word. In case of a
   * sequence, it returns a literal that represents the first 31 bits of the
   * given sequence word.
   *
   * @param word        word to check
   * @param simulateWAH when <code>true</code> the flipped-bit field of a
   *                    sequence word is ignored, so the result is either
   *                    {@link #ALL_ZEROS_LITERAL} or {@link #ALL_ONES_LITERAL}
   *
   * @return the literal contained within the given word, with the most
   * significant bit set to 1.
   */
  public static int getLiteral(int word, boolean simulateWAH)
  {
    if (isLiteral(word)) {
      return word;
    }

    if (simulateWAH) {
      return isZeroSequence(word) ? ALL_ZEROS_LITERAL : ALL_ONES_LITERAL;
    }

    // get bits from 30 to 26 and use them to set the corresponding bit
    // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent
    // NOTE: ">>> 1" is required since 00000 represents no bits and 00001 the LSB bit set
    int literal = (1 << (word >>> 25)) >>> 1;
    return isZeroSequence(word)
           ? (ALL_ZEROS_LITERAL | literal)
           : (ALL_ONES_LITERAL & ~literal);
  }

  /**
   * Builds the literal for the first block of a zero sequence: all zeros
   * except for the flipped bit, if the word carries one.
   *
   * @param word zero-sequence word
   *
   * @return {@link #ALL_ZEROS_LITERAL} with the flipped bit (if any) set
   */
  public static int getLiteralFromZeroSeqFlipBit(int word)
  {
    int flipBit = getFlippedBit(word);
    if (flipBit > -1) {
      return ALL_ZEROS_LITERAL | flipBitAsBinaryString(flipBit);
    }
    return ALL_ZEROS_LITERAL;
  }

  /**
   * Builds the literal for the first block of a one sequence: all ones
   * except for the flipped bit, if the word carries one.
   *
   * @param word one-sequence word
   *
   * @return {@link #ALL_ONES_LITERAL} with the flipped bit (if any) cleared
   */
  public static int getLiteralFromOneSeqFlipBit(int word)
  {
    int flipBit = getFlippedBit(word);
    if (flipBit > -1) {
      return ALL_ONES_LITERAL ^ flipBitAsBinaryString(flipBit);
    }
    return ALL_ONES_LITERAL;
  }

  /**
   * Gets the position of the flipped bit within a sequence word. If the
   * sequence has no set/unset bit, returns -1.
   * <p>
   * Note that the parameter <i>must</i> be a sequence word, otherwise the
   * result is meaningless.
   *
   * @param word sequence word to check
   *
   * @return the position of the flipped bit, from 0 to 30. If the sequence
   * has no set/unset bit, returns -1.
   */
  public static int getFlippedBit(int word)
  {
    // get bits from 30 to 26
    // NOTE: "-1" is required since 00000 represents no bits and 00001 the LSB bit set
    return ((word >>> 25) & 0x0000001F) - 1;
  }

  /**
   * Returns the word that has only the bit at the given position set, i.e.
   * <code>2<sup>flipBit</sup></code>.
   * <p>
   * Positions outside <code>0..30</code> keep the results of the former
   * floating-point implementation: negative positions give <code>0</code>
   * and positions above 30 give {@link Integer#MAX_VALUE}.
   *
   * @param flipBit bit position, normally a result of {@link #getFlippedBit(int)}
   *
   * @return <code>1 &lt;&lt; flipBit</code> for positions 0 to 30
   */
  public static int flipBitAsBinaryString(int flipBit)
  {
    if (flipBit < 0) {
      return 0;
    }
    if (flipBit > 30) {
      return Integer.MAX_VALUE;
    }
    return 1 << flipBit;
  }

  /**
   * Gets the number of set bits within the literal word
   *
   * @param word literal word
   *
   * @return the number of set bits within the literal word
   */
  public static int getLiteralBitCount(int word)
  {
    return Integer.bitCount(getLiteralBits(word));
  }

  /**
   * Gets the bits contained within the literal word
   *
   * @param word literal word
   *
   * @return the literal word with the most significant bit cleared
   */
  public static int getLiteralBits(int word)
  {
    return ALL_ONES_WITHOUT_MSB & word;
  }

  /**
   * Checks whether a word is exactly {@link #ALL_ONES_LITERAL}.
   *
   * @param word word to check
   *
   * @return <code>true</code> if all 32 bits of the word are set
   */
  public static boolean isAllOnesLiteral(int word)
  {
    return word == ALL_ONES_LITERAL;
  }

  /**
   * Checks whether none of the bits 0-30 of a word is set. The most
   * significant bit is ignored, so the result is <code>true</code> both for
   * {@link #ALL_ZEROS_LITERAL} and for <code>0</code>.
   *
   * @param word word to check
   *
   * @return <code>true</code> if bits 0-30 are all zero
   */
  public static boolean isAllZerosLiteral(int word)
  {
    return (word | ALL_ZEROS_LITERAL) == ALL_ZEROS_LITERAL;
  }

  /**
   * Checks whether a word is a literal with exactly one of its bits cleared.
   *
   * @param word word to check
   *
   * @return <code>true</code> if the word is a literal and has a single zero bit
   */
  public static boolean isLiteralWithSingleZeroBit(int word)
  {
    return isLiteral(word) && (Integer.bitCount(~word) == 1);
  }

  /**
   * Checks whether a word is a literal with exactly one bit set besides the
   * literal marker (hence a total bit count of two).
   *
   * @param word word to check
   *
   * @return <code>true</code> if the word is a literal and has a single one bit
   */
  public static boolean isLiteralWithSingleOneBit(int word)
  {
    return isLiteral(word) && (Integer.bitCount(word) == 2);
  }

  /**
   * Clears, in a literal word, every bit above the given position; the
   * literal marker (MSB) is always kept.
   *
   * @param lastWord   literal word to mask
   * @param lastSetBit position (0-30) of the highest bit to keep
   *
   * @return the masked word
   */
  public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
  {
    return lastWord & (ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)));
  }

  /**
   * Builds a literal whose bits below the given position are all set.
   *
   * @param bit number of low-order bits to set
   *
   * @return <code>0x80000000 | ((1 &lt;&lt; bit) - 1)</code>
   */
  public static int onesUntil(int bit)
  {
    return ALL_ZEROS_LITERAL | ((1 << bit) - 1);
  }

  /**
   * @return a new, empty {@link LiteralAndZeroFillExpander}
   */
  public static LiteralAndZeroFillExpander newLiteralAndZeroFillExpander()
  {
    return new LiteralAndZeroFillExpander();
  }

  /**
   * @return a new, empty {@link OneFillExpander}
   */
  public static OneFillExpander newOneFillExpander()
  {
    return new OneFillExpander();
  }

  /**
   * Bidirectional cursor over the integers encoded by a single word.
   */
  public interface WordExpander
  {
    /**
     * @return <code>true</code> if {@link #next()} can be called
     */
    boolean hasNext();

    /**
     * @return <code>true</code> if {@link #previous()} can be called
     */
    boolean hasPrevious();

    /**
     * @return the next integer in ascending order
     */
    int next();

    /**
     * @return the next integer in descending order
     */
    int previous();

    /**
     * Moves the cursor so that integers greater than <code>i</code> are not
     * returned by {@link #previous()}.
     */
    void skipAllAfter(int i);

    /**
     * Moves the cursor so that integers smaller than <code>i</code> are not
     * returned by {@link #next()}.
     */
    void skipAllBefore(int i);

    /**
     * Loads a new word into the expander.
     *
     * @param offset        integer represented by bit 0 of the word's first block
     * @param word          word to expand
     * @param fromBeginning <code>true</code> to place the cursor before the
     *                      first integer, <code>false</code> to place it after
     *                      the last one
     */
    void reset(int offset, int word, boolean fromBeginning);

    /**
     * @return an independent copy of this expander, cursor included
     */
    WordExpander clone();
  }

  /**
   * Iterator over the bits of literal and zero-fill words
   */
  public static class LiteralAndZeroFillExpander implements WordExpander
  {
    // integers of the current word, ascending; only the first "len" are valid
    final int[] buffer = new int[MAX_LITERAL_LENGTH];
    int len = 0;
    // index in "buffer" of the element that next() would return
    int current = 0;

    @Override
    public boolean hasNext()
    {
      return current < len;
    }

    @Override
    public boolean hasPrevious()
    {
      return current > 0;
    }

    @Override
    public int next()
    {
      if (!hasNext()) {
        throw new java.util.NoSuchElementException();
      }
      return buffer[current++];
    }

    @Override
    public int previous()
    {
      if (!hasPrevious()) {
        throw new java.util.NoSuchElementException();
      }
      return buffer[--current];
    }

    @Override
    public void skipAllAfter(int i)
    {
      while (hasPrevious() && buffer[current - 1] > i) {
        current--;
      }
    }

    @Override
    public void skipAllBefore(int i)
    {
      while (hasNext() && buffer[current] < i) {
        current++;
      }
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if the word is a sequence of ones
     */
    @Override
    public void reset(int offset, int word, boolean fromBeginning)
    {
      if (isLiteral(word)) {
        // one buffer entry per set bit among bits 0-30
        len = 0;
        for (int i = 0; i < MAX_LITERAL_LENGTH; i++) {
          if ((word & (1 << i)) != 0) {
            buffer[len++] = offset + i;
          }
        }
        current = fromBeginning ? 0 : len;
        return;
      }

      if (!isZeroSequence(word)) {
        throw new IllegalArgumentException("sequence of ones!");
      }

      if (isSequenceWithNoBits(word)) {
        len = 0;
        current = 0;
      } else {
        // a zero sequence holds at most one integer: its flipped bit
        len = 1;
        buffer[0] = offset + getFlippedBit(word);
        current = fromBeginning ? 0 : 1;
      }
    }

    @Override
    public WordExpander clone()
    {
      LiteralAndZeroFillExpander retVal = new LiteralAndZeroFillExpander();
      System.arraycopy(buffer, 0, retVal.buffer, 0, buffer.length);
      retVal.len = len;
      retVal.current = current;
      return retVal;
    }
  }

  /**
   * Iterator over the bits of one-fill words
   */
  public static class OneFillExpander implements WordExpander
  {
    // first and last integer of the sequence (adjusted when the flipped bit sits on an edge)
    int firstInt = 1;
    int lastInt = -1;
    // last integer returned, or one step outside the range right after reset()
    int current = 0;
    // the integer missing from the sequence, or "offset - 1" when there is none
    int exception = -1;

    @Override
    public boolean hasNext()
    {
      return current < lastInt;
    }

    @Override
    public boolean hasPrevious()
    {
      return current > firstInt;
    }

    @Override
    public int next()
    {
      if (!hasNext()) {
        throw new java.util.NoSuchElementException();
      }
      current++;
      if (current == exception) {
        current++;
      }
      return current;
    }

    @Override
    public int previous()
    {
      if (!hasPrevious()) {
        throw new java.util.NoSuchElementException();
      }
      current--;
      if (current == exception) {
        current--;
      }
      return current;
    }

    @Override
    public void skipAllAfter(int i)
    {
      if (i >= current) {
        return;
      }
      current = i + 1;
    }

    @Override
    public void skipAllBefore(int i)
    {
      if (i <= current) {
        return;
      }
      current = i - 1;
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if the word is not a sequence of ones
     */
    @Override
    public void reset(int offset, int word, boolean fromBeginning)
    {
      if (!isOneSequence(word)) {
        throw new IllegalArgumentException("NOT a sequence of ones!");
      }
      firstInt = offset;
      lastInt = offset + maxLiteralLengthMultiplication(getSequenceCount(word) + 1) - 1;

      // when the flipped bit is the first or the last integer, shrink the
      // range so that next()/previous() never have to step over an edge
      exception = offset + getFlippedBit(word);
      if (exception == firstInt) {
        firstInt++;
      }
      if (exception == lastInt) {
        lastInt--;
      }

      current = fromBeginning ? (firstInt - 1) : (lastInt + 1);
    }

    @Override
    public WordExpander clone()
    {
      OneFillExpander retVal = new OneFillExpander();
      retVal.firstInt = firstInt;
      retVal.lastInt = lastInt;
      retVal.current = current;
      retVal.exception = exception;
      return retVal;
    }
  }
}

+ * Conceptually it extends {@link BitSet}, although it is not a subclass of it: the union and + * intersection operations are mainly derived from the code of {@link BitSet} to + * provide bitwise "or" and "and". + *

+ * The iterator implemented for this class allows for modifications during the + * iteration, that is it is possible to add/remove elements through + * {@link #add(int)}, {@link #remove(int)}, {@link #addAll(IntSet)}, + * {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, + * {@link IntIterator#next()} returns the first integral greater than the last + * visited one. + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class FastSet extends AbstractIntSet implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 6519808981110513440L; + + /** + * number of bits within each word + */ + private final static int WORD_SIZE = 32; + + /** + * 32-bit string of all 1's + */ + private static final int ALL_ONES_WORD = 0xFFFFFFFF; + + /** + * all bits, grouped in blocks of length 32 + */ + private int[] words; + + /** + * index of the first empty word, that is the number of words in the logical + * size of this {@link FastSet} + */ + private transient int firstEmptyWord; + + /** + * cached set size (only for fast size() call). When -1, the cache is invalid + */ + private transient int size; + + /** + * Creates a new, empty set. + */ + public FastSet() + { + clear(); + } + + /** + * Creates a new, empty set. It preallocates the space for + * maxWordsInUse words. 
+ */ + private FastSet(int wordsToAllocate) + { + firstEmptyWord = 0; + size = 0; + words = new int[wordsToAllocate]; + } + + /** + * Given a number, it returns the multiplication by the number of bits for each block + */ + private static int multiplyByWordSize(int i) + { + return i << 5; // i * WORD_SIZE; + } + + /** + * Given a bit index, it returns the index of the word containing it + */ + private static int wordIndex(int bitIndex) + { + if (bitIndex < 0) { + throw new IndexOutOfBoundsException("index < 0: " + bitIndex); + } + return bitIndex >> 5; + } + + /** + * Given a bit index, it returns the index of the word containing it + */ + private static int wordIndexNoCheck(int bitIndex) + { + return bitIndex >> 5; + } + + /** + * Generates the 32-bit binary representation of a given word (debug only) + * + * @param word word to represent + * + * @return 32-character string that represents the given word + */ + private static String toBinaryString(int word) + { + String lsb = Integer.toBinaryString(word); + StringBuilder pad = new StringBuilder(); + for (int i = lsb.length(); i < 32; i++) { + pad.append('0'); + } + return pad.append(lsb).toString(); + } + + /** + * Sets the field {@link #firstEmptyWord} with the logical size in words of the + * bit set. + */ + private void fixFirstEmptyWord() + { + int i = firstEmptyWord - 1; + final int[] localWords = words; // faster + while (i >= 0 && localWords[i] == 0) { + i--; + } + firstEmptyWord = i + 1; + } + + /** + * Ensures that the {@link FastSet} can hold enough words. + * + * @param wordsRequired the minimum acceptable number of words. + */ + private void ensureCapacity(int wordsRequired) + { + if (words.length >= wordsRequired) { + return; + } + int newLength = Math.max(words.length << 1, wordsRequired); + words = Arrays.copyOf(words, newLength); + } + + /** + * Ensures that the {@link FastSet} can accommodate a given word index + * + * @param wordIndex the index to be accommodated. 
+ */ + private void expandTo(int wordIndex) + { + int wordsRequired = wordIndex + 1; + if (firstEmptyWord < wordsRequired) { + ensureCapacity(wordsRequired); + firstEmptyWord = wordsRequired; + } + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! + FastSet res = new FastSet(); + res.firstEmptyWord = firstEmptyWord; + res.size = size; + res.words = Arrays.copyOf(words, firstEmptyWord); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + int h = 1; + final int[] localWords = words; // faster + for (int i = 0; i < firstEmptyWord; i++) { + h = (h << 5) - h + localWords[i]; + } + return h; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof FastSet)) { + return super.equals(obj); + } + + final FastSet other = (FastSet) obj; + if (firstEmptyWord != other.firstEmptyWord) { + return false; + } + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + for (int i = 0; i < firstEmptyWord; i++) { + if (localWords[i] != localOtherWords[i]) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return firstEmptyWord == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + // check if the cached size is invalid + if (size < 0) { + size = BitCount.count(words, firstEmptyWord); + } + return size; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int i) + { + int wordIndex = wordIndex(i); + expandTo(wordIndex); + int before = words[wordIndex]; + words[wordIndex] |= (1 << i); + if (before != words[wordIndex]) { + if (size >= 0) { + size++; + } + return true; + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(int i) + { + if (i < 0) { + return false; + } + + int wordIndex = 
wordIndex(i); + if (wordIndex >= firstEmptyWord) { + return false; + } + int before = words[wordIndex]; + words[wordIndex] &= ~(1 << i); + if (before != words[wordIndex]) { + if (size >= 0) { + size--; + } + fixFirstEmptyWord(); + return true; + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + if (c == null || c.isEmpty() || this == c) { + return false; + } + + final FastSet other = convert(c); + + int wordsInCommon = Math.min(firstEmptyWord, other.firstEmptyWord); + + boolean modified = false; + if (firstEmptyWord < other.firstEmptyWord) { + modified = true; + ensureCapacity(other.firstEmptyWord); + firstEmptyWord = other.firstEmptyWord; + } + + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + // Perform logical OR on words in common + for (int i = 0; i < wordsInCommon; i++) { + int before = localWords[i]; + localWords[i] |= localOtherWords[i]; + modified = modified || before != localWords[i]; + } + + // Copy any remaining words + if (wordsInCommon < other.firstEmptyWord) { + modified = true; + System.arraycopy( + other.words, wordsInCommon, words, + wordsInCommon, firstEmptyWord - wordsInCommon + ); + } + if (modified) { + size = -1; + } + return modified; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + if (c == null || c.isEmpty() || isEmpty()) { + return false; + } + if (c == this) { + clear(); + return true; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + // Perform logical (a & !b) on words in common + boolean modified = false; + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + int before = localWords[i]; + localWords[i] &= ~localOtherWords[i]; + modified = modified || before != localWords[i]; + } + if (modified) { + fixFirstEmptyWord(); + size = -1; + } + return modified; + } + + /** 
+ * {@inheritDoc} + */ + @Override + public boolean retainAll(IntSet c) + { + if (isEmpty() || c == this) { + return false; + } + if (c == null || c.isEmpty()) { + clear(); + return true; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + boolean modified = false; + if (firstEmptyWord > other.firstEmptyWord) { + modified = true; + while (firstEmptyWord > other.firstEmptyWord) { + localWords[--firstEmptyWord] = 0; + } + } + + // Perform logical AND on words in common + for (int i = 0; i < firstEmptyWord; i++) { + int before = localWords[i]; + localWords[i] &= localOtherWords[i]; + modified = modified || before != localWords[i]; + } + if (modified) { + fixFirstEmptyWord(); + size = -1; + } + return modified; + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + words = new int[10]; + firstEmptyWord = 0; + size = 0; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int i) + { + if (isEmpty() || i < 0) { + return false; + } + int wordIndex = wordIndexNoCheck(i); + return (wordIndex < firstEmptyWord) + && ((words[wordIndex] & (1 << i)) != 0); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final FastSet other = convert(c); + + if (other.firstEmptyWord > firstEmptyWord) { + return false; + } + + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + for (int i = 0; i < other.firstEmptyWord; i++) { + int o = localOtherWords[i]; + if ((localWords[i] & o) != o) { + return false; + } + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + if ((size >= 0 && size < minElements) || c == null || c.isEmpty() || 
isEmpty()) { + return false; + } + if (this == c) { + return size() >= minElements; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + int count = 0; + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + count += BitCount.count(localWords[i] & localOtherWords[i]); + if (count >= minElements) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet c) + { + if (c == null || c.isEmpty() || c == this) { + return true; + } + if (isEmpty()) { + return false; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + if ((localWords[i] & localOtherWords[i]) != 0) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet c) + { + if (c == null || c.isEmpty()) { + return 0; + } + if (c == this) { + return size(); + } + if (isEmpty()) { + return 0; + } + + final FastSet other = convert(c); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + int count = 0; + for (int i = Math.min(firstEmptyWord, other.firstEmptyWord) - 1; i >= 0; i--) { + count += BitCount.count(localWords[i] & localOtherWords[i]); + } + return count; + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + return new BitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() + { + return new ReverseBitIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + return multiplyByWordSize(firstEmptyWord - 1) + + (WORD_SIZE - Integer.numberOfLeadingZeros(words[firstEmptyWord - 1])) - 1; 
+ } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + if (isEmpty()) { + return; + } + if (size > 0) { + size = last() - size + 1; + } + int lastWordMask = ALL_ONES_WORD >>> Integer.numberOfLeadingZeros(words[firstEmptyWord - 1]); + final int[] localWords = words; // faster + for (int i = 0; i < firstEmptyWord - 1; i++) { + localWords[i] ^= ALL_ONES_WORD; + } + localWords[firstEmptyWord - 1] ^= lastWordMask; + fixFirstEmptyWord(); + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet complemented() + { + FastSet clone = clone(); + clone.complement(); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet empty() + { + return new FastSet(); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return 1D; + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return (double) firstEmptyWord / size(); + } + + /** + * Convert a given collection to a {@link FastSet} instance + */ + private FastSet convert(IntSet c) + { + if (c instanceof FastSet) { + return (FastSet) c; + } + if (c == null) { + return new FastSet(); + } + + FastSet res = new FastSet(); + IntIterator itr = c.iterator(); + while (itr.hasNext()) { + res.add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet convert(Collection c) + { + FastSet res = empty(); + if (c != null) { + for (int i : c) { + res.add(i); + } + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet convert(int... 
a) + { + FastSet res = new FastSet(); + if (a != null) { + for (int i : a) { + res.add(i); + } + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(int fromIndex, int toIndex) + { + if (fromIndex > toIndex) { + throw new IndexOutOfBoundsException( + "fromIndex: " + fromIndex + + " > toIndex: " + toIndex + ); + } + if (fromIndex == toIndex) { + add(fromIndex); + return; + } + + // Increase capacity if necessary + int startWordIndex = wordIndex(fromIndex); + int endWordIndex = wordIndex(toIndex); + expandTo(endWordIndex); + + final int[] localWords = words; // faster + + boolean modified = false; + int firstWordMask = ALL_ONES_WORD << fromIndex; + int lastWordMask = ALL_ONES_WORD >>> -(toIndex + 1); + if (startWordIndex == endWordIndex) { + // Case 1: One word + int before = localWords[startWordIndex]; + localWords[startWordIndex] |= (firstWordMask & lastWordMask); + modified = localWords[startWordIndex] != before; + } else { + // Case 2: Multiple words + // Handle first word + int before = localWords[startWordIndex]; + localWords[startWordIndex] |= firstWordMask; + modified = localWords[startWordIndex] != before; + + // Handle intermediate words, if any + for (int i = startWordIndex + 1; i < endWordIndex; i++) { + modified = modified || localWords[i] != ALL_ONES_WORD; + localWords[i] = ALL_ONES_WORD; + } + + // Handle last word + before = localWords[endWordIndex]; + localWords[endWordIndex] |= lastWordMask; + modified = modified || localWords[endWordIndex] != before; + } + if (modified) { + size = -1; + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(int fromIndex, int toIndex) + { + if (fromIndex > toIndex) { + throw new IndexOutOfBoundsException( + "fromIndex: " + fromIndex + + " > toIndex: " + toIndex + ); + } + if (fromIndex == toIndex) { + remove(fromIndex); + return; + } + + int startWordIndex = wordIndex(fromIndex); + if (startWordIndex >= firstEmptyWord) { + return; + } + + int endWordIndex = 
wordIndex(toIndex); + if (endWordIndex >= firstEmptyWord) { + toIndex = last(); + endWordIndex = firstEmptyWord - 1; + } + + final int[] localWords = words; // faster + + boolean modified = false; + int firstWordMask = ALL_ONES_WORD << fromIndex; + int lastWordMask = ALL_ONES_WORD >>> -(toIndex + 1); + if (startWordIndex == endWordIndex) { + // Case 1: One word + int before = localWords[startWordIndex]; + localWords[startWordIndex] &= ~(firstWordMask & lastWordMask); + modified = localWords[startWordIndex] != before; + } else { + // Case 2: Multiple words + // Handle first word + int before = localWords[startWordIndex]; + localWords[startWordIndex] &= ~firstWordMask; + modified = localWords[startWordIndex] != before; + + // Handle intermediate words, if any + for (int i = startWordIndex + 1; i < endWordIndex; i++) { + modified = modified || localWords[i] != 0; + localWords[i] = 0; + } + + // Handle last word + before = localWords[endWordIndex]; + localWords[endWordIndex] &= ~lastWordMask; + modified = modified || localWords[endWordIndex] != before; + } + if (modified) { + fixFirstEmptyWord(); + size = -1; + } + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) + { + int wordIndex = wordIndex(e); + expandTo(wordIndex); + int mask = (1 << e); + words[wordIndex] ^= mask; + fixFirstEmptyWord(); + if (size >= 0) { + if ((words[wordIndex] & mask) == 0) { + size--; + } else { + size++; + } + } + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(IntSet o) + { + // empty set cases + if (this.isEmpty() && o.isEmpty()) { + return 0; + } + if (this.isEmpty()) { + return -1; + } + if (o.isEmpty()) { + return 1; + } + + final FastSet other = convert(o); + final int[] localWords = words; // faster + final int[] localOtherWords = other.words; // faster + + if (firstEmptyWord > other.firstEmptyWord) { + return 1; + } + if (firstEmptyWord < other.firstEmptyWord) { + return -1; + } + for (int i = firstEmptyWord - 1; i >= 0; i--) { + long w1 = 
localWords[i] & 0xFFFFFFFFL; + long w2 = localOtherWords[i] & 0xFFFFFFFFL; + int res = w1 < w2 ? -1 : (w1 > w2 ? 1 : 0); + if (res != 0) { + return res; + } + } + return 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int index) + { + if (index < 0) { + throw new IndexOutOfBoundsException(); + } + + int count = 0; + final int[] localWords = words; // faster + for (int j = 0; j < firstEmptyWord; j++) { + int w = localWords[j]; + int current = BitCount.count(w); + if (index < count + current) { + int bit = -1; + for (int skip = index - count; skip >= 0; skip--) { + bit = Integer.numberOfTrailingZeros(w & (ALL_ONES_WORD << (bit + 1))); + } + return multiplyByWordSize(j) + bit; + } + count += current; + } + throw new NoSuchElementException(); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + if (isEmpty()) { + return -1; + } + + int index = wordIndex(e); + if (index >= firstEmptyWord || (words[index] & (1 << e)) == 0) { + return -1; + } + int count = BitCount.count(words, index); + count += BitCount.count(words[index] & ~(ALL_ONES_WORD << e)); + return count; + + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet intersection(IntSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (other == this) { + return clone(); + } + + final FastSet o = convert(other); + FastSet res = new FastSet(Math.min(firstEmptyWord, o.firstEmptyWord)); + res.firstEmptyWord = res.words.length; + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + for (int i = 0; i < res.firstEmptyWord; i++) { + localResWords[i] = localWords[i] & localOtherWords[i]; + } + res.fixFirstEmptyWord(); + res.size = -1; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet union(IntSet 
other) + { + if (other == null || other.isEmpty() || this == other) { + return clone(); + } + + final FastSet o = convert(other); + if (isEmpty()) { + return o.clone(); + } + + FastSet res = new FastSet(Math.max(firstEmptyWord, o.firstEmptyWord)); + res.firstEmptyWord = res.words.length; + final int wordsInCommon = Math.min(firstEmptyWord, o.firstEmptyWord); + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + for (int i = 0; i < wordsInCommon; i++) { + localResWords[i] = localWords[i] | localOtherWords[i]; + } + + if (wordsInCommon < firstEmptyWord) { + System.arraycopy( + localWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } + if (wordsInCommon < o.firstEmptyWord) { + System.arraycopy( + localOtherWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } + res.size = -1; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet difference(IntSet other) + { + if (other == null || other.isEmpty()) { + return clone(); + } + if (other == this || isEmpty()) { + return empty(); + } + + final FastSet o = convert(other); + FastSet res = new FastSet(firstEmptyWord); + res.firstEmptyWord = firstEmptyWord; + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + int i = 0; + final int m = Math.min(firstEmptyWord, o.firstEmptyWord); + for (; i < m; i++) { + localResWords[i] = localWords[i] & ~localOtherWords[i]; + } + if (i < firstEmptyWord) { + System.arraycopy(localWords, i, localResWords, i, firstEmptyWord - i); + } else { + res.fixFirstEmptyWord(); + } + res.size = -1; + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public FastSet symmetricDifference(IntSet other) + { + if (other == null || other.isEmpty()) { + return clone(); + } + if (other == this) { + 
return empty(); + } + + final FastSet o = convert(other); + if (isEmpty()) { + return o.clone(); + } + + FastSet res = new FastSet(Math.max(firstEmptyWord, o.firstEmptyWord)); + res.firstEmptyWord = res.words.length; + final int wordsInCommon = Math.min(firstEmptyWord, o.firstEmptyWord); + + final int[] localWords = words; // faster + final int[] localOtherWords = o.words; // faster + final int[] localResWords = res.words; // faster + + for (int i = 0; i < wordsInCommon; i++) { + localResWords[i] = localWords[i] ^ localOtherWords[i]; + } + + if (wordsInCommon < firstEmptyWord) { + System.arraycopy( + localWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } else if (wordsInCommon < o.firstEmptyWord) { + System.arraycopy( + localOtherWords, wordsInCommon, localResWords, wordsInCommon, + res.firstEmptyWord - wordsInCommon + ); + } else { + res.fixFirstEmptyWord(); + } + res.size = -1; + return res; + } + + /** + * Save the state of the {@link ConciseSet}instance to a stream + */ + private void writeObject(ObjectOutputStream s) throws IOException + { + assert words != null; + if (firstEmptyWord < words.length) { + words = Arrays.copyOf(words, firstEmptyWord); + } + s.defaultWriteObject(); + } + + /** + * Reconstruct the {@link ConciseSet} instance from a stream + */ + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException + { + s.defaultReadObject(); + firstEmptyWord = words.length; + size = -1; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + final StringBuilder s = new StringBuilder("INTERNAL REPRESENTATION:\n"); + final Formatter f = new Formatter(s, Locale.ENGLISH); + + if (isEmpty()) { + return s.append("null\n").toString(); + } + + // elements + f.format("Elements: %s\n", toString()); + + // raw representation of words + for (int i = 0; i < firstEmptyWord; i++) { + f.format( + "words[%d] = %s (from %d to %d)\n", + Integer.valueOf(i), + 
  /**
   * Iterates over bits, in ascending order
   * <p>
   * This iterator allows for modifications during the iteration, that is it
   * is possible to add/remove elements through {@link #add(int)},
   * {@link #remove(int)}, {@link #addAll(IntSet)}, {@link #removeAll(IntSet)},
   * {@link #retainAll(IntSet)}, etc.. In this case,
   * {@link IntIterator#next()} returns the first integral greater than the
   * last visited one.
   * <p>
   * The cursor is the pair ({@code nextIndex}, {@code nextBit}): the word
   * index and the bit offset inside that word of the element that the next
   * call to {@link #next()} will return.
   */
  private class BitIterator implements IntIterator
  {
    /** index in {@code words} of the word holding the next element */
    private int nextIndex;
    /** bit offset, inside {@code words[nextIndex]}, of the next element */
    private int nextBit;
    /** value most recently returned by {@link #next()} */
    private int last;

    /**
     * Positions the cursor on the first set bit. For an empty set only
     * {@code nextIndex} is initialized (to 0) and the other fields keep
     * their default values.
     */
    private BitIterator()
    {
      nextIndex = 0;
      if (isEmpty()) {
        return;
      }

      // nothing returned yet; skipAllBefore() and remove() read this value
      last = -1;

      // find the first non-empty word
      while (words[nextIndex] == 0) {
        nextIndex++;
      }

      // find the first set bit
      nextBit = Integer.numberOfTrailingZeros(words[nextIndex]);
    }

    /**
     * Advances the cursor to the first set bit strictly after the current
     * position. When no such bit exists the method returns with
     * {@code nextIndex == firstEmptyWord}, which makes {@link #hasNext()}
     * report {@code false}.
     */
    void prepareNext()
    {
      // find the next set bit within the current word
      int w = words[nextIndex];
      while ((++nextBit < WORD_SIZE)) {
        if ((w & (1 << nextBit)) != 0) {
          return;
        }
      }

      // current word exhausted: find the next non-empty word
      do {
        if (++nextIndex == firstEmptyWord) {
          return;
        }
      } while ((w = words[nextIndex]) == 0);
      nextBit = Integer.numberOfTrailingZeros(w);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean hasNext()
    {
      return nextIndex < firstEmptyWord;
    }

    /**
     * {@inheritDoc}
     *
     * @throws NoSuchElementException if the iteration has no more elements
     */
    @Override
    public int next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      // rebuild the element from the cursor, then move the cursor forward
      last = multiplyByWordSize(nextIndex) + nextBit;
      prepareNext();
      return last;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The cursor only ever moves forward: requests that do not lie beyond
     * the current position are ignored.
     */
    @Override
    public void skipAllBefore(int element)
    {
      // non-positive targets, or targets not beyond the last returned value
      if (element <= 0 || element <= last) {
        return;
      }

      // identify where the element is
      int newNextIndex = wordIndexNoCheck(element);
      int newNextBit = element & (WORD_SIZE - 1);
      if (newNextIndex < nextIndex || (newNextIndex == nextIndex && newNextBit <= nextBit)) {
        return;
      }

      // "element" is the next item to return, unless it does not exist
      nextIndex = newNextIndex;
      if (nextIndex >= firstEmptyWord) {
        // beyond the last used word: the iteration is over
        return;
      }
      nextBit = newNextBit;
      if ((words[nextIndex] & (1 << nextBit)) == 0) {
        prepareNext();
      }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Removes from the enclosing set the value last returned by
     * {@link #next()}.
     */
    @Override
    public void remove()
    {
      FastSet.this.remove(last);
    }

    /**
     * Returns a new iterator over the same set, with the same cursor and the
     * same last returned value as this one.
     */
    @Override
    public IntIterator clone()
    {
      BitIterator retVal = new BitIterator();
      retVal.nextIndex = nextIndex;
      retVal.nextBit = nextBit;
      retVal.last = last;
      return retVal;
    }
  }

+ * This iterator allows for modifications during the iteration, that is it + * is possible to add/remove elements through {@link #add(int)}, + * {@link #remove(int)}, {@link #addAll(IntSet)}, {@link #removeAll(IntSet)}, {@link #retainAll(IntSet)}, etc.. In this case, + * {@link IntIterator#next()} returns the first integral greater than the + * last visited one. + */ + private class ReverseBitIterator implements IntIterator + { + private int nextIndex; + private int nextBit; + private int last; + + /** + * identify the first bit + */ + private ReverseBitIterator() + { + nextIndex = firstEmptyWord - 1; + if (isEmpty()) { + return; + } + + last = Integer.MAX_VALUE; // unused! + nextBit = WORD_SIZE - Integer.numberOfLeadingZeros(words[nextIndex]) - 1; + } + + /** + * find the first set bit after nextIndex + nextBit + */ + void prepareNext() + { + // find the next set bit within the current word + int w = words[nextIndex]; + while ((--nextBit >= 0)) { + if ((w & (1 << nextBit)) != 0) { + return; + } + } + + // find the first non-empty word + do { + if (--nextIndex == -1) { + return; + } + } while ((w = words[nextIndex]) == 0); + nextBit = WORD_SIZE - Integer.numberOfLeadingZeros(w) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean hasNext() + { + return nextIndex >= 0; + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + last = multiplyByWordSize(nextIndex) + nextBit; + prepareNext(); + return last; + } + + /** + * {@inheritDoc} + */ + @Override + public void skipAllBefore(int element) + { + if (element < 0) { + nextIndex = -1; + return; + } + if (element >= last) { + return; + } + + // identify where the element is + int newNextIndex = wordIndexNoCheck(element); + int newNextBit = element & (WORD_SIZE - 1); + if (newNextIndex > nextIndex || (newNextIndex == nextIndex && newNextBit >= nextBit)) { + return; + } + + // "element" is the next item to return, unless it does 
not exist + nextIndex = newNextIndex; + nextBit = newNextBit; + if ((words[nextIndex] & (1 << nextBit)) == 0) { + prepareNext(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void remove() + { + FastSet.this.remove(last); + } + + @Override + public IntIterator clone() + { + BitIterator retVal = new BitIterator(); + retVal.nextIndex = nextIndex; + retVal.nextBit = nextBit; + retVal.last = last; + return retVal; + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java new file mode 100755 index 00000000000..e81434a890b --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/intset/HashIntSet.java @@ -0,0 +1,1012 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.intset; + +// update CompactIdentityHashSet.java, UniqueSet.java and +// SoftHashMapIndex.java accordingly. + +import io.druid.extendedset.utilities.IntHashCode; + +import java.util.Arrays; +import java.util.Collection; +import java.util.ConcurrentModificationException; +import java.util.NoSuchElementException; + +/** + * Implements a fast hash-set. + *

+ * Inspired by http://code.google.com/p/ontopia/source/browse/trunk/ontopia/src/java/net/ + * ontopia/utils/CompactHashSet.java + * + * @author Alessandro Colantonio + * @version $Id: HashIntSet.java 156 2011-09-01 00:13:57Z cocciasik $ + */ +public class HashIntSet extends AbstractIntSet +{ + protected final static int INITIAL_SIZE = 3; + protected final static double LOAD_FACTOR = 0.75D; + + /** + * empty cell + */ + protected final static int EMPTY = -1; + + /** + * When an object is deleted this object is put into the hashtable in its + * place, so that other objects with the same key (collisions) further down + * the hashtable are not lost after we delete an object in the collision + * chain. + */ + protected final static int REMOVED = -2; + + /** + * number of elements + */ + protected int size; + + /** + * This is the number of empty cells. It's not necessarily the same as + * objects.length - elements, because some cells may contain REMOVED. + */ + protected int freecells; + + /** + * cells + */ + protected int[] cells; + + /** + * concurrent modification during iteration + */ + protected int modCount; + + /** + * Constructs a new, empty set. + */ + public HashIntSet() + { + this(INITIAL_SIZE); + } + + /** + * Constructs a new, empty set. + * + * @param initialSize + */ + public HashIntSet(int initialSize) + { + if (initialSize <= 0) { + throw new IllegalArgumentException(); + } + cells = new int[initialSize]; + modCount = 0; + clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() + { + return new SortedIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() + { + return new DescendingSortedIterator(); + } + + /** + * Similar to {@link #iterator()}, but with no particular order + * + * @return iterator with no sorting + */ + public IntIterator unsortedIterator() + { + return new UnsortedIterator(); + } + + /** + * Returns the number of elements in this set (its cardinality). 
+ */ + @Override + public int size() + { + return size; + } + + /** + * Returns true if this set contains no elements. + */ + @Override + public boolean isEmpty() + { + return size == 0; + } + + /** + * Compute the index of the element + * + * @param o element to search + * + * @return index of the element in {@link #cells} + */ + private final int toIndex(int o) + { + return (o & 0x7FFFFFFF) % cells.length; + } + + /** + * Find position of the integer in {@link #cells}. If not found, returns the + * first empty cell. + * + * @param element element to search + * + * @return if returned value >=0, it returns the index of the + * element; if returned value <0, the index of the + * first empty cell is -(returned value - 1) + */ + private int findElementOrEmpty(int element) + { + assert element >= 0; + int index = toIndex(IntHashCode.hashCode(element)); + int offset = 1; + + while (cells[index] != EMPTY) { + // element found! + if (cells[index] == element) { + return index; + } + + // compute the next index to check + index = toIndex(index + offset); + offset <<= 1; + offset++; + if (offset < 0) { + offset = 2; + } + } + + // element not found! + return -(index + 1); + } + + /** + * Find position of the integer in {@link #cells}. If not found, returns the + * first removed cell. + * + * @param element element to search + * + * @return if returned value >=0, it returns the index of the + * element; if returned value <0, the index of the + * first empty cell is -(returned value - 1) + */ + private int findElementOrRemoved(int element) + { + assert element >= 0; + int index = toIndex(IntHashCode.hashCode(element)); + int offset = 1; + int removed = -1; + + while (cells[index] != EMPTY) { + // element found! 
+ if (cells[index] == element) { + return index; + } + + // remember the last removed cell if we don't find the element + if (cells[index] == REMOVED) { + removed = index; + } + + index = toIndex(index + offset); + offset <<= 1; + offset++; + if (offset < 0) { + offset = 2; + } + } + if (removed >= 0) { + return -(removed + 1); + } + return index; + } + + /** + * Returns true if this set contains the specified element. + * + * @param element element whose presence in this set is to be tested. + * + * @return true if this set contains the specified element. + */ + @Override + public boolean contains(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + if (isEmpty()) { + return false; + } + return findElementOrEmpty(element) >= 0; + } + + /** + * Adds the specified element to this set if it is not already present. + * + * @param element element to be added to this set. + * + * @return true if the set did not already contain the specified + * element. + */ + @Override + public boolean add(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + int index = findElementOrRemoved(element); + if (index >= 0) { + if (cells[index] == element) { + return false; + } + freecells--; + } else { + index = -(index + 1); + } + + modCount++; + size++; + + // set the integer + cells[index] = element; + + // do we need to rehash? + if (1 - ((double) freecells / cells.length) > LOAD_FACTOR) { + rehash(); + } + return true; + } + + /** + * Removes the specified element from the set. + */ + @Override + public boolean remove(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + int index = findElementOrEmpty(element); + if (index < 0) { + return false; + } + + cells[index] = REMOVED; + modCount++; + size--; + return true; + } + + /** + * Removes all of the elements from this set. 
+ */ + @Override + public void clear() + { + size = 0; + Arrays.fill(cells, EMPTY); + freecells = cells.length; + modCount++; + } + + /** + * Figures out correct size for rehashed set, then does the rehash. + */ + protected void rehash() + { + // do we need to increase capacity, or are there so many + // deleted objects hanging around that rehashing to the same + // size is sufficient? if 5% (arbitrarily chosen number) of + // cells can be freed up by a rehash, we do it. + + int gargagecells = cells.length - (size + freecells); + if ((double) gargagecells / cells.length > 0.05D) + // rehash with same size + { + rehash(cells.length); + } else + // rehash with increased capacity + { + rehash((cells.length << 1) + 1); + } + } + + /** + * Rehashes to a bigger size. + */ + protected void rehash(int newCapacity) + { + HashIntSet rehashed = new HashIntSet(newCapacity); + @SuppressWarnings("hiding") + int[] cells = rehashed.cells; + for (int element : this.cells) { + if (element < 0) + // removed or empty + { + continue; + } + + // add the element + cells[-(rehashed.findElementOrEmpty(element) + 1)] = element; + } + this.cells = cells; + freecells = newCapacity - size; + modCount++; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = false; + while (itr.hasNext()) { + res |= add(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = false; + while (itr.hasNext()) { + res |= remove(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean 
retainAll(IntSet c) + { + if (c == null || c.isEmpty()) { + return false; + } + boolean res = false; + for (int i = 0; i < cells.length; i++) { + if (cells[i] >= 0 && !c.contains(cells[i])) { + cells[i] = REMOVED; + res = true; + size--; + } + } + if (res) { + modCount++; + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet clone() + { + HashIntSet cloned = new HashIntSet(cells.length); + System.arraycopy(cells, 0, cloned.cells, 0, cells.length); + cloned.freecells = freecells; + cloned.size = size; + cloned.modCount = 0; + return cloned; + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + if (isEmpty()) { + return 0D; + } + return cells.length / Math.ceil(last() / 32D); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return isEmpty() ? 0D : (double) cells.length / size(); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet complemented() + { + return (HashIntSet) super.complemented(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = true; + while (res && itr.hasNext()) { + res &= contains(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet c) + { + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + boolean res = true; + while (res && itr.hasNext()) { + if (contains(itr.next())) { + return true; + } + } + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet c, int minElements) + { + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + while (minElements > 0 && 
itr.hasNext()) { + if (contains(itr.next())) { + minElements--; + } + } + return minElements == 0; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet convert(int... a) + { + HashIntSet res = new HashIntSet((int) (a.length / LOAD_FACTOR) + 1); + for (int e : a) { + res.add(e); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet convert(Collection c) + { + HashIntSet res = new HashIntSet((int) (c.size() / LOAD_FACTOR) + 1); + for (int e : c) { + res.add(e); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return "size: " + size + ", freecells: " + freecells + ", " + + Arrays.toString(cells); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet symmetricDifference(IntSet c) + { + HashIntSet res = clone(); + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + while (itr.hasNext()) { + res.flip(itr.next()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet union(IntSet other) + { + return (HashIntSet) super.union(other); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet difference(IntSet other) + { + return (HashIntSet) super.difference(other); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet intersection(IntSet other) + { + return (HashIntSet) super.intersection(other); + } + + /** + * {@inheritDoc} + */ + @Override + public HashIntSet empty() + { + return new HashIntSet(); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(int element) + { + if (element < 0) { + throw new IndexOutOfBoundsException("element < 0: " + element); + } + modCount++; + int index = findElementOrRemoved(element); + if (index >= 0) { + // REMOVE + if (cells[index] == element) { + cells[index] = REMOVED; + size--; + return; + } + freecells--; + } else { + index = -(index + 1); + } + + // ADD + cells[index] = element; + 
size++; + + // do we need to rehash? + if (1 - ((double) freecells / cells.length) > LOAD_FACTOR) { + rehash(); + } + } + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) + { + return toArray()[i]; + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(int e) + { + if (e < 0) { + throw new IllegalArgumentException("positive integer expected: " + Integer.toString(e)); + } + return Arrays.binarySearch(toArray(), e); + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet c) + { + int res = 0; + IntIterator itr; + if (c instanceof HashIntSet) { + itr = ((HashIntSet) c).unsortedIterator(); + } else { + itr = c.iterator(); + } + while (itr.hasNext()) { + if (contains(itr.next())) { + res++; + } + } + return res; + + } + + /** + * {@inheritDoc} + */ + @Override + public int last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + int max = 0; + for (int element : cells) { + if (max < element) { + max = element; + } + } + return max; + } + + /** + * {@inheritDoc} + */ + @Override + public int first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + int min = Integer.MAX_VALUE; + for (int element : cells) { + if (element >= 0 && min > element) { + min = element; + } + } + return min; + } + + /** + * {@inheritDoc} + */ + @Override + public int[] toArray(int[] a) + { + if (a.length < size) { + throw new IllegalArgumentException(); + } + if (isEmpty()) { + return a; + } + int i = 0; + for (int element : this.cells) { + if (element < 0) + // removed or empty + { + continue; + } + + // copy the element + a[i++] = element; + } + Arrays.sort(a, 0, size); + return a; + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return Arrays.toString(toArray()); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + if (isEmpty()) { + return 0; + } + int h = 1; + for (int e : cells) { + if (e >= 0) { + h ^= IntHashCode.hashCode(e); + } + } + return h; 
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * Another {@link HashIntSet} is equal when both sets have the same size and
+   * every non-negative cell of the other set is contained in this one (negative
+   * cells are presumably the EMPTY / REMOVED markers -- confirm against the
+   * constant definitions). Any other argument is delegated to
+   * {@code super.equals(obj)}.
+   */
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof HashIntSet)) {
+      return super.equals(obj);
+    }
+    final HashIntSet other = (HashIntSet) obj;
+    if (size != other.size) {
+      return false;
+    }
+    for (int e : other.cells) {
+      if (e >= 0 && !contains(e)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Iterates over the hashset in cell order, with no sorting. The iterator is
+   * fail-fast: {@link #next()} and {@link #remove()} throw
+   * {@link ConcurrentModificationException} when {@code modCount} changed
+   * behind the iterator's back.
+   */
+  private class UnsortedIterator implements IntIterator
+  {
+    /** index of the cell that the following call to next() returns */
+    private int nextIndex = 0;
+    /** index of the cell returned by the last next(), or -1 when there is nothing to remove */
+    private int current = -1;
+    private int expectedModCount = modCount;
+
+    public UnsortedIterator()
+    {
+      // field initializers already set nextIndex and expectedModCount;
+      // only the first occupied cell still has to be located
+      skipEmpty();
+    }
+
+    /**
+     * Moves nextIndex forward to the first cell that is neither EMPTY nor
+     * REMOVED, or to cells.length when no such cell is left.
+     */
+    void skipEmpty()
+    {
+      while (nextIndex < cells.length
+             && (cells[nextIndex] == EMPTY || cells[nextIndex] == REMOVED)) {
+        nextIndex++;
+      }
+    }
+
+    @Override
+    public boolean hasNext()
+    {
+      return nextIndex < cells.length;
+    }
+
+    @Override
+    public int next()
+    {
+      if (modCount != expectedModCount) {
+        throw new ConcurrentModificationException();
+      }
+      if (nextIndex >= cells.length) {
+        throw new NoSuchElementException();
+      }
+
+      current = nextIndex;
+      nextIndex++;
+      skipEmpty();
+      return cells[current];
+    }
+
+    @Override
+    public void remove()
+    {
+      if (modCount != expectedModCount) {
+        throw new ConcurrentModificationException();
+      }
+      if (current < 0) {
+        throw new IllegalStateException();
+      }
+      // delete object
+      cells[current] = REMOVED;
+      size--;
+      modCount++;
+      expectedModCount = modCount; // this is expected!
+      current = -1;
+    }
+
+    @Override
+    public void skipAllBefore(int element)
+    {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public IntIterator clone()
+    {
+      UnsortedIterator retVal = new UnsortedIterator();
+      retVal.nextIndex = nextIndex;
+      retVal.current = current;
+      retVal.expectedModCount = expectedModCount;
+      return retVal;
+    }
+  }
+
+  /**
+   * Iterates in ascending order over a snapshot of the hashset taken by
+   * toArray() at construction time (the snapshot must be sorted ascending,
+   * since skipAllBefore() binary-searches it).
+   */
+  private class SortedIterator implements IntIterator
+  {
+    int[] elements = toArray();
+    int next = 0;
+    // Number of elements in the snapshot. The live "size" field shrinks when
+    // remove() is called, so using it as the iteration bound made the iterator
+    // stop early and skip the trailing elements after a removal.
+    int count = size;
+
+    @Override
+    public boolean hasNext()
+    {
+      return next < count;
+    }
+
+    @Override
+    public int next()
+    {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      return elements[next++];
+    }
+
+    /**
+     * Removes the element returned by the last next() from the enclosing set.
+     *
+     * @throws IllegalStateException if next() has not been called yet or the
+     *                               element was already removed
+     */
+    @Override
+    public void remove()
+    {
+      // next == 0 means next() was never called; indexing elements[-1] would
+      // surface as ArrayIndexOutOfBoundsException instead of the contractual
+      // IllegalStateException
+      if (next == 0 || elements[next - 1] == REMOVED) {
+        throw new IllegalStateException();
+      }
+      HashIntSet.this.remove(elements[next - 1]);
+      elements[next - 1] = REMOVED;
+    }
+
+    @Override
+    public void skipAllBefore(int element)
+    {
+      // nothing to skip when exhausted, or when the upcoming element already
+      // satisfies the bound
+      if (next >= count || element <= elements[next]) {
+        return;
+      }
+      // search only the not-yet-visited part of the snapshot
+      next = Arrays.binarySearch(elements, next + 1, count, element);
+      if (next < 0) {
+        // not found: continue from the insertion point
+        next = -(next + 1);
+      }
+    }
+
+    @Override
+    public IntIterator clone()
+    {
+      SortedIterator retVal = new SortedIterator();
+      retVal.next = next;
+      retVal.elements = elements.clone();
+      retVal.count = count;
+      return retVal;
+    }
+  }
+
+  /**
+   * Iterates in descending order over a snapshot of the hashset taken by
+   * toArray() at construction time.
+   */
+  private class DescendingSortedIterator implements IntIterator
+  {
+    int[] elements = toArray();
+    int next = size - 1;
+
+    @Override
+    public boolean hasNext()
+    {
+      return next >= 0;
+    }
+
+    @Override
+    public int next()
+    {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      return elements[next--];
+    }
+
+    /**
+     * Removes the element returned by the last next() from the enclosing set.
+     *
+     * @throws IllegalStateException if next() has not been called yet or the
+     *                               element was already removed
+     */
+    @Override
+    public void remove()
+    {
+      // before the first next() the slot next + 1 lies past the end of the
+      // snapshot; report that as IllegalStateException rather than letting the
+      // array access fail
+      if (elements == null || next + 1 >= elements.length || elements[next + 1] == REMOVED) {
+        throw new IllegalStateException();
+      }
+      HashIntSet.this.remove(elements[next + 1]);
+      elements[next + 1] = REMOVED;
+    }
+
+    @Override
+    public void skipAllBefore(int element)
+    {
+      // nothing to skip when exhausted, or when the upcoming element already
+      // satisfies the bound
+      if (next < 0 || element >= elements[next]) {
+        return;
+      }
+      next = Arrays.binarySearch(elements, 0, next, element);
+      if (next < 0) {
+        // not found: continue from the largest element below the bound
+        next = -(next + 1) - 1;
+      }
+    }
+
+    @Override
+    public IntIterator clone()
+    {
+      DescendingSortedIterator retVal = new DescendingSortedIterator();
+      retVal.elements = elements.clone();
+      retVal.next = next;
+      return retVal;
+    }
+  }
+}
diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java
new file mode 100755
index 00000000000..6dbf8bb1d0d
--- /dev/null
+++ b/extendedset/src/main/java/io/druid/extendedset/intset/ImmutableConciseSet.java
@@ -0,0 +1,1157 @@
+/*
+* Copyright 2012 Metamarkets Group Inc.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package io.druid.extendedset.intset;
+
+
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Lists;
+import com.google.common.collect.MinMaxPriorityQueue;
+import com.google.common.collect.UnmodifiableIterator;
+import com.google.common.primitives.Ints;
+import io.druid.extendedset.utilities.IntList;
+
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Read-only integer set whose compressed words live in an {@link IntBuffer}.
+ * Every word is either a 31-bit literal or a one/zero fill ("sequence"); the
+ * classification and decoding of words is delegated to {@link ConciseSetUtils}.
+ * Set algebra (union, intersection, complement) is done word by word and
+ * always yields a new instance.
+ */
+public class ImmutableConciseSet
+{
+  /** maximum number of sets merged by a single doUnion / doIntersection pass */
+  private final static int CHUNK_SIZE = 10000;
+  /** compressed words, or null for the empty set built by the no-arg constructor */
+  private final IntBuffer words;
+  /** index of the last word in {@link #words}, -1 when there is none */
+  private final int lastWordIndex;
+  /** number of integers in the set, computed once at construction */
+  private final int size;
+
+  /**
+   * Creates the empty set.
+   */
+  public ImmutableConciseSet()
+  {
+    this.words = null;
+    this.lastWordIndex = -1;
+    this.size = 0;
+  }
+
+  /**
+   * Creates a set over the int view of the given byte buffer.
+   *
+   * @param byteBuffer buffer holding the serialized words, must not be null
+   */
+  public ImmutableConciseSet(ByteBuffer byteBuffer)
+  {
+    this.words = byteBuffer.asIntBuffer();
+    this.lastWordIndex = words.capacity() - 1;
+    this.size = calcSize();
+  }
+
+  /**
+   * Creates a set directly over the given words; the buffer is not copied.
+   *
+   * @param buffer compressed words; null or a zero-capacity buffer gives an empty set
+   */
+  public ImmutableConciseSet(IntBuffer buffer)
+  {
+    this.words = buffer;
+    this.lastWordIndex = (words == null || buffer.capacity() == 0) ? -1 : words.capacity() - 1;
+    this.size = calcSize();
+  }
+
+  /**
+   * Wraps the words of a mutable {@link ConciseSet}.
+   *
+   * @param conciseSet source set, may be null
+   *
+   * @return an immutable set over {@code conciseSet.getWords()}, or the empty set for a null / empty argument
+   */
+  public static ImmutableConciseSet newImmutableFromMutable(ConciseSet conciseSet)
+  {
+    if (conciseSet == null || conciseSet.isEmpty()) {
+      return new ImmutableConciseSet();
+    }
+    return new ImmutableConciseSet(IntBuffer.wrap(conciseSet.getWords()));
+  }
+
+  /**
+   * Three-way comparison of two ints.
+   *
+   * @return -1, 0 or 1 when x is less than, equal to or greater than y
+   */
+  public static int compareInts(int x, int y)
+  {
+    return (x < y) ? -1 : ((x == y) ? 0 : 1);
+  }
+
+  /**
+   * @return the union of all given sets
+   */
+  public static ImmutableConciseSet union(ImmutableConciseSet... sets)
+  {
+    return union(Arrays.asList(sets));
+  }
+
+  /**
+   * @return the union of all given sets
+   */
+  public static ImmutableConciseSet union(Iterable<ImmutableConciseSet> sets)
+  {
+    return union(sets.iterator());
+  }
+
+  /**
+   * Unions the sets in chunks of at most CHUNK_SIZE, feeding the partial
+   * result of each chunk into the next one.
+   *
+   * @return the union of all given sets
+   */
+  public static ImmutableConciseSet union(Iterator<ImmutableConciseSet> sets)
+  {
+    ImmutableConciseSet partialResults = doUnion(Iterators.limit(sets, CHUNK_SIZE));
+    while (sets.hasNext()) {
+      final UnmodifiableIterator<ImmutableConciseSet> partialIter = Iterators.singletonIterator(partialResults);
+      partialResults = doUnion(Iterators.<ImmutableConciseSet>concat(partialIter, Iterators.limit(sets, CHUNK_SIZE)));
+    }
+    return partialResults;
+  }
+
+  /**
+   * @return the intersection of all given sets
+   */
+  public static ImmutableConciseSet intersection(ImmutableConciseSet... sets)
+  {
+    return intersection(Arrays.asList(sets));
+  }
+
+  /**
+   * @return the intersection of all given sets
+   */
+  public static ImmutableConciseSet intersection(Iterable<ImmutableConciseSet> sets)
+  {
+    return intersection(sets.iterator());
+  }
+
+  /**
+   * Intersects the sets in chunks of at most CHUNK_SIZE, feeding the partial
+   * result of each chunk into the next one.
+   *
+   * @return the intersection of all given sets
+   */
+  public static ImmutableConciseSet intersection(Iterator<ImmutableConciseSet> sets)
+  {
+    ImmutableConciseSet partialResults = doIntersection(Iterators.limit(sets, CHUNK_SIZE));
+    while (sets.hasNext()) {
+      final UnmodifiableIterator<ImmutableConciseSet> partialIter = Iterators.singletonIterator(partialResults);
+      partialResults = doIntersection(
+          Iterators.<ImmutableConciseSet>concat(Iterators.limit(sets, CHUNK_SIZE), partialIter)
+      );
+    }
+    return partialResults;
+  }
+
+  /**
+   * Complements the set up to (and not beyond) its last element.
+   *
+   * @see #doComplement(ImmutableConciseSet)
+   */
+  public static ImmutableConciseSet complement(ImmutableConciseSet set)
+  {
+    return doComplement(set);
+  }
+
+  /**
+   * Complements the set within the range of {@code length} bits, i.e. up to
+   * index {@code length - 1}.
+   *
+   * @param set    set to complement; null or empty yields a set of {@code length} ones
+   * @param length number of bits the result spans; values of 0 or less yield the empty set
+   *
+   * @return the complemented, compacted set
+   */
+  public static ImmutableConciseSet complement(ImmutableConciseSet set, int length)
+  {
+    if (length <= 0) {
+      return new ImmutableConciseSet();
+    }
+
+    // special case when the set is empty and we need a concise set of ones
+    if (set == null || set.isEmpty()) {
+      final int leftoverBits = length % 31;
+      final int onesBlocks = length / 31;
+      final int[] words;
+      if (onesBlocks > 0) {
+        if (leftoverBits > 0) {
+          words = new int[]{
+              ConciseSetUtils.SEQUENCE_BIT | (onesBlocks - 1),
+              ConciseSetUtils.onesUntil(leftoverBits)
+          };
+        } else {
+          words = new int[]{
+              ConciseSetUtils.SEQUENCE_BIT | (onesBlocks - 1)
+          };
+        }
+      } else {
+        if (leftoverBits > 0) {
+          words = new int[]{ConciseSetUtils.onesUntil(leftoverBits)};
+        } else {
+          words = new int[]{};
+        }
+      }
+      ConciseSet newSet = new ConciseSet(words, false);
+      return ImmutableConciseSet.newImmutableFromMutable(newSet);
+    }
+
+    IntList retVal = new IntList();
+    int endIndex = length - 1;
+
+    int wordsWalked = 0;
+    int last = 0;
+
+    WordIterator iter = set.newWordIterator();
+
+    // flip every word: literals bit by bit, fills by toggling the fill type
+    while (iter.hasNext()) {
+      int word = iter.next();
+      wordsWalked = iter.wordsWalked;
+      if (ConciseSetUtils.isLiteral(word)) {
+        retVal.add(ConciseSetUtils.ALL_ZEROS_LITERAL | ~word);
+      } else {
+        retVal.add(ConciseSetUtils.SEQUENCE_BIT ^ word);
+      }
+    }
+
+    last = set.getLast();
+
+    // round "last" up to the final bit of the 31-bit block it sits in
+    int distFromLastWordBoundary = ConciseSetUtils.maxLiteralLengthModulus(last);
+    int distToNextWordBoundary = ConciseSetUtils.MAX_LITERAL_LENGTH - distFromLastWordBoundary - 1;
+    last = (last < 0) ? 0 : last + distToNextWordBoundary;
+
+    int diff = endIndex - last;
+    // only append a new literal when the end index is beyond the current word
+    if (diff > 0) {
+      // first check if the difference can be represented in 31 bits
+      if (diff <= ConciseSetUtils.MAX_LITERAL_LENGTH) {
+        retVal.add(ConciseSetUtils.ALL_ONES_LITERAL);
+      } else {
+        // create a fill from last set bit to endIndex for number of 31 bit blocks minus one
+        int endIndexWordCount = ConciseSetUtils.maxLiteralLengthDivision(endIndex);
+        retVal.add(ConciseSetUtils.SEQUENCE_BIT | (endIndexWordCount - wordsWalked - 1));
+        retVal.add(ConciseSetUtils.ALL_ONES_LITERAL);
+      }
+    }
+
+    // clear bits after last set value
+    int lastWord = retVal.get(retVal.length() - 1);
+    if (ConciseSetUtils.isLiteral(lastWord)) {
+      lastWord = ConciseSetUtils.clearBitsAfterInLastWord(
+          lastWord,
+          ConciseSetUtils.maxLiteralLengthModulus(endIndex)
+      );
+    }
+
+    retVal.set(retVal.length() - 1, lastWord);
+    trimZeros(retVal);
+
+    if (retVal.isEmpty()) {
+      return new ImmutableConciseSet();
+    }
+    return compact(new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray())));
+  }
+
+  /**
+   * Re-encodes the set, merging adjacent words wherever
+   * {@link #addAndCompact(IntList, int)} is able to.
+   *
+   * @param set set to compact, must not be null
+   *
+   * @return a new set over the compacted words
+   */
+  public static ImmutableConciseSet compact(ImmutableConciseSet set)
+  {
+    IntList retVal = new IntList();
+    WordIterator itr = set.newWordIterator();
+    while (itr.hasNext()) {
+      addAndCompact(retVal, itr.next());
+    }
+    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
+  }
+
+  /**
+   * Appends a word to the list, merging it into the current last word when the
+   * two can be expressed as a single fill (all-ones / all-zeros literals and
+   * flip-bit-free fills of the same kind, optionally preceded by a literal
+   * with exactly one deviating bit, which becomes the fill's flipped bit).
+   *
+   * @param set       word list being built
+   * @param wordToAdd word to append
+   */
+  private static void addAndCompact(IntList set, int wordToAdd)
+  {
+    int length = set.length();
+    if (set.isEmpty()) {
+      set.add(wordToAdd);
+      return;
+    }
+
+    int last = set.get(length - 1);
+
+    // newWord stays 0 when no merge applies; 0 is never a merge result because
+    // every merged fill spans at least two blocks (count field of 1 or more)
+    int newWord = 0;
+    if (ConciseSetUtils.isAllOnesLiteral(last)) {
+      if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) {
+        newWord = 0x40000001;
+      } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) {
+        newWord = wordToAdd + 1;
+      }
+    } else if (ConciseSetUtils.isOneSequence(last)) {
+      if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) {
+        newWord = last + 1;
+      } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) {
+        newWord = last + ConciseSetUtils.getSequenceNumWords(wordToAdd);
+      }
+    } else if (ConciseSetUtils.isAllZerosLiteral(last)) {
+      if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) {
+        newWord = 0x00000001;
+      } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) {
+        newWord = wordToAdd + 1;
+      }
+    } else if (ConciseSetUtils.isZeroSequence(last)) {
+      if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) {
+        newWord = last + 1;
+      } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) {
+        newWord = last + ConciseSetUtils.getSequenceNumWords(wordToAdd);
+      }
+    } else if (ConciseSetUtils.isLiteralWithSingleOneBit(last)) {
+      // the single set bit becomes the flipped bit (1-based, stored from bit 25 up) of a zero fill
+      int position = Integer.numberOfTrailingZeros(last) + 1;
+      if (ConciseSetUtils.isAllZerosLiteral(wordToAdd)) {
+        newWord = 0x00000001 | (position << 25);
+      } else if (ConciseSetUtils.isZeroSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) {
+        newWord = (wordToAdd + 1) | (position << 25);
+      }
+    } else if (ConciseSetUtils.isLiteralWithSingleZeroBit(last)) {
+      // the single cleared bit becomes the flipped bit of a one fill
+      int position = Integer.numberOfTrailingZeros(~last) + 1;
+      if (ConciseSetUtils.isAllOnesLiteral(wordToAdd)) {
+        newWord = 0x40000001 | (position << 25);
+      } else if (ConciseSetUtils.isOneSequence(wordToAdd) && ConciseSetUtils.getFlippedBit(wordToAdd) == -1) {
+        newWord = (wordToAdd + 1) | (position << 25);
+      }
+    }
+
+    if (newWord != 0) {
+      set.set(length - 1, newWord);
+    } else {
+      set.add(wordToAdd);
+    }
+  }
+
+  /**
+   * Unions all given sets in a single pass. One word iterator per non-empty
+   * set is kept in a priority queue ordered by the block index at which its
+   * current word starts; words starting at the same index are combined and the
+   * result is appended through {@link #addAndCompact(IntList, int)}.
+   *
+   * @param sets sets to union; null and empty entries are ignored
+   *
+   * @return the union, or the empty set when nothing was produced
+   */
+  private static ImmutableConciseSet doUnion(Iterator<ImmutableConciseSet> sets)
+  {
+    IntList retVal = new IntList();
+
+    // lhs = current word position, rhs = the iterator
+    // Comparison is first by index, then one fills > literals > zero fills
+    // one fills are sorted by length (longer one fills have priority)
+    // similarly, shorter zero fills have priority
+    MinMaxPriorityQueue<WordHolder> theQ = MinMaxPriorityQueue.orderedBy(
+        new Comparator<WordHolder>()
+        {
+          @Override
+          public int compare(WordHolder h1, WordHolder h2)
+          {
+            int w1 = h1.getWord();
+            int w2 = h2.getWord();
+            int s1 = h1.getIterator().startIndex;
+            int s2 = h2.getIterator().startIndex;
+
+            if (s1 != s2) {
+              return compareInts(s1, s2);
+            }
+
+            if (ConciseSetUtils.isOneSequence(w1)) {
+              if (ConciseSetUtils.isOneSequence(w2)) {
+                return -compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2));
+              }
+              return -1;
+            } else if (ConciseSetUtils.isLiteral(w1)) {
+              if (ConciseSetUtils.isOneSequence(w2)) {
+                return 1;
+              } else if (ConciseSetUtils.isLiteral(w2)) {
+                return 0;
+              }
+              return -1;
+            } else {
+              if (!ConciseSetUtils.isZeroSequence(w2)) {
+                return 1;
+              }
+              return compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2));
+            }
+          }
+        }
+    ).create();
+
+    // populate priority queue
+    while (sets.hasNext()) {
+      ImmutableConciseSet set = sets.next();
+
+      if (set != null && !set.isEmpty()) {
+        WordIterator itr = set.newWordIterator();
+        theQ.add(new WordHolder(itr.next(), itr));
+      }
+    }
+
+    int currIndex = 0;
+
+    while (!theQ.isEmpty()) {
+      // create a temp list to hold everything that will get pushed back into the priority queue after each run
+      List<WordHolder> wordsToAdd = Lists.newArrayList();
+
+      // grab the top element from the priority queue
+      WordHolder curr = theQ.poll();
+      int word = curr.getWord();
+      WordIterator itr = curr.getIterator();
+
+      // if the next word in the queue starts at a different point than where we ended off we need to create a zero gap
+      // to fill the space
+      if (currIndex < itr.startIndex) {
+        addAndCompact(retVal, itr.startIndex - currIndex - 1);
+        currIndex = itr.startIndex;
+      }
+
+      if (ConciseSetUtils.isOneSequence(word)) {
+        // extract a literal from the flip bits of the one sequence
+        int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
+
+        // advance everything past the longest ones sequence
+        WordHolder nextVal = theQ.peek();
+        while (nextVal != null &&
+               nextVal.getIterator().startIndex < itr.wordsWalked) {
+          WordHolder entry = theQ.poll();
+          int w = entry.getWord();
+          WordIterator i = entry.getIterator();
+
+          if (i.startIndex == itr.startIndex) {
+            // if a literal was created from a flip bit, OR it with other literals or literals from flip bits in the same
+            // position
+            if (ConciseSetUtils.isOneSequence(w)) {
+              flipBitLiteral |= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
+            } else if (ConciseSetUtils.isLiteral(w)) {
+              flipBitLiteral |= w;
+            } else {
+              flipBitLiteral |= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
+            }
+          }
+
+          i.advanceTo(itr.wordsWalked);
+          if (i.hasNext()) {
+            wordsToAdd.add(new WordHolder(i.next(), i));
+          }
+          nextVal = theQ.peek();
+        }
+
+        // advance longest one literal forward and push result back to priority queue
+        // if a flip bit is still needed, put it in the correct position
+        // (0xC1FFFFFF clears bits 25-29, the flipped-bit position field)
+        int newWord = word & 0xC1FFFFFF;
+        if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
+          flipBitLiteral ^= ConciseSetUtils.ALL_ONES_LITERAL;
+          int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
+          newWord |= (position << 25);
+        }
+        addAndCompact(retVal, newWord);
+        currIndex = itr.wordsWalked;
+
+        if (itr.hasNext()) {
+          wordsToAdd.add(new WordHolder(itr.next(), itr));
+        }
+      } else if (ConciseSetUtils.isLiteral(word)) {
+        // advance all other literals
+        WordHolder nextVal = theQ.peek();
+        while (nextVal != null &&
+               nextVal.getIterator().startIndex == itr.startIndex) {
+
+          WordHolder entry = theQ.poll();
+          int w = entry.getWord();
+          WordIterator i = entry.getIterator();
+
+          // if we still have zero fills with flipped bits, OR them here
+          if (ConciseSetUtils.isLiteral(w)) {
+            word |= w;
+          } else {
+            int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
+            if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
+              word |= flipBitLiteral;
+              i.advanceTo(itr.wordsWalked);
+            }
+          }
+
+          if (i.hasNext()) {
+            wordsToAdd.add(new WordHolder(i.next(), i));
+          }
+
+          nextVal = theQ.peek();
+        }
+
+        // advance the set with the current literal forward and push result back to priority queue
+        addAndCompact(retVal, word);
+        currIndex++;
+
+        if (itr.hasNext()) {
+          wordsToAdd.add(new WordHolder(itr.next(), itr));
+        }
+      } else { // zero fills
+        int flipBitLiteral;
+        WordHolder nextVal = theQ.peek();
+
+        while (nextVal != null &&
+               nextVal.getIterator().startIndex == itr.startIndex) {
+          // check if literal can be created flip bits of other zero sequences
+          WordHolder entry = theQ.poll();
+          int w = entry.getWord();
+          WordIterator i = entry.getIterator();
+
+          flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
+          if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
+            wordsToAdd.add(new WordHolder(flipBitLiteral, i));
+          } else if (i.hasNext()) {
+            wordsToAdd.add(new WordHolder(i.next(), i));
+          }
+          nextVal = theQ.peek();
+        }
+
+        // check if a literal needs to be created from the flipped bits of this sequence
+        flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
+        if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
+          wordsToAdd.add(new WordHolder(flipBitLiteral, itr));
+        } else if (itr.hasNext()) {
+          wordsToAdd.add(new WordHolder(itr.next(), itr));
+        }
+      }
+
+      theQ.addAll(wordsToAdd);
+    }
+
+    if (retVal.isEmpty()) {
+      return new ImmutableConciseSet();
+    }
+    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
+  }
+
+  /**
+   * Intersects all given sets in a single pass. Mirrors
+   * {@link #doUnion(Iterator)} with the roles of one fills and zero fills
+   * swapped, and stops as soon as any input runs out of words, because nothing
+   * beyond the end of the shortest input can be in the intersection.
+   *
+   * @param sets sets to intersect; a null or empty entry makes the result empty
+   *
+   * @return the intersection, or the empty set when nothing was produced
+   */
+  public static ImmutableConciseSet doIntersection(Iterator<ImmutableConciseSet> sets)
+  {
+    IntList retVal = new IntList();
+
+    // lhs = current word position, rhs = the iterator
+    // Comparison is first by index, then zero fills > literals > one fills
+    // zero fills are sorted by length (longer zero fills have priority)
+    // similarly, shorter one fills have priority
+    MinMaxPriorityQueue<WordHolder> theQ = MinMaxPriorityQueue.orderedBy(
+        new Comparator<WordHolder>()
+        {
+          @Override
+          public int compare(WordHolder h1, WordHolder h2)
+          {
+            int w1 = h1.getWord();
+            int w2 = h2.getWord();
+            int s1 = h1.getIterator().startIndex;
+            int s2 = h2.getIterator().startIndex;
+
+            if (s1 != s2) {
+              return compareInts(s1, s2);
+            }
+
+            if (ConciseSetUtils.isZeroSequence(w1)) {
+              if (ConciseSetUtils.isZeroSequence(w2)) {
+                return -compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2));
+              }
+              return -1;
+            } else if (ConciseSetUtils.isLiteral(w1)) {
+              if (ConciseSetUtils.isZeroSequence(w2)) {
+                return 1;
+              } else if (ConciseSetUtils.isLiteral(w2)) {
+                return 0;
+              }
+              return -1;
+            } else {
+              if (!ConciseSetUtils.isOneSequence(w2)) {
+                return 1;
+              }
+              return compareInts(ConciseSetUtils.getSequenceNumWords(w1), ConciseSetUtils.getSequenceNumWords(w2));
+            }
+          }
+        }
+    ).create();
+
+    // populate priority queue
+    while (sets.hasNext()) {
+      ImmutableConciseSet set = sets.next();
+
+      if (set == null || set.isEmpty()) {
+        return new ImmutableConciseSet();
+      }
+
+      WordIterator itr = set.newWordIterator();
+      theQ.add(new WordHolder(itr.next(), itr));
+    }
+
+    int currIndex = 0;
+    // smallest block count at which any input ran out of words
+    int wordsWalkedAtSequenceEnd = Integer.MAX_VALUE;
+
+    while (!theQ.isEmpty()) {
+      // create a temp list to hold everything that will get pushed back into the priority queue after each run
+      List<WordHolder> wordsToAdd = Lists.newArrayList();
+
+      // grab the top element from the priority queue
+      WordHolder curr = theQ.poll();
+      int word = curr.getWord();
+      WordIterator itr = curr.getIterator();
+
+      // if a sequence has ended, we can break out because of Boolean logic
+      if (itr.startIndex >= wordsWalkedAtSequenceEnd) {
+        break;
+      }
+
+      // if the next word in the queue starts at a different point than where we ended off we need to create a one gap
+      // to fill the space
+      if (currIndex < itr.startIndex) {
+        // number of 31 bit blocks that comprise the fill minus one
+        addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (itr.startIndex - currIndex - 1)));
+        currIndex = itr.startIndex;
+      }
+
+      if (ConciseSetUtils.isZeroSequence(word)) {
+        // extract a literal from the flip bits of the zero sequence
+        int flipBitLiteral = ConciseSetUtils.getLiteralFromZeroSeqFlipBit(word);
+
+        // advance everything past the longest zero sequence
+        WordHolder nextVal = theQ.peek();
+        while (nextVal != null &&
+               nextVal.getIterator().startIndex < itr.wordsWalked) {
+          WordHolder entry = theQ.poll();
+          int w = entry.getWord();
+          WordIterator i = entry.getIterator();
+
+          if (i.startIndex == itr.startIndex) {
+            // if a literal was created from a flip bit, AND it with other literals or literals from flip bits in the same
+            // position
+            if (ConciseSetUtils.isZeroSequence(w)) {
+              flipBitLiteral &= ConciseSetUtils.getLiteralFromZeroSeqFlipBit(w);
+            } else if (ConciseSetUtils.isLiteral(w)) {
+              flipBitLiteral &= w;
+            } else {
+              flipBitLiteral &= ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
+            }
+          }
+
+          i.advanceTo(itr.wordsWalked);
+          if (i.hasNext()) {
+            wordsToAdd.add(new WordHolder(i.next(), i));
+          } else {
+            wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
+          }
+          nextVal = theQ.peek();
+        }
+
+        // advance longest zero literal forward and push result back to priority queue
+        // if a flip bit is still needed, put it in the correct position
+        // (0xC1FFFFFF clears bits 25-29, the flipped-bit position field)
+        int newWord = word & 0xC1FFFFFF;
+        if (flipBitLiteral != ConciseSetUtils.ALL_ZEROS_LITERAL) {
+          int position = Integer.numberOfTrailingZeros(flipBitLiteral) + 1;
+          newWord = (word & 0xC1FFFFFF) | (position << 25);
+        }
+        addAndCompact(retVal, newWord);
+        currIndex = itr.wordsWalked;
+
+        if (itr.hasNext()) {
+          wordsToAdd.add(new WordHolder(itr.next(), itr));
+        } else {
+          wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
+        }
+      } else if (ConciseSetUtils.isLiteral(word)) {
+        // advance all other literals
+        WordHolder nextVal = theQ.peek();
+        while (nextVal != null &&
+               nextVal.getIterator().startIndex == itr.startIndex) {
+
+          WordHolder entry = theQ.poll();
+          int w = entry.getWord();
+          WordIterator i = entry.getIterator();
+
+          // if we still have one fills with flipped bits, AND them here
+          if (ConciseSetUtils.isLiteral(w)) {
+            word &= w;
+          } else {
+            int flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
+            if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
+              word &= flipBitLiteral;
+              i.advanceTo(itr.wordsWalked);
+            }
+          }
+
+          if (i.hasNext()) {
+            wordsToAdd.add(new WordHolder(i.next(), i));
+          } else {
+            wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
+          }
+
+          nextVal = theQ.peek();
+        }
+
+        // advance the set with the current literal forward and push result back to priority queue
+        addAndCompact(retVal, word);
+        currIndex++;
+
+        if (itr.hasNext()) {
+          wordsToAdd.add(new WordHolder(itr.next(), itr));
+        } else {
+          wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
+        }
+      } else { // one fills
+        int flipBitLiteral;
+        WordHolder nextVal = theQ.peek();
+
+        while (nextVal != null &&
+               nextVal.getIterator().startIndex == itr.startIndex) {
+          // check if literal can be created flip bits of other one sequences
+          WordHolder entry = theQ.poll();
+          int w = entry.getWord();
+          WordIterator i = entry.getIterator();
+
+          flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(w);
+          if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
+            wordsToAdd.add(new WordHolder(flipBitLiteral, i));
+          } else if (i.hasNext()) {
+            wordsToAdd.add(new WordHolder(i.next(), i));
+          } else {
+            wordsWalkedAtSequenceEnd = Math.min(i.wordsWalked, wordsWalkedAtSequenceEnd);
+          }
+
+          nextVal = theQ.peek();
+        }
+
+        // check if a literal needs to be created from the flipped bits of this sequence
+        flipBitLiteral = ConciseSetUtils.getLiteralFromOneSeqFlipBit(word);
+        if (flipBitLiteral != ConciseSetUtils.ALL_ONES_LITERAL) {
+          wordsToAdd.add(new WordHolder(flipBitLiteral, itr));
+        } else if (itr.hasNext()) {
+          wordsToAdd.add(new WordHolder(itr.next(), itr));
+        } else {
+          wordsWalkedAtSequenceEnd = Math.min(itr.wordsWalked, wordsWalkedAtSequenceEnd);
+        }
+      }
+
+      theQ.addAll(wordsToAdd);
+    }
+
+    // fill in any missing one sequences
+    if (currIndex < wordsWalkedAtSequenceEnd) {
+      addAndCompact(retVal, (ConciseSetUtils.SEQUENCE_BIT | (wordsWalkedAtSequenceEnd - currIndex - 1)));
+    }
+
+    if (retVal.isEmpty()) {
+      return new ImmutableConciseSet();
+    }
+    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
+  }
+
+  /**
+   * Complements the set up to its last element: every word is flipped, the
+   * bits after {@link #getLast()} are cleared again, and trailing zero words
+   * are trimmed.
+   *
+   * @param set set to complement; null or empty yields the empty set
+   *
+   * @return the complemented set
+   */
+  public static ImmutableConciseSet doComplement(ImmutableConciseSet set)
+  {
+    if (set == null || set.isEmpty()) {
+      return new ImmutableConciseSet();
+    }
+
+    IntList retVal = new IntList();
+    WordIterator iter = set.newWordIterator();
+    while (iter.hasNext()) {
+      int word = iter.next();
+      if (ConciseSetUtils.isLiteral(word)) {
+        retVal.add(ConciseSetUtils.ALL_ZEROS_LITERAL | ~word);
+      } else {
+        retVal.add(ConciseSetUtils.SEQUENCE_BIT ^ word);
+      }
+    }
+    // do not complement after the last element
+    int lastWord = retVal.get(retVal.length() - 1);
+    if (ConciseSetUtils.isLiteral(lastWord)) {
+      lastWord = ConciseSetUtils.clearBitsAfterInLastWord(
+          lastWord,
+          ConciseSetUtils.maxLiteralLengthModulus(set.getLast())
+      );
+    }
+
+    retVal.set(retVal.length() - 1, lastWord);
+
+    trimZeros(retVal);
+
+    if (retVal.isEmpty()) {
+      return new ImmutableConciseSet();
+    }
+    return new ImmutableConciseSet(IntBuffer.wrap(retVal.toArray()));
+  }
+
+  /**
+   * Walks backwards over the trailing words, overwriting all-zero literals and
+   * flip-bit-free zero fills with 0, and stops at the first word that carries
+   * a set bit (a zero fill with a flipped bit is first converted into the
+   * equivalent one-bit literal). Callers pass a non-empty list.
+   */
+  // Based on the ConciseSet implementation by Alessandro Colantonio
+  private static void trimZeros(IntList set)
+  {
+    // loop over ALL_ZEROS_LITERAL words
+    int w;
+    int last = set.length() - 1;
+    do {
+      w = set.get(last);
+      if (w == ConciseSetUtils.ALL_ZEROS_LITERAL) {
+        set.set(last, 0);
+        last--;
+      } else if (ConciseSetUtils.isZeroSequence(w)) {
+        if (ConciseSetUtils.isSequenceWithNoBits(w)) {
+          set.set(last, 0);
+          last--;
+        } else {
+          // convert the sequence in a 1-bit literal word
+          set.set(last, ConciseSetUtils.getLiteral(w, false));
+          return;
+        }
+      } else {
+        // one sequence or literal
+        return;
+      }
+      if (set.isEmpty() || last == -1) {
+        return;
+      }
+    } while (true);
+  }
+
+  /**
+   * Serializes the words into a freshly allocated byte array.
+   *
+   * @return {@code words.capacity() * Ints.BYTES} bytes, or an empty array when the set has no word buffer
+   */
+  public byte[] toBytes()
+  {
+    if (words == null) {
+      return new byte[]{};
+    }
+    ByteBuffer buf = ByteBuffer.allocate(words.capacity() * Ints.BYTES);
+    buf.asIntBuffer().put(words.asReadOnlyBuffer());
+    return buf.array();
+  }
+
+  /**
+   * @return index of the last word, -1 when the set has no words
+   */
+  public int getLastWordIndex()
+  {
+    return lastWordIndex;
+  }
+
+  /**
+   * Counts the set bits of every word: the bit count of a literal, one for a
+   * zero fill with a flipped bit, and 31 per block of a one fill minus one
+   * when it has a flipped bit.
+   */
+  // Based on the ConciseSet implementation by Alessandro Colantonio
+  private int calcSize()
+  {
+    int retVal = 0;
+    for (int i = 0; i <= lastWordIndex; i++) {
+      int w = words.get(i);
+      if (ConciseSetUtils.isLiteral(w)) {
+        retVal += ConciseSetUtils.getLiteralBitCount(w);
+      } else {
+        if (ConciseSetUtils.isZeroSequence(w)) {
+          if (!ConciseSetUtils.isSequenceWithNoBits(w)) {
+            retVal++;
+          }
+        } else {
+          retVal += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1);
+          if (!ConciseSetUtils.isSequenceWithNoBits(w)) {
+            retVal--;
+          }
+        }
+      }
+    }
+
+    return retVal;
+  }
+
+  /**
+   * @return number of integers in the set
+   */
+  public int size()
+  {
+    return size;
+  }
+
+  /**
+   * @return the largest integer in the set, or -1 when the set is empty
+   */
+  // Based on the ConciseSet implementation by Alessandro Colantonio
+  public int getLast()
+  {
+    if (isEmpty()) {
+      return -1;
+    }
+
+    // total number of bits covered by all words ...
+    int last = 0;
+    for (int i = 0; i <= lastWordIndex; i++) {
+      int w = words.get(i);
+      if (ConciseSetUtils.isLiteral(w)) {
+        last += ConciseSetUtils.MAX_LITERAL_LENGTH;
+      } else {
+        last += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1);
+      }
+    }
+
+    // ... minus the unused high bits of the final word
+    int w = words.get(lastWordIndex);
+    if (ConciseSetUtils.isLiteral(w)) {
+      last -= Integer.numberOfLeadingZeros(ConciseSetUtils.getLiteralBits(w));
+    } else {
+      last--;
+    }
+    return last;
+  }
+
+  /**
+   * @param integer value to look up
+   *
+   * @return true when the value is in the set
+   */
+  public boolean contains(final int integer)
+  {
+    if (isEmpty()) {
+      return false;
+    }
+    final IntSet.IntIterator intIterator = iterator();
+    intIterator.skipAllBefore(integer);
+    return intIterator.hasNext() && intIterator.next() == integer;
+  }
+
+  /**
+   * Returns the i-th smallest integer of the set (0-based).
+   *
+   * @param i rank of the wanted element
+   *
+   * @return the element with that rank
+   *
+   * @throws IndexOutOfBoundsException if i is negative or not smaller than the number of elements
+   */
+  // Based on the ConciseSet implementation by Alessandro Colantonio
+  public int get(int i)
+  {
+    if (i < 0) {
+      throw new IndexOutOfBoundsException();
+    }
+
+    // initialize data
+    int firstSetBitInWord = 0;
+    int position = i;
+    int setBitsInCurrentWord = 0;
+    for (int j = 0; j <= lastWordIndex; j++) {
+      int w = words.get(j);
+      if (ConciseSetUtils.isLiteral(w)) {
+        // number of bits in the current word
+        setBitsInCurrentWord = ConciseSetUtils.getLiteralBitCount(w);
+
+        // check if the desired bit is in the current word
+        if (position < setBitsInCurrentWord) {
+          int currSetBitInWord = -1;
+          for (; position >= 0; position--) {
+            currSetBitInWord = Integer.numberOfTrailingZeros(w & (0xFFFFFFFF << (currSetBitInWord + 1)));
+          }
+          return firstSetBitInWord + currSetBitInWord;
+        }
+
+        // skip the 31-bit block
+        firstSetBitInWord += ConciseSetUtils.MAX_LITERAL_LENGTH;
+      } else {
+        // number of involved bits (31 * blocks)
+        int sequenceLength = ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(w) + 1);
+
+        // check the sequence type
+        if (ConciseSetUtils.isOneSequence(w)) {
+          if (ConciseSetUtils.isSequenceWithNoBits(w)) {
+            setBitsInCurrentWord = sequenceLength;
+            if (position < setBitsInCurrentWord) {
+              return firstSetBitInWord + position;
+            }
+          } else {
+            setBitsInCurrentWord = sequenceLength - 1;
+            if (position < setBitsInCurrentWord)
+            // check whether the desired set bit is after the
+            // flipped bit (or after the first block)
+            {
+              return firstSetBitInWord + position + (position < ConciseSetUtils.getFlippedBit(w) ? 0 : 1);
+            }
+          }
+        } else {
+          if (ConciseSetUtils.isSequenceWithNoBits(w)) {
+            setBitsInCurrentWord = 0;
+          } else {
+            setBitsInCurrentWord = 1;
+            if (position == 0) {
+              return firstSetBitInWord + ConciseSetUtils.getFlippedBit(w);
+            }
+          }
+        }
+
+        // skip the 31-bit blocks
+        firstSetBitInWord += sequenceLength;
+      }
+
+      // update the number of found set bits
+      position -= setBitsInCurrentWord;
+    }
+
+    throw new IndexOutOfBoundsException(Integer.toString(i));
+  }
+
+  /**
+   * Compares the raw word buffers lexicographically ({@link IntBuffer#compareTo}).
+   * A set without a word buffer is compared as an empty buffer instead of
+   * failing with a NullPointerException.
+   *
+   * @param other set to compare with, must not be null
+   *
+   * @return negative, zero or positive per {@link IntBuffer#compareTo}
+   */
+  public int compareTo(ImmutableConciseSet other)
+  {
+    return readOnlyWords(this).compareTo(readOnlyWords(other));
+  }
+
+  /** Read-only view of the set's words; an empty buffer when the set has none. */
+  private static IntBuffer readOnlyWords(ImmutableConciseSet set)
+  {
+    return set.words == null ? IntBuffer.allocate(0) : set.words.asReadOnlyBuffer();
+  }
+
+  private boolean isEmpty()
+  {
+    return words == null || words.limit() == 0;
+  }
+
+  /**
+   * @return the elements in iteration order, formatted as {@code [a, b, c]}
+   */
+  @Override
+  // Based on the AbstractIntSet implementation by Alessandro Colantonio
+  public String toString()
+  {
+    IntSet.IntIterator itr = iterator();
+    if (!itr.hasNext()) {
+      return "[]";
+    }
+
+    StringBuilder sb = new StringBuilder();
+    sb.append('[');
+    for (; ; ) {
+      sb.append(itr.next());
+      if (!itr.hasNext()) {
+        return sb.append(']').toString();
+      }
+      sb.append(", ");
+    }
+  }
+
+  /**
+   * @return an iterator over the integers of the set; it does not support remove(), and the iterator of an empty
+   * set does not support clone() either
+   */
+  // Based on the ConciseSet implementation by Alessandro Colantonio
+  public IntSet.IntIterator iterator()
+  {
+    if (isEmpty()) {
+      return new IntSet.IntIterator()
+      {
+        @Override
+        public void skipAllBefore(int element) {/*empty*/}
+
+        @Override
+        public boolean hasNext() {return false;}
+
+        @Override
+        public int next() {throw new NoSuchElementException();}
+
+        @Override
+        public void remove() {throw new UnsupportedOperationException();}
+
+        @Override
+        public IntSet.IntIterator clone() {throw new UnsupportedOperationException();}
+      };
+    }
+    return new BitIterator();
+  }
+
+  /**
+   * @return a new iterator over the raw words of this set
+   */
+  public WordIterator newWordIterator()
+  {
+    return new WordIterator();
+  }
+
+  /**
+   * Priority-queue entry used by doUnion / doIntersection: a word together
+   * with the iterator it came from.
+   */
+  private static class WordHolder
+  {
+    private final int word;
+    private final WordIterator iterator;
+
+    public WordHolder(
+        int word,
+        WordIterator iterator
+    )
+    {
+      this.word = word;
+      this.iterator = iterator;
+    }
+
+    public int getWord()
+    {
+      return word;
+    }
+
+    public WordIterator getIterator()
+    {
+      return iterator;
+    }
+  }
+
+  /**
+   * Iterates over the integers of the set by expanding one word at a time
+   * through the matching {@link ConciseSetUtils.WordExpander}.
+   */
+  // Based on the ConciseSet implementation by Alessandro Colantonio
+  private class BitIterator implements IntSet.IntIterator
+  {
+    final ConciseSetUtils.LiteralAndZeroFillExpander litExp;
+    final ConciseSetUtils.OneFillExpander oneExp;
+
+    /** expander of the word currently being iterated (litExp or oneExp) */
+    ConciseSetUtils.WordExpander exp;
+    /** index of the next word to load */
+    int nextIndex = 0;
+    /** bit offset at which the next word starts */
+    int nextOffset = 0;
+
+    private BitIterator()
+    {
+      litExp = ConciseSetUtils.newLiteralAndZeroFillExpander();
+      oneExp = ConciseSetUtils.newOneFillExpander();
+
+      nextWord();
+    }
+
+    private BitIterator(
+        ConciseSetUtils.LiteralAndZeroFillExpander litExp,
+        ConciseSetUtils.OneFillExpander oneExp,
+        ConciseSetUtils.WordExpander exp,
+        int nextIndex,
+        int nextOffset
+    )
+    {
+      this.litExp = litExp;
+      this.oneExp = oneExp;
+      this.exp = exp;
+      this.nextIndex = nextIndex;
+      this.nextOffset = nextOffset;
+    }
+
+    @Override
+    public boolean hasNext()
+    {
+      // load words until one yields an element or the words are used up
+      while (!exp.hasNext()) {
+        if (nextIndex > lastWordIndex) {
+          return false;
+        }
+        nextWord();
+      }
+      return true;
+    }
+
+    @Override
+    public int next()
+    {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      return exp.next();
+    }
+
+    @Override
+    public void remove()
+    {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void skipAllBefore(int element)
+    {
+      while (true) {
+        exp.skipAllBefore(element);
+        if (exp.hasNext() || nextIndex > lastWordIndex) {
+          return;
+        }
+        nextWord();
+      }
+    }
+
+    @Override
+    public IntSet.IntIterator clone()
+    {
+      return new BitIterator(
+          (ConciseSetUtils.LiteralAndZeroFillExpander) litExp.clone(),
+          (ConciseSetUtils.OneFillExpander) oneExp.clone(),
+          exp.clone(),
+          nextIndex,
+          nextOffset
+      );
+    }
+
+    /**
+     * Loads the word at nextIndex into the matching expander and advances
+     * nextOffset by the number of bits that word covers.
+     */
+    private void nextWord()
+    {
+      final int word = words.get(nextIndex++);
+      exp = ConciseSetUtils.isOneSequence(word) ? oneExp : litExp;
+      exp.reset(nextOffset, word, true);
+
+      // prepare next offset
+      if (ConciseSetUtils.isLiteral(word)) {
+        nextOffset += ConciseSetUtils.MAX_LITERAL_LENGTH;
+      } else {
+        nextOffset += ConciseSetUtils.maxLiteralLengthMultiplication(ConciseSetUtils.getSequenceCount(word) + 1);
+      }
+    }
+  }
+
+  /**
+   * Iterates over the raw words of the set while tracking, in units of 31-bit
+   * blocks, where the current word starts (startIndex) and where it ends
+   * (wordsWalked).
+   */
+  public class WordIterator implements Iterator<Integer>
+  {
+    /** block index at which the current word starts */
+    private int startIndex;
+    /** number of blocks covered by all words returned so far */
+    private int wordsWalked;
+    private int currWord;
+    /** remainder of a partially consumed fill, staged by advanceTo() */
+    private int nextWord;
+    /** index into words of the last word read */
+    private int currRow;
+
+    private volatile boolean hasNextWord = false;
+
+    WordIterator()
+    {
+      startIndex = -1;
+      wordsWalked = 0;
+      currRow = -1;
+    }
+
+    /**
+     * Consumes words until at least endCount blocks have been walked. When
+     * the walk overshoots endCount inside a fill, the remainder of that fill
+     * (top bits kept through mask 0xC1000000, count set to
+     * wordsWalked - endCount - 1) is staged as the next word, starting at
+     * endCount.
+     *
+     * @param endCount block index to advance to
+     */
+    public void advanceTo(int endCount)
+    {
+      while (hasNext() && wordsWalked < endCount) {
+        next();
+      }
+      if (wordsWalked <= endCount) {
+        return;
+      }
+
+      nextWord = (currWord & 0xC1000000) | (wordsWalked - endCount - 1);
+      startIndex = endCount;
+      hasNextWord = true;
+    }
+
+    @Override
+    public boolean hasNext()
+    {
+      if (isEmpty()) {
+        return false;
+      }
+      if (hasNextWord) {
+        return true;
+      }
+      return currRow < (words.capacity() - 1);
+    }
+
+    @Override
+    public Integer next()
+    {
+      // a staged fill remainder takes precedence over reading the buffer
+      if (hasNextWord) {
+        currWord = nextWord;
+        hasNextWord = false;
+        return Integer.valueOf(currWord);
+      }
+
+      currWord = words.get(++currRow);
+      if (ConciseSetUtils.isLiteral(currWord)) {
+        startIndex = wordsWalked++;
+      } else {
+        startIndex = wordsWalked;
+        wordsWalked += ConciseSetUtils.getSequenceNumWords(currWord);
+      }
+
+      return Integer.valueOf(currWord);
+    }
+
+    @Override
+    public void remove()
+    {
+      throw new UnsupportedOperationException();
+    }
+  }
+}
diff --git a/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java b/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java
new file mode 100755
index 00000000000..3f15daeb828
--- /dev/null
+++ b/extendedset/src/main/java/io/druid/extendedset/intset/IntSet.java
@@ -0,0 +1,662 @@
+/*
+ * (c)
2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.intset; + + +import io.druid.extendedset.ExtendedSet; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Very similar to {@link ExtendedSet} but for the primitive int + * type. + * + * @author Alessandro Colantonio + * @version $Id: IntSet.java 135 2011-01-04 15:54:48Z cocciasik $ + * @see ArraySet + * @see ConciseSet + * @see FastSet + * @see HashIntSet + */ +public interface IntSet extends Cloneable, Comparable +{ + /** + * Generates the intersection set + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #retainAll(IntSet) + */ + public IntSet intersection(IntSet other); + + /** + * Generates the union set + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #addAll(IntSet) + */ + public IntSet union(IntSet other); + + /** + * Generates the difference set + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #removeAll(IntSet) + */ + public IntSet difference(IntSet other); + + /** + * Generates the symmetric difference set + * + * @param 
other {@link IntSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #flip(int) + */ + public IntSet symmetricDifference(IntSet other); + + /** + * Generates the complement set. The returned set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. + * + * @return the complement set + * + * @see IntSet#complement() + */ + public IntSet complemented(); + + /** + * Complements the current set. The modified set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. + * + * @see IntSet#complemented() + */ + public void complement(); + + /** + * Returns true if the specified {@link IntSet} + * instance contains any elements that are also contained within this + * {@link IntSet} instance + * + * @param other {@link IntSet} to intersect with + * + * @return a boolean indicating whether this {@link IntSet} + * intersects the specified {@link IntSet}. + */ + public boolean containsAny(IntSet other); + + /** + * Returns true if the specified {@link IntSet} + * instance contains at least minElements elements that are + * also contained within this {@link IntSet} instance + * + * @param other {@link IntSet} instance to intersect with + * @param minElements minimum number of elements to be contained within this + * {@link IntSet} instance + * + * @return a boolean indicating whether this {@link IntSet} + * intersects the specified {@link IntSet}. + * + * @throws IllegalArgumentException if minElements < 1 + */ + public boolean containsAtLeast(IntSet other, int minElements); + + /** + * Computes the intersection set size. + *

+ * This is faster than calling {@link #intersection(IntSet)} and + * then {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int intersectionSize(IntSet other); + + /** + * Computes the union set size. + *

+ * This is faster than calling {@link #union(IntSet)} and then + * {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int unionSize(IntSet other); + + /** + * Computes the symmetric difference set size. + *

+ * This is faster than calling {@link #symmetricDifference(IntSet)} + * and then {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int symmetricDifferenceSize(IntSet other); + + /** + * Computes the difference set size. + *

+ * This is faster than calling {@link #difference(IntSet)} and then + * {@link #size()} + * + * @param other {@link IntSet} instance that represents the right + * operand + * + * @return the size + */ + public int differenceSize(IntSet other); + + /** + * Computes the complement set size. + *

+ * This is faster than calling {@link #complemented()} and then + * {@link #size()} + * + * @return the size + */ + public int complementSize(); + + /** + * Generates an empty set + * + * @return the empty set + */ + public IntSet empty(); + + /** + * See the clone() of {@link Object} + * + * @return cloned object + */ + public IntSet clone(); + + /** + * Computes the compression factor of the equivalent bitmap representation + * (1 means not compressed, namely a memory footprint similar to + * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) + * + * @return the compression factor + */ + public double bitmapCompressionRatio(); + + /** + * Computes the compression factor of the equivalent integer collection (1 + * means not compressed, namely a memory footprint similar to + * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) + * + * @return the compression factor + */ + public double collectionCompressionRatio(); + + /** + * @return a {@link IntIterator} instance to iterate over the set + */ + public IntIterator iterator(); + + /** + * @return a {@link IntIterator} instance to iterate over the set in + * descending order + */ + public IntIterator descendingIterator(); + + /** + * Prints debug info about the given {@link IntSet} implementation + * + * @return a string that describes the internal representation of the + * instance + */ + public String debugInfo(); + + /** + * Adds to the set all the elements between first and + * last, both included. + * + * @param from first element + * @param to last element + */ + public void fill(int from, int to); + + /** + * Removes from the set all the elements between first and + * last, both included. 
+ * + * @param from first element + * @param to last element + */ + public void clear(int from, int to); + + /** + * Adds the element if it does not exist, or removes it if it exists + * + * @param e element to flip + * + * @see #symmetricDifference(IntSet) + */ + public void flip(int e); + + /** + * Gets the i-th element of the set + * + * @param i position of the element in the sorted set + * + * @return the i-th element of the set + * + * @throws IndexOutOfBoundsException if {@code i} is less than zero, or greater than or equal to + * {@link #size()} + */ + public int get(int i); + + /** + * Provides the position of an element within the set. + *

+ * It returns -1 if the element does not exist within the set. + * + * @param e element of the set + * + * @return the element position + */ + public int indexOf(int e); + + /** + * Converts a given array into an instance of the current class. + * + * @param a array to use to generate the new instance + * + * @return the converted collection + */ + public IntSet convert(int... a); + + /** + * Converts a given collection into an instance of the current class. + * + * @param c array to use to generate the new instance + * + * @return the converted collection + */ + public IntSet convert(Collection c); + + /** + * Returns the first (lowest) element currently in this set. + * + * @return the first (lowest) element currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public int first(); + + /** + * Returns the last (highest) element currently in this set. + * + * @return the last (highest) element currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public int last(); + + /** + * @return the number of elements in this set (its cardinality) + */ + public int size(); + + /** + * @return true if this set contains no elements + */ + public boolean isEmpty(); + + /** + * Returns true if this set contains the specified element. + * + * @param i element whose presence in this set is to be tested + * + * @return true if this set contains the specified element + */ + public boolean contains(int i); + + /** + * Adds the specified element to this set if it is not already present. It + * ensures that sets never contain duplicate elements. + * + * @param i element to be added to this set + * + * @return true if this set did not already contain the specified + * element + * + * @throws IllegalArgumentException if some property of the specified element prevents it from + * being added to this set + */ + public boolean add(int i); + + /** + * Removes the specified element from this set if it is present. 
+ * + * @param i object to be removed from this set, if present + * + * @return true if this set contained the specified element + * + * @throws UnsupportedOperationException if the remove operation is not supported by this set + */ + public boolean remove(int i); + + /** + * Returns true if this set contains all of the elements of the + * specified collection. + * + * @param c collection to be checked for containment in this set + * + * @return true if this set contains all of the elements of the + * specified collection + * + * @throws NullPointerException if the specified collection contains one or more null + * elements and this set does not permit null elements + * (optional), or if the specified collection is null + * @see #contains(int) + */ + public boolean containsAll(IntSet c); + + /** + * Adds all of the elements in the specified collection to this set if + * they're not already present. + * + * @param c collection containing elements to be added to this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if the specified collection contains one or more null + * elements and this set does not permit null elements, or if + * the specified collection is null + * @throws IllegalArgumentException if some property of an element of the specified collection + * prevents it from being added to this set + * @see #add(int) + */ + public boolean addAll(IntSet c); + + /** + * Retains only the elements in this set that are contained in the specified + * collection. In other words, removes from this set all of its elements + * that are not contained in the specified collection. 
+ * + * @param c collection containing elements to be retained in this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if this set contains a null element and the specified + * collection does not permit null elements (optional), or if + * the specified collection is null + * @see #remove(int) + */ + public boolean retainAll(IntSet c); + + /** + * Removes from this set all of its elements that are contained in the + * specified collection. + * + * @param c collection containing elements to be removed from this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if this set contains a null element and the specified + * collection does not permit null elements (optional), or if + * the specified collection is null + * @see #remove(int) + * @see #contains(int) + */ + public boolean removeAll(IntSet c); + + /** + * Removes all of the elements from this set. The set will be empty after + * this call returns. + * + * @throws UnsupportedOperationException if the clear method is not supported by this set + */ + public void clear(); + + /** + * @return an array containing all the elements in this set, in the same + * order. + */ + public int[] toArray(); + + /** + * Returns an array containing all of the elements in this set. + *

+ * If this set fits in the specified array with room to spare (i.e., the + * array has more elements than this set), the element in the array + * immediately following the end of the set are left unchanged. + * + * @param a the array into which the elements of this set are to be + * stored. + * + * @return the array containing all the elements in this set + * + * @throws NullPointerException if the specified array is null + * @throws IllegalArgumentException if this set does not fit in the specified array + */ + public int[] toArray(int[] a); + + /** + * Computes the power-set of the current set. + *

+ * It is a particular implementation of the algorithm Apriori (see: + * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + * Association Rules in Large Databases, in Proceedings of the + * 20th International Conference on Very Large Data Bases, + * p.487-499, 1994). The returned power-set does not contain the + * empty set. + *

+ * The subsets composing the powerset are returned in a list that is sorted + * according to the lexicographical order provided by the integer set. + * + * @return the power-set + * + * @see #powerSet(int, int) + * @see #powerSetSize() + */ + public List powerSet(); + + /** + * Computes a subset of the power-set of the current set, composed by those + * subsets that have cardinality between min and + * max. + *

+ * It is a particular implementation of the algorithm Apriori (see: + * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + * Association Rules in Large Databases, in Proceedings of the + * 20th International Conference on Very Large Data Bases, + * p.487-499, 1994). The power-set does not contain the empty set. + *

+ * The subsets composing the powerset are returned in a list that is sorted + * according to the lexicographical order provided by the integer set. + * + * @param min minimum subset size (greater than zero) + * @param max maximum subset size + * + * @return the power-set + * + * @see #powerSet() + * @see #powerSetSize(int, int) + */ + public List powerSet(int min, int max); + + /** + * Computes the power-set size of the current set. + *

+ * The power-set does not contain the empty set. + * + * @return the power-set size + * + * @see #powerSet() + */ + public int powerSetSize(); + + /** + * Computes the power-set size of the current set, composed of those subsets + * that have cardinality between {@code min} and {@code max}. + *

+ * The returned power-set does not contain the empty set. + * + * @param min minimum subset size (greater than zero) + * @param max maximum subset size + * + * @return the power-set size + * + * @see #powerSet(int, int) + */ + public int powerSetSize(int min, int max); + + /** + * Computes the Jaccard similarity coefficient between this set and the + * given set. + *

+ * The coefficient is defined as + * |A intersection B| / |A union B|. + * + * @param other the other set + * + * @return the Jaccard similarity coefficient + * + * @see #jaccardDistance(IntSet) + */ + public double jaccardSimilarity(IntSet other); + + /** + * Computes the Jaccard distance between this set and the given set. + *

+ * The coefficient is defined as + * 1 - {@link #jaccardSimilarity(IntSet)}. + * + * @param other the other set + * + * @return the Jaccard distance + * + * @see #jaccardSimilarity(IntSet) + */ + public double jaccardDistance(IntSet other); + + /** + * Computes the weighted version of the Jaccard similarity coefficient + * between this set and the given set. + *

+ * The coefficient is defined as + * sum of min(A_i, B_i) / sum of max(A_i, B_i). + * + * @param other the other set + * + * @return the weighted Jaccard similarity coefficient + * + * @see #weightedJaccardDistance(IntSet) + */ + public double weightedJaccardSimilarity(IntSet other); + + /** + * Computes the weighted version of the Jaccard distance between this set + * and the given set. + *

+ * The coefficient is defined as 1 - + * {@link #weightedJaccardSimilarity(IntSet)}. + * + * @param other the other set + * + * @return the weighted Jaccard distance + * + * @see #weightedJaccardSimilarity(IntSet) + */ + public double weightedJaccardDistance(IntSet other); + + /** + * An {@link Iterator}-like interface that allows to "skip" some elements of + * the set + */ + public interface IntIterator + { + /** + * @return true if the iterator has more elements. + */ + boolean hasNext(); + + /** + * @return the next element in the iteration. + * + * @throws NoSuchElementException iteration has no more elements. + */ + int next(); + + /** + * Removes from the underlying collection the last element returned by + * the iterator (optional operation). This method can be called only + * once per call to next. The behavior of an iterator is + * unspecified if the underlying collection is modified while the + * iteration is in progress in any way other than by calling this + * method. + * + * @throws UnsupportedOperationException if the remove operation is not supported by + * this Iterator. + * @throws IllegalStateException if the next method has not yet been called, + * or the remove method has already been called + * after the last call to the next method. + */ + void remove(); + + /** + * Skips all the elements before the the specified element, so that + * {@link #next()} gives the given element or, if it does not exist, the + * element immediately after according to the sorting provided by this + * set. + *

+ * If element is less than the next element, it does + * nothing + * + * @param element first element to not skip + */ + public void skipAllBefore(int element); + + /** + * Clone the iterator + * + * @return a clone of the IntIterator + */ + public IntIterator clone(); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java b/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java new file mode 100755 index 00000000000..3026fdf2590 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/ArrayMap.java @@ -0,0 +1,299 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.utilities; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; + +/** + * A {@link Map} backed by an array, where keys are the indices of the array, + * and values are the elements of the array. + *

+ * Modifications to the map (i.e., through {@link #put(Integer, Object)} and + * {@link java.util.Map.Entry#setValue(Object)}) are reflected to the original array. + * However, the map has a fixed length, that is the length of the array. + * + * @param the type of elements represented by columns + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class ArrayMap extends AbstractMap implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = -578029467093308343L; + + /** + * array backed by this map + */ + private final T[] array; + /** + * first index of the map + */ + private final int indexShift; + /** + * {@link Set} instance to iterate over #array + */ + private transient Set> entrySet; + + /** + * Initializes the map + * + * @param array array to manipulate + * @param indexShift first index of the map + */ + ArrayMap(T[] array, int indexShift) + { + this.array = array; + this.indexShift = indexShift; + entrySet = null; + } + + /** + * Initializes the map + * + * @param array array to manipulate + */ + ArrayMap(T[] array) + { + this(array, 0); + } + + /** + * Test + * + * @param args + */ + public static void main(String[] args) + { + ArrayMap am = new ArrayMap(new String[]{"Three", "Four", "Five"}, 3); + System.out.println(am); + am.put(5, "FIVE"); + System.out.println(am); + System.out.println(am.get(5)); + System.out.println(am.containsKey(2)); + System.out.println(am.containsKey(3)); + System.out.println(am.containsValue("THREE")); + System.out.println(am.keySet()); + System.out.println(am.values()); + } + + /** + * {@inheritDoc} + */ + @Override + public Set> entrySet() + { + if (entrySet == null) { + // create an entry for each element + final List entries = new ArrayList(array.length); + for (int i = 0; i < array.length; i++) { + entries.add(new SimpleEntry(i)); + } + + // create the Set instance + entrySet = new AbstractSet>() + { + @Override + public Iterator> iterator() + { + 
return new Iterator>() + { + int curr = 0; + + @Override + public boolean hasNext() + { + return curr < entries.size(); + } + + @Override + public Entry next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return entries.get(curr++); + } + + @Override + public void remove() + { + throw new IllegalArgumentException(); + } + }; + } + + @Override + public int size() + { + return entries.size(); + } + }; + } + return entrySet; + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return array.length; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsKey(Object key) + { + int index = (Integer) key - indexShift; + return (index >= 0) && (index < array.length); + } + + /** + * {@inheritDoc} + */ + @Override + public T get(Object key) + { + return array[(Integer) key - indexShift]; + } + + /** + * {@inheritDoc} + */ + @Override + public T put(Integer key, T value) + { + int actualIndex = key - indexShift; + T old = array[actualIndex]; + array[actualIndex] = value; + return old; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return Arrays.hashCode(array); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!super.equals(obj)) { + return false; + } + if (!(obj instanceof ArrayMap)) { + return false; + } + return Arrays.equals(array, ((ArrayMap) obj).array); + } + + /** + * Reconstruct the instance from a stream + */ + private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException + { + s.defaultReadObject(); + entrySet = null; + } + + /** + * Entry of the map + */ + private class SimpleEntry implements Entry + { + /** + * index of {@link ArrayMap#array} + */ + final int actualIndex; + + /** + * Creates an entry + * + * @param index index of {@link ArrayMap#array} + */ + private SimpleEntry(int index) + { + this.actualIndex = index; + } + + /** + * {@inheritDoc} + */ + @Override + 
public Integer getKey() + { + return actualIndex + indexShift; + } + + /** + * {@inheritDoc} + */ + @Override + public T getValue() + { + return array[actualIndex]; + } + + /** + * {@inheritDoc} + */ + @Override + public T setValue(T value) + { + T old = array[actualIndex]; + array[actualIndex] = value; + return old; + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + return (actualIndex + indexShift) + "=" + array[actualIndex]; + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java b/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java new file mode 100755 index 00000000000..306a2e1e510 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/BitCount.java @@ -0,0 +1,350 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.utilities; + +import java.util.Random; + +/** + * Population count (a.k.a. Hamming distance) of a bitmap represented by an + * array of int. + *

+ * Derived from + * http://dalkescientific.com/writings/diary/popcnt.c + * + * @author Alessandro Colantonio + * @version $Id: BitCount.java 157 2011-11-14 14:25:15Z cocciasik $ + */ +public class BitCount +{ + /** + * Population count + *

+ * It counts a single word + * + * @param word word to count + * + * @return population count + */ + public static int count(int word) + { + word -= ((word >>> 1) & 0x55555555); + word = (word & 0x33333333) + ((word >>> 2) & 0x33333333); + word = (word + (word >>> 4)) & 0x0F0F0F0F; + return (word * 0x01010101) >>> 24; + } + + /** + * Population count + * + * @param buffer array of int + * + * @return population count + */ + public static int count(int[] buffer) + { + return count(buffer, buffer.length); + } + + /** + * Population count + *

+ * It counts 24 words at a time, then 3 at a time, then 1 at a time + * + * @param buffer array of int + * @param n number of elements of buffer to count + * + * @return population count + */ + public static int count(int[] buffer, int n) + { + final int n1 = n - n % 24; + final int n2 = n - n % 3; + + int cnt = 0; + int i; + for (i = 0; i < n1; i += 24) { + cnt += merging3(buffer, i); + } + for (; i < n2; i += 3) { + cnt += merging2(buffer, i); + } + cnt += popcount_fbsd2(buffer, i, n); + return cnt; + } + + // used by count() + private static int merging3(int[] buffer, int x) + { + int cnt1; + int cnt2; + int cnt = 0; + for (int i = x; i < x + 24; i += 3) { + cnt1 = buffer[i]; + cnt2 = buffer[i + 1]; + final int w = buffer[i + 2]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt1 += (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt += (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + } + cnt = (cnt & 0x00FF00FF) + ((cnt >>> 8) & 0x00FF00FF); + cnt += cnt >>> 16; + return cnt & 0x00000FFFF; + } + + // used by count() + private static int merging2(int[] buffer, int x) + { + int cnt1 = buffer[x]; + int cnt2 = buffer[x + 1]; + final int w = buffer[x + 2]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt2 = (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt1 += cnt2; + cnt1 = (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + cnt1 += cnt1 >>> 8; + cnt1 += cnt1 >>> 16; + return cnt1 & 0x000000FF; + } + + // used by count() + private static int popcount_fbsd2(int[] data, int x, int n) + { + int cnt = 0; + for (; x < n; x++) { + cnt += count(data[x]); + } + return cnt; + } + + /** + * Population count, skipping words at even positions + * + * @param 
buffer array of int + * + * @return population count + */ + public static int count_2(int[] buffer) + { + return count_2(buffer, buffer.length); + } + + /** + * Population count, skipping words at even positions + *

+ * It counts 24 words at a time, then 3 at a time, then 1 at a time + * + * @param buffer array of int + * @param n number of elements of buffer to count + * + * @return population count + */ + public static int count_2(int[] buffer, int n) + { + final int n1 = n - n % 48; + final int n2 = n - n % 6; + + int cnt = 0; + int i; + for (i = 0; i < n1; i += 48) { + cnt += merging3_2(buffer, i); + } + for (; i < n2; i += 6) { + cnt += merging2_2(buffer, i); + } + cnt += popcount_fbsd2_2(buffer, i, n); + return cnt; + } + + // used by count_2() + private static int merging3_2(int[] buffer, int x) + { + int cnt1; + int cnt2; + int cnt = 0; + for (int i = x; i < x + 48; i += 6) { + cnt1 = buffer[i + 1]; + cnt2 = buffer[i + 3]; + final int w = buffer[i + 5]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt1 += (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt += (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + } + cnt = (cnt & 0x00FF00FF) + ((cnt >>> 8) & 0x00FF00FF); + cnt += cnt >>> 16; + return cnt & 0x00000FFFF; + } + + // used by count_2() + private static int merging2_2(int[] buffer, int x) + { + int cnt1 = buffer[x + 1]; + int cnt2 = buffer[x + 3]; + final int w = buffer[x + 5]; + cnt1 = cnt1 - ((cnt1 >>> 1) & 0x55555555) + (w & 0x55555555); + cnt2 = cnt2 - ((cnt2 >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555); + cnt1 = (cnt1 & 0x33333333) + ((cnt1 >>> 2) & 0x33333333); + cnt2 = (cnt2 & 0x33333333) + ((cnt2 >>> 2) & 0x33333333); + cnt1 += cnt2; + cnt1 = (cnt1 & 0x0F0F0F0F) + ((cnt1 >>> 4) & 0x0F0F0F0F); + cnt1 += cnt1 >>> 8; + cnt1 += cnt1 >>> 16; + return cnt1 & 0x000000FF; + } + + // used by count_2() + private static int popcount_fbsd2_2(int[] data, int x, int n) + { + int cnt = 0; + for (x++; x < n; x += 2) { + cnt += count(data[x]); + } + return cnt; + } + + /** + * Test + * + * @param args + */ + 
public static void main(String[] args)
+  {
+    final int trials = 10000;
+    final int maxLength = 10000;
+
+    // One seed is drawn up front and reused for every phase, so all phases
+    // generate the same sequence of array lengths and values.
+    Random rnd = new Random();
+    final int seed = rnd.nextInt();
+
+    // Phase 1: count(int[]) must agree with summing count(int) over every word.
+    System.out.print("Test correctness... ");
+    rnd = new Random(seed);
+    for (int i = 0; i < trials; i++) {
+      int[] x = new int[rnd.nextInt(maxLength)];
+      for (int j = 0; j < x.length; j++) {
+        // nextInt(bound) yields values in [0, bound): the sign bit is never set here
+        x[j] = rnd.nextInt(Integer.MAX_VALUE);
+      }
+
+      int size1 = 0;
+      for (int j = 0; j < x.length; j++) {
+        size1 += count(x[j]);
+      }
+      int size2 = count(x);
+
+      if (size1 != size2) {
+        // dump the failing input and stop at the first mismatch
+        System.out.println("i = " + i);
+        System.out.println("ERRORE!");
+        System.out.println(size1 + ", " + size2);
+        for (int j = 0; j < x.length; j++) {
+          System.out.format("x[%d] = %d --> %d\n", j, x[j], count(x[j]));
+        }
+        return;
+      }
+    }
+    System.out.println("done!");
+
+    // Phase 2: count_2(int[]) must agree with summing count(int) over the
+    // odd-indexed words only; even positions are left at 0.
+    System.out.print("Test correctness II... ");
+    rnd = new Random(seed);
+    for (int i = 0; i < trials; i++) {
+      int[] x = new int[rnd.nextInt(maxLength << 1)];
+      for (int j = 1; j < x.length; j += 2) {
+        x[j] = rnd.nextInt(Integer.MAX_VALUE);
+      }
+
+      int size1 = 0;
+      for (int j = 1; j < x.length; j += 2) {
+        size1 += count(x[j]);
+      }
+      int size2 = count_2(x);
+
+      if (size1 != size2) {
+        System.out.println("i = " + i);
+        System.out.println("ERRORE!");
+        System.out.println(size1 + ", " + size2);
+        for (int j = 1; j < x.length; j += 2) {
+          System.out.format("x[%d] = %d --> %d\n", j, x[j], count(x[j]));
+        }
+        return;
+      }
+    }
+    System.out.println("done!");
+
+    // Timing phases: the measured interval includes array allocation and
+    // random filling, not only the counting itself.
+    System.out.print("Test time count(): ");
+    rnd = new Random(seed);
+    long t = System.currentTimeMillis();
+    for (int i = 0; i < trials; i++) {
+      int[] x = new int[rnd.nextInt(maxLength)];
+      for (int j = 0; j < x.length; j++) {
+        x[j] = rnd.nextInt(Integer.MAX_VALUE);
+      }
+
+      // the result is deliberately discarded; only elapsed time matters
+      @SuppressWarnings("unused")
+      int size = 0;
+      for (int j = 0; j < x.length; j++) {
+        size += count(x[j]);
+      }
+    }
+    System.out.println(System.currentTimeMillis() - t);
+
+    System.out.print("Test time BitCount.count(): ");
+    rnd = new Random(seed);
+    t = System.currentTimeMillis();
+    for (int i = 0; i < trials; i++) {
+      int[] x = new int[rnd.nextInt(maxLength)];
+      for (int j = 0; j < x.length; j++) {
+        x[j] = rnd.nextInt(Integer.MAX_VALUE);
+      }
+      count(x);
+    }
+    System.out.println(System.currentTimeMillis() - t);
+
+    // Same two timings for the odd-position variant.
+    System.out.print("Test II time count(): ");
+    rnd = new Random(seed);
+    t = System.currentTimeMillis();
+    for (int i = 0; i < trials; i++) {
+      int[] x = new int[rnd.nextInt(maxLength << 1)];
+      for (int j = 1; j < x.length; j += 2) {
+        x[j] = rnd.nextInt(Integer.MAX_VALUE);
+      }
+
+      @SuppressWarnings("unused")
+      int size = 0;
+      for (int j = 1; j < x.length; j += 2) {
+        size += count(x[j]);
+      }
+    }
+    System.out.println(System.currentTimeMillis() - t);
+
+    System.out.print("Test II time BitCount.count(): ");
+    rnd = new Random(seed);
+    t = System.currentTimeMillis();
+    for (int i = 0; i < trials; i++) {
+      int[] x = new int[rnd.nextInt(maxLength << 1)];
+      for (int j = 1; j < x.length; j += 2) {
+        x[j] = rnd.nextInt(Integer.MAX_VALUE);
+      }
+      count_2(x);
+    }
+    System.out.println(System.currentTimeMillis() - t);
+  }
+}
diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java b/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java
new file mode 100755
index 00000000000..ec38ce38b99
--- /dev/null
+++ b/extendedset/src/main/java/io/druid/extendedset/utilities/CollectionMap.java
@@ -0,0 +1,317 @@
+package io.druid.extendedset.utilities;
+
+import io.druid.extendedset.ExtendedSet;
+import io.druid.extendedset.intset.ConciseSet;
+import io.druid.extendedset.wrappers.IntegerSet;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.SortedSet;
+
+/**
+ * This class implements a {@link Map} from a key of type K to a
+ * collection contains instances of I.
+ * + * @param key type + * @param item type + * @param {@link Collection} subclass used to collect items + * + * @author Alessandro Colantonio + * @version $Id: CollectionMap.java 152 2011-03-30 11:18:18Z cocciasik $ + */ +public class CollectionMap> extends LinkedHashMap +{ + private static final long serialVersionUID = -2613391212228461025L; + + /** + * empty collection + */ + private final C emptySet; + + /** + * Initializes the map by providing an instance of the empty collection + * + * @param emptySet the empty collection + */ + public CollectionMap(C emptySet) + { + this.emptySet = emptySet; + } + + /** + * Generates a new {@link CollectionMap} instance. It is an alternative to + * the constructor {@link #CollectionMap(Collection)} that reduces the code + * to write. + * + * @param key type + * @param item type + * @param {@link Collection} subclass used to collect items + * @param empty subset type + * @param emptySet the empty collection + * + * @return the new instance of {@link CollectionMap} + */ + public static , EX extends CX> + CollectionMap newCollectionMap(EX emptySet) + { + return new CollectionMap(emptySet); + } + + /** + * Test procedure + *

+ * Expected output: + *

+   * {}
+   * {A=[1]}
+   * {A=[1, 2]}
+   * {A=[1, 2], B=[3]}
+   * {A=[1, 2], B=[3, 4, 5, 6]}
+   * true
+   * true
+   * false
+   * {A=[1], B=[3, 4, 5, 6]}
+   * {A=[1], B=[3, 4, 5, 6]}
+   * {A=[1], B=[6]}
+   * 
+ * + * @param args + */ + public static void main(String[] args) + { + CollectionMap map = newCollectionMap(new IntegerSet(new ConciseSet())); + System.out.println(map); + + map.putItem("A", 1); + System.out.println(map); + + map.putItem("A", 2); + System.out.println(map); + + map.putItem("B", 3); + System.out.println(map); + + map.putAllItems("B", Arrays.asList(4, 5, 6)); + System.out.println(map); + + System.out.println(map.containsItem(1)); + System.out.println(map.containsItem(6)); + System.out.println(map.containsItem(7)); + + map.removeItem("A", 2); + System.out.println(map); + + map.removeItem("A", 3); + System.out.println(map); + + map.removeAllItems("B", Arrays.asList(1, 2, 3, 4, 5)); + System.out.println(map); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public CollectionMap clone() + { + // result + CollectionMap cloned = new CollectionMap(emptySet); + + // clone all the entries + cloned.putAll(this); + + // clone all the values + if (emptySet instanceof Cloneable) { + for (Entry e : cloned.entrySet()) { + try { + e.setValue((C) e.getValue().getClass().getMethod("clone").invoke(e.getValue())); + } + catch (Exception ex) { + throw new RuntimeException(ex); + } + } + } else { + for (Entry e : cloned.entrySet()) { + C copy = cloneEmptySet(); + copy.addAll(e.getValue()); + e.setValue(copy); + } + } + return cloned; + } + + /** + * Generates an empty {@link CollectionMap} instance with the same + * collection type for values + * + * @return the empty {@link CollectionMap} instance + */ + public CollectionMap empty() + { + return new CollectionMap(emptySet); + } + + /** + * Populates the current instance with the data from another map. In + * particular, it creates the list of keys associated to each value. 
+ * + * @param map the input map + */ + public void mapValueToKeys(Map map) + { + for (Entry e : map.entrySet()) { + putItem(e.getValue(), e.getKey()); + } + } + + /** + * Generates a clone of the empty set + * + * @return a clone of the empty set + */ + @SuppressWarnings("unchecked") + private C cloneEmptySet() + { + try { + if (emptySet instanceof Cloneable) { + return (C) emptySet.getClass().getMethod("clone").invoke(emptySet); + } + return (C) emptySet.getClass().newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Checks if there are some collections that contain the given item + * + * @param item item to check + * + * @return true if the item exists within some collections + */ + public boolean containsItem(I item) + { + for (Entry e : entrySet()) { + if (e.getValue().contains(item)) { + return true; + } + } + return false; + } + + /** + * Adds an item to the collection corresponding to the given key + * + * @param key the key for the identification of the collection + * @param item item to add + * + * @return the updated collection of items for the given key + */ + public C putItem(K key, I item) + { + C items = get(key); + if (items == null) { + put(key, items = cloneEmptySet()); + } + items.add(item); + return items; + } + + /** + * Adds a collection of items to the collection corresponding to the given key + * + * @param key the key for the identification of the collection + * @param c items to add + * + * @return the updated collection of items for the given key + */ + public C putAllItems(K key, Collection c) + { + C items = get(key); + if (c == null) { + put(key, items = cloneEmptySet()); + } + items.addAll(c); + return items; + } + + /** + * Removes the item from the collection corresponding to the given key + * + * @param key the key for the identification of the collection + * @param item item to remove + * + * @return the updated collection of items for the given key + */ + public C removeItem(K key, I 
item) + { + C items = get(key); + if (items == null) { + return null; + } + items.remove(item); + if (items.isEmpty()) { + remove(key); + } + return items; + } + + /** + * Removes a collection of items from the collection corresponding to the given key + * + * @param key the key for the identification of the collection + * @param c items to remove + * + * @return the updated collection of items for the given key + */ + public C removeAllItems(K key, Collection c) + { + C items = get(key); + if (items == null) { + return null; + } + items.removeAll(c); + if (items.isEmpty()) { + remove(key); + } + return items; + } + + /** + * Makes all collections read-only + */ + @SuppressWarnings("unchecked") + public void makeAllCollectionsUnmodifiable() + { + if (emptySet instanceof ExtendedSet) { + for (Entry e : entrySet()) { + e.setValue((C) ((ExtendedSet) e.getValue()).unmodifiable()); + } + } else if (emptySet instanceof List) { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableList((List) e.getValue()))); + } + } else if (emptySet instanceof Set) { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableSet((Set) e.getValue()))); + } + } else if (emptySet instanceof SortedSet) { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableSortedSet((SortedSet) e.getValue()))); + } + } else { + for (Entry e : entrySet()) { + e.setValue((C) (Collections.unmodifiableCollection(e.getValue()))); + } + } + + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java new file mode 100755 index 00000000000..1aaa06bdbe1 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/IntHashCode.java @@ -0,0 +1,103 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.utilities; + +/** + * Hash functions for integers and integer arrays. + * + * @author Alessandro Colantonio + * @version $Id: IntHashCode.java 127 2010-12-21 20:22:12Z cocciasik $ + */ +public class IntHashCode +{ + /** + * Computes a hashcode for an integer + *

+   * Inspired by Thomas Wang's function, described at http://www.concentric.net/~ttwang/tech/inthash.htm
+   *
+   * @param key the given integer
+   *
+   * @return the hashcode
+   */
+  public static int hashCode(int key)
+  {
+    // Alternating add/shift and xor/shift rounds; the order of the statements
+    // determines the result, so it must not be rearranged.
+    key = ~key + (key << 15);
+    key ^= key >>> 12;
+    key += key << 2; // key * 5
+    key ^= key >>> 4;
+    key *= 2057; // 2057 = 1 + 8 + 2048, i.e. key + (key << 3) + (key << 11)
+    key ^= key >>> 16;
+    return key;
+  }
+
+  /**
+   * Computes the hashcode of an array of integers
+   *
+   * @param keys the given integer array
+   *
+   * @return the hashcode
+   */
+  public static int hashCode(int[] keys)
+  {
+    // hashes the whole array with seed 0
+    return hashCode(keys, keys.length, 0);
+  }
+
+  /**
+   * Computes the hashcode of an array of integers
+   *

+ * It is based on MurmurHash3 Algorithm, described at http://sites.google.com/site/murmurhash + * + * @param keys the given integer array + * @param len number of elements to include, that is + * len <= keys.length + * @param seed initial seed + * + * @return the hashcode + */ + public static int hashCode(int[] keys, int len, int seed) + { + int h = 0x971e137b ^ seed; + int c1 = 0x95543787; + int c2 = 0x2ad7eb25; + + for (int i = 0; i < len; i++) { + int k = keys[i]; + k *= c1; + k = (k << 11) | (k >>> 21); // rotl k, 11 + k *= c2; + h ^= k; + + h = (h << 2) - h + 0x52dce729; + c1 = (c1 << 2) + c1 + 0x7b7d159c; + c2 = (c2 << 2) + c2 + 0x6bce6396; + } + + h ^= len; + h ^= h >>> 16; + h *= 0x85ebca6b; + h ^= h >>> 13; + h *= 0xc2b2ae35; + h ^= h >>> 16; + return h; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java new file mode 100755 index 00000000000..51474c9fccb --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/IntList.java @@ -0,0 +1,115 @@ +package io.druid.extendedset.utilities; + +import java.nio.IntBuffer; +import java.util.ArrayList; + +/** + */ +public class IntList +{ + private final ArrayList baseLists = new ArrayList(); + + private final int allocateSize; + + private int maxIndex; + + public IntList() + { + this(1000); + } + + public IntList(final int allocateSize) + { + this.allocateSize = allocateSize; + + maxIndex = -1; + } + + public int length() + { + return maxIndex + 1; + } + + public boolean isEmpty() + { + return (length() == 0); + } + + public void add(int value) + { + set(length(), value); + } + + public void set(int index, int value) + { + int subListIndex = index / allocateSize; + + if (subListIndex >= baseLists.size()) { + for (int i = baseLists.size(); i <= subListIndex; ++i) { + baseLists.add(null); + } + } + + int[] baseList = baseLists.get(subListIndex); + + if (baseList == null) { + baseList = new 
int[allocateSize]; + baseLists.set(subListIndex, baseList); + } + + baseList[index % allocateSize] = value; + + if (index > maxIndex) { + maxIndex = index; + } + } + + public int get(int index) + { + if (index > maxIndex) { + throw new ArrayIndexOutOfBoundsException(index); + } + + int subListIndex = index / allocateSize; + int[] baseList = baseLists.get(subListIndex); + + if (baseList == null) { + return 0; + } + + return baseList[index % allocateSize]; + } + + public int baseListCount() + { + return baseLists.size(); + } + + public IntBuffer getBaseList(int index) + { + final int[] array = baseLists.get(index); + if (array == null) { + return null; + } + + final IntBuffer retVal = IntBuffer.wrap(array); + + if (index + 1 == baseListCount()) { + retVal.limit(maxIndex - (index * allocateSize)); + } + + return retVal.asReadOnlyBuffer(); + } + + public int[] toArray() + { + int[] retVal = new int[length()]; + int currIndex = 0; + for (int[] arr : baseLists) { + int min = Math.min(length() - currIndex, arr.length); + System.arraycopy(arr, 0, retVal, currIndex, min); + currIndex += min; + } + return retVal; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java b/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java new file mode 100755 index 00000000000..5d40f8299b5 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/IntSetStatistics.java @@ -0,0 +1,689 @@ +package io.druid.extendedset.utilities; + +import io.druid.extendedset.intset.IntSet; + +import java.util.Collection; +import java.util.Formatter; +import java.util.List; + +/** + * A wrapper class for classes that implement the {@link IntSet} interface to count method calls + * + * @author Alessandro Colantonio + * @version $Id: IntSetStatistics.java 153 2011-05-30 16:39:57Z cocciasik $ + */ +public class IntSetStatistics implements IntSet +{ + /** + * @uml.property name="unionCount" + */ + private static long unionCount 
= 0;
+
+
+  /*
+   * Monitored characteristics: static counters shared by every wrapper
+   * instance and incremented, without synchronization, by the instance methods
+   */
+  /**
+   * @uml.property name="intersectionCount"
+   */
+  private static long intersectionCount = 0;
+  /**
+   * @uml.property name="differenceCount"
+   */
+  private static long differenceCount = 0;
+  /**
+   * @uml.property name="symmetricDifferenceCount"
+   */
+  private static long symmetricDifferenceCount = 0;
+  /**
+   * @uml.property name="complementCount"
+   */
+  private static long complementCount = 0;
+  /**
+   * @uml.property name="unionSizeCount"
+   */
+  private static long unionSizeCount = 0;
+  /**
+   * @uml.property name="intersectionSizeCount"
+   */
+  private static long intersectionSizeCount = 0;
+  /**
+   * @uml.property name="differenceSizeCount"
+   */
+  private static long differenceSizeCount = 0;
+  /**
+   * @uml.property name="symmetricDifferenceSizeCount"
+   */
+  private static long symmetricDifferenceSizeCount = 0;
+  /**
+   * @uml.property name="complementSizeCount"
+   */
+  private static long complementSizeCount = 0;
+  /**
+   * @uml.property name="equalsCount"
+   */
+  private static long equalsCount = 0;
+  /**
+   * @uml.property name="hashCodeCount"
+   */
+  private static long hashCodeCount = 0;
+  /**
+   * @uml.property name="containsAllCount"
+   */
+  private static long containsAllCount = 0;
+  /**
+   * @uml.property name="containsAnyCount"
+   */
+  private static long containsAnyCount = 0;
+  /**
+   * @uml.property name="containsAtLeastCount"
+   */
+  private static long containsAtLeastCount = 0;
+  /**
+   * instance to monitor; never an {@link IntSetStatistics} itself, since the
+   * constructor unwraps its argument
+   *
+   * @uml.property name="container"
+   * @uml.associationEnd
+   */
+  private final IntSet container;
+
+
+  /*
+   * Constructor and statistics getters
+   */
+
+  /**
+   * Wraps an {@link IntSet} instance with an {@link IntSetStatistics}
+   * instance
+   *
+   * @param container {@link IntSet} to wrap; an already wrapped set is
+   *                  unwrapped first, so wrappers are never nested
+   */
+  public IntSetStatistics(IntSet container)
+  {
+    this.container = extractContainer(container);
+  }
+
+  /**
+   * @return number of union operations (i.e., {@link #addAll(IntSet)} , {@link #union(IntSet)} )
+   *
+   * @uml.property
name="unionCount"
+   */
+  public static long getUnionCount() {return unionCount;}
+
+  /**
+   * @return number of intersection operations (i.e., {@link #retainAll(IntSet)} , {@link #intersection(IntSet)} )
+   *
+   * @uml.property name="intersectionCount"
+   */
+  public static long getIntersectionCount() {return intersectionCount;}
+
+  /**
+   * @return number of difference operations (i.e., {@link #removeAll(IntSet)} , {@link #difference(IntSet)} )
+   *
+   * @uml.property name="differenceCount"
+   */
+  public static long getDifferenceCount() {return differenceCount;}
+
+  /**
+   * @return number of symmetric difference operations (i.e., {@link #symmetricDifference(IntSet)} )
+   *
+   * @uml.property name="symmetricDifferenceCount"
+   */
+  public static long getSymmetricDifferenceCount() {return symmetricDifferenceCount;}
+
+  /**
+   * @return number of complement operations (i.e., {@link #complement()} , {@link #complemented()} )
+   *
+   * @uml.property name="complementCount"
+   */
+  public static long getComplementCount() {return complementCount;}
+
+  /**
+   * @return cardinality of union operations (i.e., {@link #addAll(IntSet)} , {@link #union(IntSet)} )
+   *
+   * @uml.property name="unionSizeCount"
+   */
+  public static long getUnionSizeCount() {return unionSizeCount;}
+
+  /**
+   * @return cardinality of intersection operations (i.e., {@link #retainAll(IntSet)} , {@link #intersection(IntSet)} )
+   *
+   * @uml.property name="intersectionSizeCount"
+   */
+  public static long getIntersectionSizeCount() {return intersectionSizeCount;}
+
+  /**
+   * @return cardinality of difference operations (i.e., {@link #removeAll(IntSet)} , {@link #difference(IntSet)} )
+   *
+   * @uml.property name="differenceSizeCount"
+   */
+  public static long getDifferenceSizeCount() {return differenceSizeCount;}
+
+  /**
+   * @return cardinality of symmetric difference operations (i.e., {@link #symmetricDifference(IntSet)} )
+   *
+   * @uml.property name="symmetricDifferenceSizeCount"
+   */
+  public static long
getSymmetricDifferenceSizeCount() {return symmetricDifferenceSizeCount;}
+
+  /**
+   * @return cardinality of complement operations (i.e., {@link #complement()} , {@link #complemented()} )
+   *
+   * @uml.property name="complementSizeCount"
+   */
+  public static long getComplementSizeCount() {return complementSizeCount;}
+
+  /**
+   * @return number of equality check operations (i.e., {@link #equals(Object)} )
+   *
+   * @uml.property name="equalsCount"
+   */
+  public static long getEqualsCount() {return equalsCount;}
+
+  /**
+   * @return number of hash code computations (i.e., {@link #hashCode()} )
+   *
+   * @uml.property name="hashCodeCount"
+   */
+  public static long getHashCodeCount() {return hashCodeCount;}
+
+  /**
+   * @return number of {@link #containsAll(IntSet)} calls
+   *
+   * @uml.property name="containsAllCount"
+   */
+  public static long getContainsAllCount() {return containsAllCount;}
+
+  /**
+   * @return number of {@link #containsAny(IntSet)} calls
+   *
+   * @uml.property name="containsAnyCount"
+   */
+  public static long getContainsAnyCount() {return containsAnyCount;}
+
+  /**
+   * @return number of {@link #containsAtLeast(IntSet, int)} calls
+   *
+   * @uml.property name="containsAtLeastCount"
+   */
+  public static long getContainsAtLeastCount() {return containsAtLeastCount;}
+
+
+  /*
+   * Other statistical methods
+   */
+
+  /**
+   * @return the sum of the five "size" counters (union, intersection,
+   * difference, symmetric difference and complement)
+   */
+  public static long getSizeCheckCount()
+  {
+    return getIntersectionSizeCount()
+           +
+           getUnionSizeCount()
+           + getDifferenceSizeCount()
+           + getSymmetricDifferenceSizeCount()
+           + getComplementSizeCount();
+  }
+
+  /**
+   * Resets all counters to zero
+   */
+  public static void resetCounters()
+  {
+    unionCount = intersectionCount = differenceCount = symmetricDifferenceCount = complementCount =
+    unionSizeCount = intersectionSizeCount = differenceSizeCount = symmetricDifferenceSizeCount = complementSizeCount =
+    equalsCount = hashCodeCount = containsAllCount = containsAnyCount = containsAtLeastCount = 0;
+
}
+
+  /**
+   * @return the summary information string: one "name: value" line per counter
+   */
+  public static String summary()
+  {
+    final StringBuilder s = new StringBuilder();
+    // the formatter appends straight into s
+    final Formatter f = new Formatter(s);
+
+    f.format("unionCount: %d\n", Long.valueOf(unionCount));
+    f.format("intersectionCount: %d\n", Long.valueOf(intersectionCount));
+    f.format("differenceCount: %d\n", Long.valueOf(differenceCount));
+    f.format("symmetricDifferenceCount: %d\n", Long.valueOf(symmetricDifferenceCount));
+    f.format("complementCount: %d\n", Long.valueOf(complementCount));
+    f.format("unionSizeCount: %d\n", Long.valueOf(unionSizeCount));
+    f.format("intersectionSizeCount: %d\n", Long.valueOf(intersectionSizeCount));
+    f.format("differenceSizeCount: %d\n", Long.valueOf(differenceSizeCount));
+    f.format("symmetricDifferenceSizeCount: %d\n", Long.valueOf(symmetricDifferenceSizeCount));
+    f.format("complementSizeCount: %d\n", Long.valueOf(complementSizeCount));
+    f.format("equalsCount: %d\n", Long.valueOf(equalsCount));
+    f.format("hashCodeCount: %d\n", Long.valueOf(hashCodeCount));
+    f.format("containsAllCount: %d\n", Long.valueOf(containsAllCount));
+    f.format("containsAnyCount: %d\n", Long.valueOf(containsAnyCount));
+    f.format("containsAtLeastCount: %d\n", Long.valueOf(containsAtLeastCount));
+
+    return s.toString();
+  }
+
+  /**
+   * Removes the {@link IntSetStatistics} wrapper
+   *
+   * @param c set that may be wrapped, possibly several times
+   *
+   * @return the contained {@link IntSet} instance, or <code>c</code> itself
+   * if it is not an {@link IntSetStatistics}
+   */
+  public static IntSet extractContainer(IntSet c)
+  {
+    if (c instanceof IntSetStatistics) {
+      // recurse in case the wrapped instance is a wrapper as well
+      return extractContainer(((IntSetStatistics) c).container);
+    }
+    return c;
+  }
+
+  /*
+   * MONITORED METHODS: each one increments its counter and delegates to the
+   * wrapped set, passing the unwrapped operand
+   */
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean addAll(IntSet c)
+  {
+    unionCount++;
+    return container.addAll(extractContainer(c));
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public IntSet union(IntSet other)
+  {
+    unionCount++;
+    // the result is wrapped again so that operations on it keep being counted
+    return new IntSetStatistics(container.union(extractContainer(other)));
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+
@Override + public boolean retainAll(IntSet c) + { + intersectionCount++; + return container.retainAll(extractContainer(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet intersection(IntSet other) + { + intersectionCount++; + return new IntSetStatistics(container.intersection(extractContainer(other))); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(IntSet c) + { + differenceCount++; + return container.removeAll(extractContainer(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet difference(IntSet other) + { + differenceCount++; + return new IntSetStatistics(container.difference(extractContainer(other))); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet symmetricDifference(IntSet other) + { + symmetricDifferenceCount++; + return container.symmetricDifference(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + complementCount++; + container.complement(); + } + + /** + * {@inheritDoc} + */ + @Override + public IntSet complemented() + { + complementCount++; + return new IntSetStatistics(container.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public int unionSize(IntSet other) + { + unionSizeCount++; + return container.unionSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(IntSet other) + { + intersectionSizeCount++; + return container.intersectionSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(IntSet other) + { + differenceSizeCount++; + return container.differenceSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(IntSet other) + { + symmetricDifferenceSizeCount++; + return container.symmetricDifferenceSize(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + complementSizeCount++; + return 
container.complementSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(IntSet c) + { + containsAllCount++; + return container.containsAll(extractContainer(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(IntSet other) + { + containsAnyCount++; + return container.containsAny(extractContainer(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(IntSet other, int minElements) + { + containsAtLeastCount++; + return container.containsAtLeast(extractContainer(other), minElements); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + hashCodeCount++; + return container.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + equalsCount++; + return obj != null + && ((obj instanceof IntSetStatistics) + ? container.equals(extractContainer((IntSetStatistics) obj)) + : container.equals(obj)); + } + + /* + * SIMPLE REDIRECTION + */ + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() {return container.bitmapCompressionRatio();} + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() {return container.collectionCompressionRatio();} + + /** + * {@inheritDoc} + */ + @Override + public void clear(int from, int to) {container.clear(from, to);} + + /** + * {@inheritDoc} + */ + @Override + public void fill(int from, int to) {container.fill(from, to);} + + /** + * {@inheritDoc} + */ + @Override + public void clear() {container.clear();} + + /** + * {@inheritDoc} + */ + @Override + public boolean add(int i) {return container.add(i);} + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(int i) {return container.remove(i);} + + /** + * {@inheritDoc} + */ + @Override + public void flip(int e) {container.flip(e);} + + /** + * {@inheritDoc} + */ + @Override + public int get(int i) {return container.get(i);} + + /** + * {@inheritDoc} + */ + @Override 
+ public int indexOf(int e) {return container.indexOf(e);} + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(int i) {return container.contains(i);} + + /** + * {@inheritDoc} + */ + @Override + public int first() {return container.first();} + + /** + * {@inheritDoc} + */ + @Override + public int last() {return container.last();} + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() {return container.isEmpty();} + + /** + * {@inheritDoc} + */ + @Override + public int size() {return container.size();} + + /** + * {@inheritDoc} + */ + @Override + public IntIterator iterator() {return container.iterator();} + + /** + * {@inheritDoc} + */ + @Override + public IntIterator descendingIterator() {return container.descendingIterator();} + + /** + * {@inheritDoc} + */ + @Override + public int[] toArray() {return container.toArray();} + + /** + * {@inheritDoc} + */ + @Override + public int[] toArray(int[] a) {return container.toArray(a);} + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(IntSet o) {return container.compareTo(o);} + + /** + * {@inheritDoc} + */ + @Override + public String toString() {return container.toString();} + + /** + * {@inheritDoc} + */ + @Override + public List powerSet() {return container.powerSet();} + + /** + * {@inheritDoc} + */ + @Override + public List powerSet(int min, int max) {return container.powerSet(min, max);} + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize() {return container.powerSetSize();} + + /** + * {@inheritDoc} + */ + @Override + public int powerSetSize(int min, int max) {return container.powerSetSize(min, max);} + + /** + * {@inheritDoc} + */ + @Override + public double jaccardSimilarity(IntSet other) {return container.jaccardSimilarity(other);} + + /** + * {@inheritDoc} + */ + @Override + public double jaccardDistance(IntSet other) {return container.jaccardDistance(other);} + + /** + * {@inheritDoc} + */ + @Override + public double 
weightedJaccardSimilarity(IntSet other) {return container.weightedJaccardSimilarity(other);} + + /** + * {@inheritDoc} + */ + @Override + public double weightedJaccardDistance(IntSet other) {return container.weightedJaccardDistance(other);} + + /* + * OTHERS + */ + + /** + * {@inheritDoc} + */ + @Override + public IntSet empty() {return new IntSetStatistics(container.empty());} + + /** + * {@inheritDoc} + */ + @Override + public IntSet clone() {return new IntSetStatistics(container.clone());} + + /** + * {@inheritDoc} + */ + @Override + public IntSet convert(int... a) {return new IntSetStatistics(container.convert(a));} + + /** + * {@inheritDoc} + */ + @Override + public IntSet convert(Collection c) {return new IntSetStatistics(container.convert(c));} + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() {return "Analyzed IntSet:\n" + container.debugInfo();} +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java new file mode 100755 index 00000000000..2ba2a6161a8 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwister.java @@ -0,0 +1,869 @@ +package io.druid.extendedset.utilities.random; + + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; + +/** + *

MersenneTwister and MersenneTwisterFast

+ *

Version 13, based on version MT19937(99/10/29) + * of the Mersenne Twister algorithm found at + * + * The Mersenne Twister Home Page, with the initialization + * improved using the new 2002/1/26 initialization algorithm + * By Sean Luke, October 2004. + *

+ *

MersenneTwister is a drop-in subclass replacement + * for java.util.Random. It is properly synchronized and + * can be used in a multithreaded environment. On modern VMs such + * as HotSpot, it is approximately 1/3 slower than java.util.Random. + *

+ *

MersenneTwisterFast is not a subclass of java.util.Random. It has + * the same public methods as Random does, however, and it is + * algorithmically identical to MersenneTwister. MersenneTwisterFast + * has hard-code inlined all of its methods directly, and made all of them + * final (well, the ones of consequence anyway). Further, these + * methods are not synchronized, so the same MersenneTwisterFast + * instance cannot be shared by multiple threads. But all this helps + * MersenneTwisterFast achieve well over twice the speed of MersenneTwister. + * java.util.Random is about 1/3 slower than MersenneTwisterFast. + *

+ *

About the Mersenne Twister

+ *

This is a Java version of the C-program for MT19937: Integer version. + * The MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura, + * who ask: "When you use this, send an email to: matumoto@math.keio.ac.jp + * with an appropriate reference to your work". Indicate that this + * is a translation of their algorithm into Java. + *

+ *

Reference. + * Makoto Matsumoto and Takuji Nishimura, + * "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform + * Pseudo-Random Number Generator", + * ACM Transactions on Modeling and Computer Simulation, + * Vol. 8, No. 1, January 1998, pp 3--30. + *

+ *

About this Version

+ *

+ *

Changes Since V12: clone() method added. + *

+ *

Changes Since V11: stateEquals(...) method added. MersenneTwisterFast + * is equal to other MersenneTwisterFasts with identical state; likewise + * MersenneTwister is equal to other MersenneTwister with identical state. + * This isn't equals(...) because that requires a contract of immutability + * to compare by value. + *

+ *

Changes Since V10: A documentation error suggested that + * setSeed(int[]) required an int[] array 624 long. In fact, the array + * can be any non-zero length. The new version also checks for this fact. + *

+ *

Changes Since V9: readState(stream) and writeState(stream) + * provided. + *

+ *

Changes Since V8: setSeed(int) was only using the first 28 bits + * of the seed; it should have been 32 bits. For small-number seeds the + * behavior is identical. + *

+ *

Changes Since V7: A documentation error in MersenneTwisterFast + * (but not MersenneTwister) stated that nextDouble selects uniformly from + * the closed interval [0,1]. It does not. nextDouble's contract is + * identical across MersenneTwisterFast, MersenneTwister, and java.util.Random, + * namely, selection in the half-open interval [0,1). That is, 1.0 should + * not be returned. A similar contract exists in nextFloat. + *

+ *

Changes Since V6: License has changed from LGPL to BSD. + * New timing information to compare against + * java.util.Random. Recent versions of HotSpot have helped Random increase + * in speed to the point where it is faster than MersenneTwister but slower + * than MersenneTwisterFast (which should be the case, as it's a less complex + * algorithm but is synchronized). + *

+ *

Changes Since V5: New empty constructor made to work the same + * as java.util.Random -- namely, it seeds based on the current time in + * milliseconds. + *

+ *

Changes Since V4: New initialization algorithms + * (see + * http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html). + *

+ *

The MersenneTwister code is based on standard MT19937 C/C++ + * code by Takuji Nishimura, + * with suggestions from Topher Cooper and Marc Rieffel, July 1997. + * The code was originally translated into Java by Michael Lecuyer, + * January 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer. + *

+ *

Java notes

+ *

+ *

This implementation implements the bug fixes made + * in Java 1.2's version of Random, which means it can be used with + * earlier versions of Java. See + * + * the JDK 1.2 java.util.Random documentation for further documentation + * on the random-number generation contracts made. Additionally, there's + * an undocumented bug in the JDK java.util.Random.nextBytes() method, + * which this code fixes. + *

+ *

Just like java.util.Random, this + * generator accepts a long seed but doesn't use all of it. java.util.Random + * uses 48 bits. The Mersenne Twister instead uses 32 bits (int size). + * So it's best if your seed does not exceed the int range. + *

+ *

MersenneTwister can be used reliably + * on JDK version 1.1.5 or above. Earlier Java versions have serious bugs in + * java.util.Random; only MersenneTwisterFast (and not MersenneTwister nor + * java.util.Random) should be used with them. + *

+ *

License

+ *

+ * Copyright (c) 2003 by Sean Luke.
+ * Portions copyright (c) 1993 by Michael Lecuyer.
+ * All rights reserved.
+ *

+ *

Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + *

    + *
  • Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + *
  • Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + *
  • Neither the name of the copyright owners, their employers, nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + *
+ *

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @version 13 + */ + +@SuppressWarnings("serial") +public class MersenneTwister extends java.util.Random implements Cloneable +{ + // Period parameters + private static final int N = 624; + private static final int M = 397; + private static final int MATRIX_A = 0x9908b0df; // private static final * constant vector a + private static final int UPPER_MASK = 0x80000000; // most significant w-r bits + private static final int LOWER_MASK = 0x7fffffff; // least significant r bits + + // Tempering parameters + private static final int TEMPERING_MASK_B = 0x9d2c5680; + private static final int TEMPERING_MASK_C = 0xefc60000; + + private int mt[]; // the array for the state vector + private int mti; // mti==N+1 means mt[N] is not initialized + private int mag01[]; + + // a good initial seed (of int size, though stored in a long) + //private static final long GOOD_SEED = 4357; + + /* implemented here because there's a bug in Random's implementation + of the Gaussian code (divide by zero, and log(0), ugh!), yet its + gaussian variables are private so we can't access them here. 
:-( */ + + private double __nextNextGaussian; + private boolean __haveNextNextGaussian; + + /* We're overriding all internal data, to my knowledge, so this should be okay */ + + /** + * Constructor using the default seed. + */ + public MersenneTwister() + { + this(System.currentTimeMillis()); + } + + /** + * Constructor using a given seed. Though you pass this seed in + * as a long, it's best to make sure it's actually an integer. + * + * @param seed + */ + public MersenneTwister(final long seed) + { + super(seed); /* just in case */ + setSeed(seed); + } + + /** + * Constructor using an array of integers as seed. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + public MersenneTwister(final int[] array) + { + super(System.currentTimeMillis()); /* pick something at random just in case */ + setSeed(array); + } + + /** + * Tests the code. 
+ * + * @param args + */ + public static void main(String args[]) + { + int j; + + MersenneTwister r; + + // CORRECTNESS TEST + // COMPARE WITH http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out + + r = new MersenneTwister(new int[]{0x123, 0x234, 0x345, 0x456}); + System.out.println("Output of MersenneTwister with new (2002/1/26) seeding mechanism"); + for (j = 0; j < 1000; j++) { + // first, convert the int from signed to "unsigned" + long l = r.nextInt(); + if (l < 0) { + l += 4294967296L; // max int value + } + String s = String.valueOf(l); + while (s.length() < 10) { + s = " " + s; // buffer + } + System.out.print(s + " "); + if (j % 5 == 4) { + System.out.println(); + } + } + + // SPEED TEST + + final long SEED = 4357; + + int xx; + long ms; + System.out.println("\nTime to test grabbing 100000000 ints"); + + r = new MersenneTwister(SEED); + ms = System.currentTimeMillis(); + xx = 0; + for (j = 0; j < 100000000; j++) { + xx += r.nextInt(); + } + System.out.println("Mersenne Twister: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); + + System.out.println("To compare this with java.util.Random, run this same test on MersenneTwisterFast."); + System.out.println("The comparison with Random is removed from MersenneTwister because it is a proper"); + System.out.println("subclass of Random and this unfairly makes some of Random's methods un-inlinable,"); + System.out.println("so it would make Random look worse than it is."); + + // TEST TO COMPARE TYPE CONVERSION BETWEEN + // MersenneTwisterFast.java AND MersenneTwister.java + + + System.out.println("\nGrab the first 1000 booleans"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)"); + r = new MersenneTwister(SEED); + for (j = 0; j < 
1000; j++) { + System.out.print(r.nextBoolean(j / 999.0) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean(j / 999.0f) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + byte[] bytes = new byte[1000]; + System.out.println("\nGrab the first 1000 bytes using nextBytes"); + r = new MersenneTwister(SEED); + r.nextBytes(bytes); + for (j = 0; j < 1000; j++) { + System.out.print(bytes[j] + " "); + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + byte b; + System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print((b = r.nextByte()) + " "); + if (b != bytes[j]) { + System.out.print("BAD "); + } + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 shorts"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextShort() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints of different sizes"); + r = new MersenneTwister(SEED); + int max = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt(max) + " "); + max *= 2; + if (max <= 0) { + max = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + 
} + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 longs"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 longs of different sizes"); + r = new MersenneTwister(SEED); + long max2 = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong(max2) + " "); + max2 *= 2; + if (max2 <= 0) { + max2 = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 floats"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextFloat() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 doubles"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextDouble() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 gaussian doubles"); + r = new MersenneTwister(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextGaussian() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + } + + /** + * {@inheritDoc} + */ + @Override + public Object clone() throws CloneNotSupportedException + { + MersenneTwister f = (MersenneTwister) (super.clone()); + f.mt = mt.clone(); + f.mag01 = mag01.clone(); + return f; + } + + /** + * @param o + * + * @return ? 
+ */ + public boolean stateEquals(Object o) + { + if (o == this) { + return true; + } + if (o == null || !(o instanceof MersenneTwister)) { + return false; + } + MersenneTwister other = (MersenneTwister) o; + if (mti != other.mti) { + return false; + } + for (int x = 0; x < mag01.length; x++) { + if (mag01[x] != other.mag01[x]) { + return false; + } + } + for (int x = 0; x < mt.length; x++) { + if (mt[x] != other.mt[x]) { + return false; + } + } + return true; + } + + /** + * Reads the entire state of the MersenneTwister RNG from the stream + * + * @param stream + * + * @throws IOException + */ + public void readState(DataInputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + mt[x] = stream.readInt(); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + mag01[x] = stream.readInt(); + } + + mti = stream.readInt(); + __nextNextGaussian = stream.readDouble(); + __haveNextNextGaussian = stream.readBoolean(); + } + + /** + * Writes the entire state of the MersenneTwister RNG to the stream + * + * @param stream + * + * @throws IOException + */ + public void writeState(DataOutputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mt[x]); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mag01[x]); + } + + stream.writeInt(mti); + stream.writeDouble(__nextNextGaussian); + stream.writeBoolean(__haveNextNextGaussian); + } + + /** + * Initialize the pseudo random number generator. Don't + * pass in a long that's bigger than an int (Mersenne Twister + * only uses the first 32 bits for its seed). + */ + @Override + synchronized public void setSeed(final long seed) + { + // it's always good style to call super + super.setSeed(seed); + + // Due to a bug in java.util.Random clear up to 1.2, we're + // doing our own Gaussian variable. 
+ __haveNextNextGaussian = false; + + mt = new int[N]; + + mag01 = new int[2]; + mag01[0] = 0x0; + mag01[1] = MATRIX_A; + + mt[0] = (int) (seed & 0xffffffff); + for (mti = 1; mti < N; mti++) { + mt[mti] = + (1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >>> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffff; + /* for >32 bit machines */ + } + } + + /** + * Sets the seed of the MersenneTwister using an array of integers. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + synchronized public void setSeed(final int[] array) + { + if (array.length == 0) { + throw new IllegalArgumentException("Array length must be greater than zero"); + } + int i, j, k; + setSeed(19650218); + i = 1; + j = 0; + k = (N > array.length ? N : array.length); + for (; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + j++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + if (j >= array.length) { + j = 0; + } + } + for (k = N - 1; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + } + mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */ + } + + /* If you've got a truly old version of Java, you can omit these + two next methods. */ + + /** + * Returns an integer with bits bits filled with a random number. 
+ */ + @Override + synchronized protected int next(final int bits) + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return y >>> (32 - bits); // hope that's right! + } + + private synchronized void writeObject(final ObjectOutputStream out) + throws IOException + { + // just so we're synchronized. + out.defaultWriteObject(); + } + + private synchronized void readObject(final ObjectInputStream in) + throws IOException, ClassNotFoundException + { + // just so we're synchronized. + in.defaultReadObject(); + } + + /** + * This method is missing from jdk 1.0.x and below. JDK 1.1 + * includes this for us, but what the heck. + */ + @Override + public boolean nextBoolean() {return next(1) != 0;} + + /** + * This generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. Not as precise a random real + * event as nextBoolean(double), but twice as fast. To explicitly + * use this, remember you may need to cast to float first. + * + * @param probability + * + * @return ? 
+ */ + public boolean nextBoolean(final float probability) + { + if (probability < 0.0f || probability > 1.0f) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0f) { + return false; // fix half-open issues + } else if (probability == 1.0f) { + return true; // fix half-open issues + } + return nextFloat() < probability; + } + + /** + * This generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. + * + * @param probability + * + * @return ? + */ + public boolean nextBoolean(final double probability) + { + if (probability < 0.0 || probability > 1.0) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0) { + return false; // fix half-open issues + } else if (probability == 1.0) { + return true; // fix half-open issues + } + return nextDouble() < probability; + } + + /** + * This method is missing from JDK 1.1 and below. JDK 1.2 + * includes this for us, but what the heck. + */ + @Override + public int nextInt(final int n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + if ((n & -n) == n) { + return (int) ((n * (long) next(31)) >> 31); + } + + int bits, val; + do { + bits = next(31); + val = bits % n; + } + while (bits - val + (n - 1) < 0); + return val; + } + + /** + * This method is for completness' sake. + * Returns a long drawn uniformly from 0 to n-1. Suffice it to say, + * n must be > 0, or an IllegalArgumentException is raised. + * + * @param n + * + * @return ? + */ + public long nextLong(final long n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + long bits, val; + do { + bits = (nextLong() >>> 1); + val = bits % n; + } + while (bits - val + (n - 1) < 0); + return val; + } + + /** + * A bug fix for versions of JDK 1.1 and below. 
JDK 1.2 fixes + * this for us, but what the heck. + * + * @return ? + */ + @Override + public double nextDouble() + { + return (((long) next(26) << 27) + next(27)) + / (double) (1L << 53); + } + + /** + * A bug fix for versions of JDK 1.1 and below. JDK 1.2 fixes + * this for us, but what the heck. + */ + + @Override + public float nextFloat() + { + return next(24) / ((float) (1 << 24)); + } + + /** + * A bug fix for all versions of the JDK. The JDK appears to + * use all four bytes in an integer as independent byte values! + * Totally wrong. I've submitted a bug report. + */ + + @Override + public void nextBytes(final byte[] bytes) + { + for (int x = 0; x < bytes.length; x++) { + bytes[x] = (byte) next(8); + } + } + + /** + * For completeness' sake, though it's not in java.util.Random. + * + * @return ? + */ + public char nextChar() + { + // chars are 16-bit UniCode values + return (char) (next(16)); + } + + /** + * For completeness' sake, though it's not in java.util.Random. + * + * @return ? + */ + public short nextShort() + { + return (short) (next(16)); + } + + /** + * For completeness' sake, though it's not in java.util.Random. + * + * @return ? + */ + public byte nextByte() + { + return (byte) (next(8)); + } +// } + + /** + * A bug fix for all JDK code including 1.2. nextGaussian can theoretically + * ask for the log of 0 and divide it by 0! See Java bug + * + * http://developer.java.sun.com/developer/bugParade/bugs/4254501.html + * + * @return ? 
+ */ + @Override + synchronized public double nextGaussian() + { + if (__haveNextNextGaussian) { + __haveNextNextGaussian = false; + return __nextNextGaussian; + } +// else +// { + double v1, v2, s; + do { + v1 = 2 * nextDouble() - 1; // between -1.0 and 1.0 + v2 = 2 * nextDouble() - 1; // between -1.0 and 1.0 + s = v1 * v1 + v2 * v2; + } while (s >= 1 || s == 0); + double multiplier = /*Strict*/Math.sqrt(-2 * /*Strict*/Math.log(s) / s); + __nextNextGaussian = v2 * multiplier; + __haveNextNextGaussian = true; + return v1 * multiplier; + } + +} diff --git a/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java new file mode 100755 index 00000000000..0789c2008f7 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/utilities/random/MersenneTwisterFast.java @@ -0,0 +1,1470 @@ +package io.druid.extendedset.utilities.random; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.Serializable; +import java.util.Random; + +/** + *

MersenneTwister and MersenneTwisterFast

+ *

Version 13, based on version MT19937(99/10/29) + * of the Mersenne Twister algorithm found at + * + * The Mersenne Twister Home Page, with the initialization + * improved using the new 2002/1/26 initialization algorithm. + * By Sean Luke, October 2004. + *

+ *

MersenneTwister is a drop-in subclass replacement + * for java.util.Random. It is properly synchronized and + * can be used in a multithreaded environment. On modern VMs such + * as HotSpot, it is approximately 1/3 slower than java.util.Random. + *

+ *

MersenneTwisterFast is not a subclass of java.util.Random. It has + * the same public methods as Random does, however, and it is + * algorithmically identical to MersenneTwister. MersenneTwisterFast + * has hard-code inlined all of its methods directly, and made all of them + * final (well, the ones of consequence anyway). Further, these + * methods are not synchronized, so the same MersenneTwisterFast + * instance cannot be shared by multiple threads. But all this helps + * MersenneTwisterFast achieve well over twice the speed of MersenneTwister. + * java.util.Random is about 1/3 slower than MersenneTwisterFast. + *

+ *

About the Mersenne Twister

+ *

This is a Java version of the C-program for MT19937: Integer version. + * The MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura, + * who ask: "When you use this, send an email to: matumoto@math.keio.ac.jp + * with an appropriate reference to your work". Indicate that this + * is a translation of their algorithm into Java. + *

+ *

Reference. + * Makoto Matsumoto and Takuji Nishimura, + * "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform + * Pseudo-Random Number Generator", + * ACM Transactions on Modeling and Computer Simulation, + * Vol. 8, No. 1, January 1998, pp 3--30. + *

+ *

About this Version

+ *

+ *

Changes Since V12: clone() method added. + *

+ *

Changes Since V11: stateEquals(...) method added. MersenneTwisterFast + * is equal to other MersenneTwisterFasts with identical state; likewise + * MersenneTwister is equal to other MersenneTwister with identical state. + * This isn't equals(...) because that requires a contract of immutability + * to compare by value. + *

+ *

Changes Since V10: A documentation error suggested that + * setSeed(int[]) required an int[] array 624 long. In fact, the array + * can be any non-zero length. The new version also checks for this fact. + *

+ *

Changes Since V9: readState(stream) and writeState(stream) + * provided. + *

+ *

Changes Since V8: setSeed(int) was only using the first 28 bits + * of the seed; it should have been 32 bits. For small-number seeds the + * behavior is identical. + *

+ *

Changes Since V7: A documentation error in MersenneTwisterFast + * (but not MersenneTwister) stated that nextDouble selects uniformly from + * the closed interval [0,1]. It does not. nextDouble's contract is + * identical across MersenneTwisterFast, MersenneTwister, and java.util.Random, + * namely, selection in the half-open interval [0,1). That is, 1.0 should + * not be returned. A similar contract exists in nextFloat. + *

+ *

Changes Since V6: License has changed from LGPL to BSD. + * New timing information to compare against + * java.util.Random. Recent versions of HotSpot have helped Random increase + * in speed to the point where it is faster than MersenneTwister but slower + * than MersenneTwisterFast (which should be the case, as it's a less complex + * algorithm but is synchronized). + *

+ *

Changes Since V5: New empty constructor made to work the same + * as java.util.Random -- namely, it seeds based on the current time in + * milliseconds. + *

+ *

Changes Since V4: New initialization algorithms; + * see + * http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html + *

+ *

The MersenneTwister code is based on standard MT19937 C/C++ + * code by Takuji Nishimura, + * with suggestions from Topher Cooper and Marc Rieffel, July 1997. + * The code was originally translated into Java by Michael Lecuyer, + * January 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer. + *

+ *

Java notes

+ *

+ *

This implementation implements the bug fixes made + * in Java 1.2's version of Random, which means it can be used with + * earlier versions of Java. See + * + * the JDK 1.2 java.util.Random documentation for further documentation + * on the random-number generation contracts made. Additionally, there's + * an undocumented bug in the JDK java.util.Random.nextBytes() method, + * which this code fixes. + *

+ *

Just like java.util.Random, this + * generator accepts a long seed but doesn't use all of it. java.util.Random + * uses 48 bits. The Mersenne Twister instead uses 32 bits (int size). + * So it's best if your seed does not exceed the int range. + *

+ *

MersenneTwister can be used reliably + * on JDK version 1.1.5 or above. Earlier Java versions have serious bugs in + * java.util.Random; only MersenneTwisterFast (and not MersenneTwister nor + * java.util.Random) should be used with them. + *

+ *

License

+ *

+ * Copyright (c) 2003 by Sean Luke.
+ * Portions copyright (c) 1993 by Michael Lecuyer.
+ * All rights reserved.
+ *

+ *

Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + *

    + *
  • Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + *
  • Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + *
  • Neither the name of the copyright owners, their employers, nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + *
+ *

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * @version 13 + */ + +// Note: this class is hard-inlined in all of its methods. This makes some of +// the methods well-nigh unreadable in their complexity. In fact, the Mersenne +// Twister is fairly easy code to understand: if you're trying to get a handle +// on the code, I strongly suggest looking at MersenneTwister.java first. 
+// -- Sean + +@SuppressWarnings("serial") +public class MersenneTwisterFast implements Serializable, Cloneable +{ + // Period parameters + private static final int N = 624; + private static final int M = 397; + private static final int MATRIX_A = 0x9908b0df; // private static final * constant vector a + private static final int UPPER_MASK = 0x80000000; // most significant w-r bits + private static final int LOWER_MASK = 0x7fffffff; // least significant r bits + + + // Tempering parameters + private static final int TEMPERING_MASK_B = 0x9d2c5680; + private static final int TEMPERING_MASK_C = 0xefc60000; + + private int mt[]; // the array for the state vector + private int mti; // mti==N+1 means mt[N] is not initialized + private int mag01[]; + + // a good initial seed (of int size, though stored in a long) + //private static final long GOOD_SEED = 4357; + + private double __nextNextGaussian; + private boolean __haveNextNextGaussian; + + /* We're overriding all internal data, to my knowledge, so this should be okay */ + + /** + * Constructor using the default seed. + */ + public MersenneTwisterFast() + { + this(System.currentTimeMillis()); + } + + /** + * Constructor using a given seed. Though you pass this seed in + * as a long, it's best to make sure it's actually an integer. + * + * @param seed + */ + public MersenneTwisterFast(final long seed) + { + setSeed(seed); + } + + /** + * Constructor using an array of integers as seed. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + public MersenneTwisterFast(final int[] array) + { + setSeed(array); + } + + /** + * Tests the code. 
+ * + * @param args + */ + public static void main(String args[]) + { + int j; + + MersenneTwisterFast r; + + // CORRECTNESS TEST + // COMPARE WITH http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out + + r = new MersenneTwisterFast(new int[]{0x123, 0x234, 0x345, 0x456}); + System.out.println("Output of MersenneTwisterFast with new (2002/1/26) seeding mechanism"); + for (j = 0; j < 1000; j++) { + // first, convert the int from signed to "unsigned" + long l = r.nextInt(); + if (l < 0) { + l += 4294967296L; // max int value + } + String s = String.valueOf(l); + while (s.length() < 10) { + s = " " + s; // buffer + } + System.out.print(s + " "); + if (j % 5 == 4) { + System.out.println(); + } + } + + // SPEED TEST + + final long SEED = 4357; + + int xx; + long ms; + System.out.println("\nTime to test grabbing 100000000 ints"); + + Random rr = new Random(SEED); + xx = 0; + ms = System.currentTimeMillis(); + for (j = 0; j < 100000000; j++) { + xx += rr.nextInt(); + } + System.out.println("java.util.Random: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); + + r = new MersenneTwisterFast(SEED); + ms = System.currentTimeMillis(); + xx = 0; + for (j = 0; j < 100000000; j++) { + xx += r.nextInt(); + } + System.out.println("Mersenne Twister Fast: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx); + + // TEST TO COMPARE TYPE CONVERSION BETWEEN + // MersenneTwisterFast.java AND MersenneTwister.java + + System.out.println("\nGrab the first 1000 booleans"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean((j / 999.0)) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if 
(!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextBoolean((j / 999.0f)) + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + byte[] bytes = new byte[1000]; + System.out.println("\nGrab the first 1000 bytes using nextBytes"); + r = new MersenneTwisterFast(SEED); + r.nextBytes(bytes); + for (j = 0; j < 1000; j++) { + System.out.print(bytes[j] + " "); + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + byte b; + System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print((b = r.nextByte()) + " "); + if (b != bytes[j]) { + System.out.print("BAD "); + } + if (j % 16 == 15) { + System.out.println(); + } + } + if (!(j % 16 == 15)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 shorts"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextShort() + " "); + if (j % 8 == 7) { + System.out.println(); + } + } + if (!(j % 8 == 7)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 ints of different sizes"); + r = new MersenneTwisterFast(SEED); + int max = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextInt(max) + " "); + max *= 2; + if (max <= 0) { + max = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 
1000 longs"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 longs of different sizes"); + r = new MersenneTwisterFast(SEED); + long max2 = 1; + for (j = 0; j < 1000; j++) { + System.out.print(r.nextLong(max2) + " "); + max2 *= 2; + if (max2 <= 0) { + max2 = 1; + } + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 floats"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextFloat() + " "); + if (j % 4 == 3) { + System.out.println(); + } + } + if (!(j % 4 == 3)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 doubles"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextDouble() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + System.out.println("\nGrab the first 1000 gaussian doubles"); + r = new MersenneTwisterFast(SEED); + for (j = 0; j < 1000; j++) { + System.out.print(r.nextGaussian() + " "); + if (j % 3 == 2) { + System.out.println(); + } + } + if (!(j % 3 == 2)) { + System.out.println(); + } + + } + + /** + * {@inheritDoc} + */ + @Override + public Object clone() throws CloneNotSupportedException + { + MersenneTwisterFast f = (MersenneTwisterFast) (super.clone()); + f.mt = mt.clone(); + f.mag01 = mag01.clone(); + return f; + } + + /** + * @param o + * + * @return ? 
+ */ + public boolean stateEquals(Object o) + { + if (o == this) { + return true; + } + if (o == null || !(o instanceof MersenneTwisterFast)) { + return false; + } + MersenneTwisterFast other = (MersenneTwisterFast) o; + if (mti != other.mti) { + return false; + } + for (int x = 0; x < mag01.length; x++) { + if (mag01[x] != other.mag01[x]) { + return false; + } + } + for (int x = 0; x < mt.length; x++) { + if (mt[x] != other.mt[x]) { + return false; + } + } + return true; + } + + /** + * Reads the entire state of the MersenneTwister RNG from the stream + * + * @param stream + * + * @throws IOException + */ + public void readState(DataInputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + mt[x] = stream.readInt(); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + mag01[x] = stream.readInt(); + } + + mti = stream.readInt(); + __nextNextGaussian = stream.readDouble(); + __haveNextNextGaussian = stream.readBoolean(); + } + + /** + * Writes the entire state of the MersenneTwister RNG to the stream + * + * @param stream + * + * @throws IOException + */ + public void writeState(DataOutputStream stream) throws IOException + { + int len = mt.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mt[x]); + } + + len = mag01.length; + for (int x = 0; x < len; x++) { + stream.writeInt(mag01[x]); + } + + stream.writeInt(mti); + stream.writeDouble(__nextNextGaussian); + stream.writeBoolean(__haveNextNextGaussian); + } + + /** + * Initialize the pseudo random number generator. Don't + * pass in a long that's bigger than an int (Mersenne Twister + * only uses the first 32 bits for its seed). + * + * @param seed + */ + synchronized public void setSeed(final long seed) + { + // Due to a bug in java.util.Random clear up to 1.2, we're + // doing our own Gaussian variable. 
+ __haveNextNextGaussian = false; + + mt = new int[N]; + + mag01 = new int[2]; + mag01[0] = 0x0; + mag01[1] = MATRIX_A; + + mt[0] = (int) (seed & 0xffffffff); + for (mti = 1; mti < N; mti++) { + mt[mti] = + (1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >>> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt[mti] &= 0xffffffff; + /* for >32 bit machines */ + } + } + + /** + * Sets the seed of the MersenneTwister using an array of integers. + * Your array must have a non-zero length. Only the first 624 integers + * in the array are used; if the array is shorter than this then + * integers are repeatedly used in a wrap-around fashion. + * + * @param array + */ + synchronized public void setSeed(final int[] array) + { + if (array.length == 0) { + throw new IllegalArgumentException("Array length must be greater than zero"); + } + int i, j, k; + setSeed(19650218); + i = 1; + j = 0; + k = (N > array.length ? N : array.length); + for (; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + j++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + if (j >= array.length) { + j = 0; + } + } + for (k = N - 1; k != 0; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + if (i >= N) { + mt[0] = mt[N - 1]; + i = 1; + } + } + mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */ + } + + /** + * @return ? 
+ */ + public final int nextInt() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return y; + } + + /** + * @return ? + */ + public final short nextShort() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (short) (y >>> 16); + } + + /** + * @return ? 
+ */ + public final char nextChar() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (char) (y >>> 16); + } + + /** + * @return ? + */ + public final boolean nextBoolean() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return ((y >>> 31) != 0); + } + + /** + * This 
generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. Not as precise a random real + * event as nextBoolean(double), but twice as fast. To explicitly + * use this, remember you may need to cast to float first. + * + * @param probability + * + * @return ? + */ + public final boolean nextBoolean(final float probability) + { + int y; + + if (probability < 0.0f || probability > 1.0f) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0f) { + return false; // fix half-open issues + } else if (probability == 1.0f) { + return true; // fix half-open issues + } + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (y >>> 8) / ((float) (1 << 24)) < probability; + } + + /** + * This generates a coin flip with a probability probability + * of returning true, else returning false. probability must + * be between 0.0 and 1.0, inclusive. + * + * @param probability + * + * @return ? 
+ */ + public final boolean nextBoolean(final double probability) + { + int y; + int z; + + if (probability < 0.0 || probability > 1.0) { + throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive."); + } + if (probability == 0.0) { + return false; // fix half-open issues + } else if (probability == 1.0) { + return true; // fix half-open issues + } + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // 
TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + /* derived from nextDouble documentation in jdk 1.2 docs, see top */ + return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53) < probability; + } + + /** + * @return ? + */ + public final byte nextByte() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (byte) (y >>> 24); + } + + /** + * @param bytes + */ + public final void nextBytes(byte[] bytes) + { + int y; + + for (int x = 0; x < bytes.length; x++) { + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) 
^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + bytes[x] = (byte) (y >>> 24); + } + } + + /** + * @return ? + */ + public final long nextLong() + { + int y; + int z; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] 
= mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + return (((long) y) << 32) + z; + } + + /** + * Returns a long drawn uniformly from 0 to n-1. Suffice it to say, + * n must be > 0, or an IllegalArgumentException is raised. + * + * @param n + * + * @return ? + */ + public final long nextLong(final long n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + long bits, val; + do { + int y; + int z; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] 
& UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + bits = (((((long) y) << 32) + z) >>> 1); + val = bits % n; + } while (bits - val + (n - 1) < 0); + return val; + } + + /** + * Returns a random double in the half-open range from [0.0,1.0). Thus 0.0 is a valid + * result but 1.0 is not. + * + * @return ? + */ + public final double nextDouble() + { + int y; + int z; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & 
LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + /* derived from nextDouble documentation in jdk 1.2 docs, see top */ + return ((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53); + } + + /** + * @return ? + */ + public final double nextGaussian() + { + if (__haveNextNextGaussian) { + __haveNextNextGaussian = false; + return __nextNextGaussian; + } +// else +// { + double v1, v2, s; + do { + int y; + int z; + int a; + int b; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are 
slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + for (; kk < N - 1; kk++) { + z = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (z >>> 1) ^ mag01[z & 0x1]; + } + z = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (z >>> 1) ^ mag01[z & 0x1]; + + mti = 0; + } + + z = mt[mti++]; + z ^= z >>> 11; // TEMPERING_SHIFT_U(z) + z ^= (z << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(z) + z ^= (z << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(z) + z ^= (z >>> 18); // TEMPERING_SHIFT_L(z) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (a >>> 1) ^ mag01[a & 0x1]; + } + for (; kk < N - 1; kk++) { + a = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (a >>> 1) ^ mag01[a & 0x1]; + } + a = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (a >>> 1) ^ mag01[a & 0x1]; + + mti = 0; + } + + a = mt[mti++]; + a ^= a >>> 11; // TEMPERING_SHIFT_U(a) + a ^= (a << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(a) + a ^= (a << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(a) + a ^= (a >>> 18); // TEMPERING_SHIFT_L(a) + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + 
mt[kk] = mt[kk + M] ^ (b >>> 1) ^ mag01[b & 0x1]; + } + for (; kk < N - 1; kk++) { + b = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (b >>> 1) ^ mag01[b & 0x1]; + } + b = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (b >>> 1) ^ mag01[b & 0x1]; + + mti = 0; + } + + b = mt[mti++]; + b ^= b >>> 11; // TEMPERING_SHIFT_U(b) + b ^= (b << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(b) + b ^= (b << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(b) + b ^= (b >>> 18); // TEMPERING_SHIFT_L(b) + + /* derived from nextDouble documentation in jdk 1.2 docs, see top */ + v1 = 2 * + (((((long) (y >>> 6)) << 27) + (z >>> 5)) / (double) (1L << 53)) + - 1; + v2 = 2 * (((((long) (a >>> 6)) << 27) + (b >>> 5)) / (double) (1L << 53)) + - 1; + s = v1 * v1 + v2 * v2; + } while (s >= 1 || s == 0); + double multiplier = /*Strict*/Math.sqrt(-2 * /*Strict*/Math.log(s) / s); + __nextNextGaussian = v2 * multiplier; + __haveNextNextGaussian = true; + return v1 * multiplier; +// } + } + + /** + * Returns a random float in the half-open range from [0.0f,1.0f). Thus 0.0f is a valid + * result but 1.0f is not. + * + * @return ? 
+ */ + public final float nextFloat() + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (y >>> 8) / ((float) (1 << 24)); + } + + /** + * Returns an integer drawn uniformly from 0 to n-1. Suffice it to say, + * n must be > 0, or an IllegalArgumentException is raised. + * + * @param n + * + * @return ? 
+ */ + public final int nextInt(final int n) + { + if (n <= 0) { + throw new IllegalArgumentException("n must be > 0"); + } + + if ((n & -n) == n) // i.e., n is a power of 2 + { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + return (int) ((n * (long) (y >>> 1)) >> 31); + } + + int bits, val; + do { + int y; + + if (mti >= N) // generate N words at one time + { + int kk; + @SuppressWarnings("hiding") + final int[] mt = this.mt; // locals are slightly faster + @SuppressWarnings("hiding") + final int[] mag01 = this.mag01; // locals are slightly faster + + for (kk = 0; kk < N - M; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + y ^= y >>> 11; // TEMPERING_SHIFT_U(y) + y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y) + y ^= (y << 15) & TEMPERING_MASK_C; // 
TEMPERING_SHIFT_T(y) + y ^= (y >>> 18); // TEMPERING_SHIFT_L(y) + + bits = (y >>> 1); + val = bits % n; + } while (bits - val + (n - 1) < 0); + return val; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java new file mode 100755 index 00000000000..cb4bf71b41d --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/GenericExtendedSet.java @@ -0,0 +1,885 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.druid.extendedset.wrappers; + + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.SortedSet; + +/** + * {@link ExtendedSet}-based class internally managed by an instance of any + * class implementing {@link Collection} + * + * @param the type of elements maintained by this set + * + * @author Alessandro Colantonio + * @version $Id$ + */ +public class GenericExtendedSet> extends AbstractExtendedSet +{ + /** + * class implementing {@link Collection} that is used to collect elements + */ + private final Class setClass; + /** + * elements of the set + */ + private /*final*/ Collection elements; + + /** + * Empty-set constructor + * + * @param setClass {@link Collection}-derived class + */ + @SuppressWarnings("unchecked") + public GenericExtendedSet(Class setClass) + { + this.setClass = setClass; + try { + elements = setClass.newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return isEmpty() ? 
0D : 1D; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet empty() + { + return new GenericExtendedSet(setClass); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator iterator() + { + // prepare the sorted set + final Collection sorted; + if (elements instanceof SortedSet || elements instanceof List) { + //NOTE: SortedSet.comparator() is null + sorted = elements; + } else { + sorted = new ArrayList(elements); + Collections.sort((List) sorted); + } + + // iterate over the sorted set + return new ExtendedIterator() + { + final Iterator itr = sorted.iterator(); + T current; + + { + current = itr.hasNext() ? itr.next() : null; + } + + @Override + public void skipAllBefore(T element) + { + while (current != null && element.compareTo(current) > 0) { // stop once exhausted instead of calling compareTo(null) + next(); + } + } + + @Override + public boolean hasNext() + { + return current != null; + } + + @Override + public T next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + T prev = current; + current = itr.hasNext() ? itr.next() : null; + return prev; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator descendingIterator() + { + // prepare the sorted set + final Collection sorted; +//TODO +// if (elements instanceof SortedSet || elements instanceof List) { +// //NOTE: SortedSet.comparator() is null +// sorted = elements; +// } else { + sorted = new ArrayList(elements); + Collections.sort((List) sorted, Collections.reverseOrder()); +// } + + // iterate over the sorted set + return new ExtendedIterator() + { + final Iterator itr = sorted.iterator(); + T current; + + { + current = itr.hasNext() ? 
itr.next() : null; + } + + @Override + public void skipAllBefore(T element) + { + while (current != null && element.compareTo(current) < 0) { // descending order: entries greater than element come first; stop once exhausted + next(); + } + } + + @Override + public boolean hasNext() + { + return current != null; + } + + @Override + public T next() + { + if (!hasNext()) { + throw new NoSuchElementException(); + } + T prev = current; + current = itr.hasNext() ? itr.next() : null; + return prev; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public GenericExtendedSet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! + GenericExtendedSet c = empty(); + if (elements instanceof Cloneable) { + try { + c.elements = (Collection) elements.getClass().getMethod("clone").invoke(elements); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } else { + c.elements.addAll(elements); + } + return c; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return setClass.getSimpleName() + ": " + elements.toString(); + } + + + + /* + * Collection methods + */ + + /** + * {@inheritDoc} + */ + @Override + public boolean add(T e) + { + if (elements instanceof List) { + final List l = (List) elements; + int pos = Collections.binarySearch(l, e); + if (pos >= 0) { + return false; + } + l.add(-(pos + 1), e); + return true; + } + return elements.add(e); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean remove(Object o) + { + if (elements instanceof List) { + try { + final List l = (List) elements; + int pos = Collections.binarySearch(l, (T) o); + if (pos < 0) { + return false; + } + l.remove(pos); + return true; + } + catch (ClassCastException e) { + return false; + } + } + return elements.remove(o); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean contains(Object o) + { + if (elements instanceof List) 
{ + try { + return Collections.binarySearch((List) elements, (T) o) >= 0; + } + catch (ClassCastException e) { + return false; + } + } + return elements.contains(o); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean containsAll(Collection c) + { + if (c == null || c.isEmpty() || this == c) { // every set contains the empty collection and itself + return true; + } + if (isEmpty()) { // an empty set cannot contain a non-empty collection + return false; + } + + if (elements instanceof List + && c instanceof GenericExtendedSet + && ((GenericExtendedSet) c).elements instanceof List) { + Iterator thisItr = elements.iterator(); + Iterator otherItr = ((GenericExtendedSet) c).elements.iterator(); + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r; + while ((r = otherValue.compareTo(thisValue)) > 0) { + if (!thisItr.hasNext()) { + return false; + } + thisValue = thisItr.next(); + } + if (r < 0) { + return false; + } + } + return !otherItr.hasNext(); + } + + return elements.containsAll(c); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection c) + { + if (elements instanceof List) { + //TODO: copy the union code here + Collection res = union(c).elements; + boolean r = !res.equals(elements); + elements = res; + return r; + } + return elements.addAll(c); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean retainAll(Collection c) + { + if (elements instanceof List) { + try { + //TODO: copy the intersection code here + Collection res = intersection((Collection) c).elements; + boolean r = !res.equals(elements); + elements = res; + return r; + } + catch (ClassCastException e) { + return false; + } + } + return elements.retainAll(c); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean removeAll(Collection c) + { + if (elements instanceof List) { + try { + //TODO: copy the difference code here + Collection res = difference((Collection) c).elements; + 
boolean r = !res.equals(elements); + elements = res; + return r; + } + catch (ClassCastException e) { + return false; + } + } + return elements.removeAll(c); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object o) + { + return o instanceof GenericExtendedSet && ((GenericExtendedSet) o).elements.equals(elements); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() {return elements.size();} + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() {return elements.isEmpty();} + + /** + * {@inheritDoc} + */ + @Override + public void clear() {elements.clear();} + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() {return elements.hashCode();} + + + /* + * SortedSet methods + */ + + /** + * {@inheritDoc} + */ + @Override + public Comparator comparator() + { + return null; + } + + /** + * {@inheritDoc} + */ + @Override + public T first() + { + if (elements instanceof SortedSet) { + return ((SortedSet) elements).first(); + } + if (elements instanceof List) { + return ((List) elements).get(0); + } + return super.first(); + } + + /** + * {@inheritDoc} + */ + @Override + public T last() + { + if (elements instanceof SortedSet) { + return ((SortedSet) elements).last(); + } + if (elements instanceof List) { + return ((List) elements).get(elements.size() - 1); + } + return super.last(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet headSet(T toElement) + { + if (elements instanceof SortedSet) { + GenericExtendedSet c = empty(); + c.elements = ((SortedSet) elements).headSet(toElement); + return c; + } + return super.headSet(toElement); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet tailSet(T fromElement) + { + if (elements instanceof SortedSet) { + GenericExtendedSet c = empty(); + c.elements = ((SortedSet) elements).tailSet(fromElement); + return c; + } + return super.tailSet(fromElement); // was headSet(fromElement), which returned the opposite half of the set + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet subSet(T 
fromElement, T toElement) + { + if (elements instanceof SortedSet) { + GenericExtendedSet c = empty(); + c.elements = ((SortedSet) elements).subSet(fromElement, toElement); + return c; + } + return super.subSet(fromElement, toElement); // was headSet(toElement), which ignored the lower bound + } + + + /* + * ExtendedSet methods + */ + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(Collection other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return 0; + } + if (this == other) { + return size(); + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + int res = 0; + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + if (!otherItr.hasNext()) { + return res; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + if (!thisItr.hasNext()) { + return res; + } + thisValue = thisItr.next(); + } + } + + res++; + } + return res; + } + + return super.intersectionSize(other); + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet intersection(Collection other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (this == other) { + return clone(); + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = 
otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + if (!otherItr.hasNext()) { + return res; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + if (!thisItr.hasNext()) { + return res; + } + thisValue = thisItr.next(); + } + } + + res.elements.add(thisValue); + } + return res; + } + + GenericExtendedSet clone = clone(); + clone.elements.retainAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet union(Collection other) + { + if (this == other || other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty()) { + GenericExtendedSet res = empty(); + res.elements.addAll(other); + return res; + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); +mainLoop: + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + res.elements.add(otherValue); + if (!otherItr.hasNext()) { + res.elements.add(thisValue); + break mainLoop; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + res.elements.add(thisValue); + if (!thisItr.hasNext()) { + res.elements.add(otherValue); + break mainLoop; + } + thisValue = thisItr.next(); + } + } + + res.elements.add(thisValue); + } + while (thisItr.hasNext()) { + res.elements.add(thisItr.next()); + } + while (otherItr.hasNext()) { + res.elements.add(otherItr.next()); + } + return res; + } + + GenericExtendedSet clone = 
clone(); + for (T e : other) { + clone.add(e); + } + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet difference(Collection other) + { + if (isEmpty() || this == other) { + return empty(); + } + if (other == null || other.isEmpty()) { + return clone(); + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); +mainLoop: + while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + if (!otherItr.hasNext()) { + res.elements.add(thisValue); + break mainLoop; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + res.elements.add(thisValue); + if (!thisItr.hasNext()) { + break mainLoop; + } + thisValue = thisItr.next(); + } + } + } + while (thisItr.hasNext()) { + res.elements.add(thisItr.next()); + } + return res; + } + + GenericExtendedSet clone = clone(); + clone.elements.removeAll(other); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet symmetricDifference(Collection other) + { + if (this == other || other == null || other.isEmpty()) { + return this == other ? empty() : clone(); // A xor A is empty; A xor {} is a copy of A + } + if (isEmpty()) { + GenericExtendedSet res = empty(); + res.elements.addAll(other); + return res; + } + + if (elements instanceof List + && other instanceof GenericExtendedSet + && ((GenericExtendedSet) other).elements instanceof List) { + GenericExtendedSet res = empty(); + Iterator thisItr = elements.iterator(); + @SuppressWarnings("unchecked") + Iterator otherItr = ((GenericExtendedSet) other).elements.iterator(); +mainLoop: + 
while (thisItr.hasNext() && otherItr.hasNext()) { + T thisValue = thisItr.next(); + T otherValue = otherItr.next(); + + int r = thisValue.compareTo(otherValue); + while (r != 0) { + while ((r = thisValue.compareTo(otherValue)) > 0) { + res.elements.add(otherValue); + if (!otherItr.hasNext()) { + res.elements.add(thisValue); + break mainLoop; + } + otherValue = otherItr.next(); + } + if (r == 0) { + break; + } + while ((r = otherValue.compareTo(thisValue)) > 0) { + res.elements.add(thisValue); + if (!thisItr.hasNext()) { + res.elements.add(otherValue); + break mainLoop; + } + thisValue = thisItr.next(); + } + } + } + while (thisItr.hasNext()) { + res.elements.add(thisItr.next()); + } + while (otherItr.hasNext()) { + res.elements.add(otherItr.next()); + } + return res; + } + + GenericExtendedSet clone = union(other); + clone.removeAll(intersection(other)); + return clone; + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedSet unmodifiable() + { + GenericExtendedSet c = empty(); + c.elements = Collections.unmodifiableCollection(elements); + return c; + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(T from, T to) + { + throw new UnsupportedOperationException(); + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet convert(Collection c) + { + GenericExtendedSet res = (GenericExtendedSet) super.convert(c); + if (res.elements instanceof List) { + Collections.sort((List) res.elements); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public GenericExtendedSet convert(Object... 
e) + { + GenericExtendedSet res = (GenericExtendedSet) super.convert(e); + if (res.elements instanceof List) { + Collections.sort((List) res.elements); + } + return res; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java new file mode 100755 index 00000000000..11532dbb191 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/IndexedSet.java @@ -0,0 +1,741 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers; + + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.IntSet; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * An {@link ExtendedSet} implementation that maps each element of the universe (i.e., the collection of all possible elements) to an integer referred to as its "index". 
+ * + * @param < T > the type of elements maintained by this set + * + * @author Alessandro Colantonio + * @version $Id: IndexedSet.java 154 2011-05-30 22:19:24Z cocciasik $ + * @see ExtendedSet + * @see AbstractExtendedSet + */ +public class IndexedSet extends AbstractExtendedSet implements java.io.Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = -2386771695765773453L; + + // indices + /** + * @uml.property name="indices" + * @uml.associationEnd + */ + private final IntSet indices; + + // mapping to translate items to indices and vice-versa + private final Map itemToIndex; + private final T[] indexToItem; + + /** + * Creates an empty {@link IndexedSet} based on a given collection that + * represents the set of all possible items that can be added to the + * {@link IndexedSet} instance. + *

+ * VERY IMPORTANT! to correctly work and effectively reduce the + * memory allocation, new instances of {@link IndexedSet} must be + * created through the {@link #clone()} or {@link #empty()} methods and + * not by calling many times this constructor with the same + * collection for universe! + * + * @param indices {@link IntSet} instance used for internal representation + * @param universe collection of all possible items. Order will be + * preserved. + */ + @SuppressWarnings("unchecked") + public IndexedSet(IntSet indices, final Collection universe) + { + // NOTE: this procedure removes duplicates while keeping the order + indexToItem = universe instanceof Set ? (T[]) universe.toArray() : (T[]) (new LinkedHashSet(universe)).toArray(); + itemToIndex = new HashMap(Math.max((int) (indexToItem.length / .75f) + 1, 16)); + for (int i = 0; i < indexToItem.length; i++) { + itemToIndex.put(indexToItem[i], Integer.valueOf(i)); + } + this.indices = indices; + } + + /** + * Creates a {@link IndexedSet} instance from a given universe + * mapping + * + * @param itemToIndex universe item-to-index mapping + * @param indexToItem universe index-to-item mapping + * @param indices initial item set + */ + private IndexedSet(Map itemToIndex, T[] indexToItem, IntSet indices) + { + this.itemToIndex = itemToIndex; + this.indexToItem = indexToItem; + this.indices = indices; + } + + /** + * A shortcut for new IndexedSet<T>(itemToIndex, indexToItem, indices) + */ + private IndexedSet createFromIndices(IntSet indx) + { + return new IndexedSet(itemToIndex, indexToItem, indx); + } + + /** + * Checks if the given collection is a instance of {@link IndexedSet} with + * the same index mappings + * + * @param c collection to check + * + * @return true if the given collection is a instance of + * {@link IndexedSet} with the same index mappings + */ + private boolean hasSameIndices(Collection c) + { + // since indices are always re-created through constructor and + // referenced through clone(), it 
is sufficient to check just only one + // mapping table + return (c instanceof IndexedSet) && (indexToItem == ((IndexedSet) c).indexToItem); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet clone() + { + return createFromIndices(indices.clone()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null || !(obj instanceof Collection)) { + return false; + } + IndexedSet other = convert((Collection) obj); + return this.indexToItem == other.indexToItem + && this.itemToIndex == other.itemToIndex + && this.indices.equals(other.indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return indices.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(ExtendedSet o) + { + return indices.compareTo(convert(o).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public Comparator comparator() + { + return new Comparator() + { + @Override + public int compare(T o1, T o2) + { + // compare elements according to the universe ordering + return itemToIndex.get(o1).compareTo(itemToIndex.get(o2)); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public T first() + { + return indexToItem[indices.first()]; + } + + /** + * {@inheritDoc} + */ + @Override + public T last() + { + return indexToItem[indices.last()]; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(T e) + { + Integer index = itemToIndex.get(e); + if (index == null) { + throw new IllegalArgumentException("element not in the current universe"); + } + return indices.add(index.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection c) + { + return c != null && !c.isEmpty() && indices.addAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + indices.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(T e) + { + 
indices.flip(itemToIndex.get(e).intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(Object o) + { + if (o == null) { + return false; + } + Integer index = itemToIndex.get(o); + return index != null && indices.contains(index.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(Collection c) + { + return c == null || indices.containsAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection other) + { + return other == null || indices.containsAny(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + return other != null && !other.isEmpty() && indices.containsAtLeast(convert(other).indices, minElements); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return indices.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator iterator() + { + return new ExtendedIterator() + { + final IntSet.IntIterator itr = indices.iterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return indexToItem[itr.next()];} + + @Override + public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());} + + @Override + public void remove() {itr.remove();} + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator descendingIterator() + { + return new ExtendedIterator() + { + final IntSet.IntIterator itr = indices.descendingIterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public T next() {return indexToItem[itr.next()];} + + @Override + public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());} + + @Override + public void remove() {itr.remove();} + }; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(Object o) + { + if (o == 
null) { + return false; + } + Integer index = itemToIndex.get(o); + return index != null && indices.remove(index.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(Collection c) + { + return c != null && !c.isEmpty() && indices.removeAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(Collection c) + { + if (isEmpty()) { + return false; + } + if (c == null || c.isEmpty()) { + indices.clear(); + return true; + } + return indices.retainAll(convert(c).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return indices.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet intersection(Collection other) + { + if (other == null) { + return empty(); + } + return createFromIndices(indices.intersection(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet union(Collection other) + { + if (other == null) { + return clone(); + } + return createFromIndices(indices.union(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet difference(Collection other) + { + if (other == null) { + return clone(); + } + return createFromIndices(indices.difference(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet symmetricDifference(Collection other) + { + if (other == null) { + return clone(); + } + return createFromIndices(indices.symmetricDifference(convert(other).indices)); + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet complemented() + { + return createFromIndices(indices.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + indices.complement(); + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(Collection other) + { + if (other == null) { + return 0; + } + return indices.intersectionSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + 
@Override + public int unionSize(Collection other) + { + if (other == null) { + return size(); + } + return indices.unionSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(Collection other) + { + if (other == null) { + return size(); + } + return indices.symmetricDifferenceSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(Collection other) + { + if (other == null) { + return size(); + } + return indices.differenceSize(convert(other).indices); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return indices.complementSize(); + } + + /** + * Returns the collection of all possible elements + * + * @return the collection of all possible elements + */ + public IndexedSet universe() + { + IntSet allItems = indices.empty(); + allItems.fill(0, indexToItem.length - 1); + return createFromIndices(allItems); + } + + /** + * Returns the index of the given item + * + * @param item + * + * @return the index of the given item + */ + public Integer absoluteIndexOf(T item) + { + return itemToIndex.get(item); + } + + /** + * Returns the item corresponding to the given index + * + * @param i index + * + * @return the item + */ + public T absoluteGet(int i) + { + return indexToItem[i]; + } + + /** + * Returns the set of indices. Modifications to this set are reflected to + * this {@link IndexedSet} instance. Trying to perform operation on + * out-of-bound indices will throw an {@link IllegalArgumentException} + * exception. 
+ * + * @return the index set + * + * @see #absoluteGet(int) + * @see #absoluteIndexOf(Object) + */ + public IntSet indices() + { + return indices; + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet empty() + { + return createFromIndices(indices.empty()); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + return indices.bitmapCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return indices.collectionCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public IndexedSet convert(Collection c) + { + if (c == null) { + return empty(); + } + + // useless to convert... + if (hasSameIndices(c)) { + return (IndexedSet) c; + } + + // NOTE: cannot call super.convert(c) because of loop + IndexedSet res = empty(); + for (T t : (Collection) c) { + res.add(t); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public IndexedSet convert(Object... 
e) + { + return (IndexedSet) super.convert(e); + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet() + { + return powerSet(1, Integer.MAX_VALUE); + } + + /** + * {@inheritDoc} + */ + @Override + public List> powerSet(int min, int max) + { + List ps = indices.powerSet(min, max); + List> res = new ArrayList>(ps.size()); + for (IntSet s : ps) { + res.add(createFromIndices(s)); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return String.format("items = %s\nitemToIndex = %s\nindexToItem = %s\n", + indices.debugInfo(), itemToIndex.toString(), Arrays.toString(indexToItem) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public double jaccardSimilarity(ExtendedSet other) + { + return indices.jaccardSimilarity(convert(other).indices); + } + + //TODO +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet unmodifiable() { +// return createFromIndices(indices.unmodifiable()); +// } +// +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet subSet(T fromElement, T toElement) { +// return createFromIndices(indices.subSet(itemToIndex.get(fromElement), itemToIndex.get(toElement))); +// } +// +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet headSet(T toElement) { +// return createFromIndices(indices.headSet(itemToIndex.get(toElement))); +// } +// +// /** +// * {@inheritDoc} +// */ +// @Override +// public IndexedSet tailSet(T fromElement) { +// return createFromIndices(indices.tailSet(itemToIndex.get(fromElement))); +// } + + /** + * {@inheritDoc} + */ + @Override + public T get(int i) + { + return indexToItem[indices.get(i)]; + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(T e) + { + return indices.indexOf(itemToIndex.get(e).intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(T from, T to) + { + indices.clear(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue()); + } + + /** + * {@inheritDoc} 
+ */ + @Override + public void fill(T from, T to) + { + indices.fill(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue()); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java new file mode 100755 index 00000000000..0c92053a238 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/IntegerSet.java @@ -0,0 +1,580 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers; + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.IntSet; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + +/** + * This class provides a "wrapper" for any {@link IntSet} instance in order to be used as an {@link ExtendedSet} instance. 
+ * + * @author Alessandro Colantonio + * @version $Id: IntegerSet.java 153 2011-05-30 16:39:57Z cocciasik $ + */ +public class IntegerSet extends AbstractExtendedSet +{ + /** + * the collection of int numbers + * + * @uml.property name="items" + * @uml.associationEnd + */ + private final IntSet items; + + /** + * Wraps an instance of {@link IntSet} + * + * @param items the {@link IntSet} to wrap + */ + public IntegerSet(IntSet items) + { + this.items = items; + } + + /** + * @return the internal integer representation + */ + public IntSet intSet() + { + return items; + } + + /** + * Converts a generic collection of {@link Integer} instances to a + * {@link IntSet} instance. If the given collection is an + * {@link IntegerSet} instance, it returns the contained + * {@link #items} object. + * + * @param c the generic collection of {@link Integer} instances + * + * @return the resulting {@link IntSet} instance + */ + private IntSet toIntSet(Collection c) + { + // nothing to convert + if (c == null) { + return null; + } + if (c instanceof IntegerSet) { + return ((IntegerSet) c).items; + } + + // extract integers from the given collection + IntSet res = items.empty(); + List sorted = new ArrayList(c.size()); + for (Object i : c) { + try { + sorted.add((Integer) i); + } + catch (ClassCastException e) { + // do nothing + } + } + Collections.sort(sorted); + for (Integer i : sorted) { + res.add(i.intValue()); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection c) + { + return items.addAll(toIntSet(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + return items.bitmapCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(Integer from, Integer to) + { + items.clear(from.intValue(), to.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! 
+ return new IntegerSet(items.clone()); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return items.collectionCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(ExtendedSet o) + { + return items.compareTo(toIntSet(o)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet complemented() + { + return new IntegerSet(items.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return items.complementSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection other) + { + return items.containsAny(toIntSet(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection other, int minElements) + { + return items.containsAtLeast(toIntSet(other), minElements); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet convert(Collection c) + { + return new IntegerSet(toIntSet(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet convert(Object... 
e) + { + return convert(Arrays.asList(e)); + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + return getClass().getSimpleName() + "\n" + items.debugInfo(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator descendingIterator() + { + return new ExtendedIterator() + { + final IntSet.IntIterator itr = items.descendingIterator(); + + @Override + public void remove() {itr.remove();} + + @Override + public Integer next() {return Integer.valueOf(itr.next());} + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public void skipAllBefore(Integer element) {itr.skipAllBefore(element.intValue());} + }; + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet difference(Collection other) + { + return new IntegerSet(items.difference(toIntSet(other))); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(Collection other) + { + return items.differenceSize(toIntSet(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet empty() + { + return new IntegerSet(items.empty()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (!(o instanceof IntegerSet)) { + return false; + } + return items.equals(((IntegerSet) o).items); + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(Integer from, Integer to) + { + items.fill(from.intValue(), to.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public Integer first() + { + return Integer.valueOf(items.first()); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(Integer e) + { + items.flip(e.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public Integer get(int i) + { + return Integer.valueOf(items.get(i)); + } + + /** + * {@inheritDoc} + */ + @Override + public int indexOf(Integer e) + { + return items.indexOf(e.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public 
IntegerSet intersection(Collection other) + { + return new IntegerSet(items.intersection(toIntSet(other))); + } + + /** + * {@inheritDoc} + */ + @Override + public int intersectionSize(Collection other) + { + return items.intersectionSize(toIntSet(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator iterator() + { + return new ExtendedIterator() + { + final IntSet.IntIterator itr = items.iterator(); + + @Override + public void remove() {itr.remove();} + + @Override + public Integer next() {return Integer.valueOf(itr.next());} + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public void skipAllBefore(Integer element) {itr.skipAllBefore(element.intValue());} + }; + } + + /** + * {@inheritDoc} + */ + @Override + public Integer last() + { + return Integer.valueOf(items.last()); + } + + /** + * {@inheritDoc} + */ + @Override + public List powerSet() + { + return powerSet(1, Integer.MAX_VALUE); + } + + /** + * {@inheritDoc} + */ + @Override + public List powerSet(int min, int max) + { + List ps = items.powerSet(min, max); + List res = new ArrayList(ps.size()); + for (IntSet s : ps) { + res.add(new IntegerSet(s)); + } + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(Collection c) + { + return items.removeAll(toIntSet(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(Collection c) + { + return items.retainAll(toIntSet(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet symmetricDifference(Collection other) + { + return new IntegerSet(items.symmetricDifference(toIntSet(other))); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(Collection other) + { + return items.symmetricDifferenceSize(toIntSet(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public IntegerSet union(Collection other) + { + return new IntegerSet(items.union(toIntSet(other))); + } + + /** + * {@inheritDoc} + */ + 
@Override + public int unionSize(Collection other) + { + return items.unionSize(toIntSet(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return items.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + items.complement(); + } + + /** + * {@inheritDoc} + */ + @Override + public Comparator comparator() + { + return null; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(Integer e) + { + return items.add(e.intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + items.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(Object o) + { + return o instanceof Integer && items.contains(((Integer) o).intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAll(Collection c) + { + return items.containsAll(toIntSet(c)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return items.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean remove(Object o) + { + return o instanceof Integer && items.remove(((Integer) o).intValue()); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return items.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + // NOTE: by not calling super.toString(), we avoid to iterate over new + // Integer instances, thus avoiding to waste time and memory with garbage + // collection + return items.toString(); + } + + /** + * {@inheritDoc} + */ + @Override + public double jaccardSimilarity(ExtendedSet other) + { + return items.jaccardSimilarity(toIntSet(other)); + } + + /** + * {@inheritDoc} + */ + @Override + public double weightedJaccardSimilarity(ExtendedSet other) + { + return items.weightedJaccardSimilarity(toIntSet(other)); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java 
b/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java new file mode 100755 index 00000000000..ad60d782fe9 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/LongSet.java @@ -0,0 +1,1692 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers; + +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.ConciseSetUtils; +import io.druid.extendedset.intset.IntSet; +import io.druid.extendedset.intset.IntSet.IntIterator; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.NavigableMap; +import java.util.NoSuchElementException; +import java.util.SortedSet; +import java.util.TreeMap; + +/** + * Very similar to {@link ExtendedSet} but for the primitive long type. 
+ * + * @author Alessandro Colantonio + * @version $Id: LongSet.java 154 2011-05-30 22:19:24Z cocciasik $ + */ +public class LongSet implements Cloneable, Comparable, java.io.Serializable, Iterable +{ + /** + * generated ID + */ + private static final long serialVersionUID = -6165350530254304256L; + + /** + * maximum cardinality of each subset + */ + private static int SUBSET_SIZE = ConciseSetUtils.MAX_ALLOWED_INTEGER + 1; + + /** + * transaction-item pair indices (from 0 to {@link #SUBSET_SIZE} - 1) + * + * @uml.property name="firstIndices" + * @uml.associationEnd + */ + private final IntSet firstIndices; + + /** + * transaction-item pair indices (from {@link #SUBSET_SIZE}) + */ + private final NavigableMap otherIndices; + + /** + * Creates an empty set + * + * @param block {@link IntSet} instance internally used to represent + * {@link Long} values. It can be non-empty. + */ + public LongSet(IntSet block) + { + firstIndices = block.empty(); + otherIndices = new TreeMap(); + } + + /** + * Shallow-copy constructor + */ + private LongSet(IntSet firstIndices, NavigableMap otherIndices) + { + this.firstIndices = firstIndices; + this.otherIndices = otherIndices; + } + + /** + * @return an empty {@link IntSet} instance of the same type of that of + * internally used to represent integers + */ + public IntSet emptyBlock() + { + return firstIndices.empty(); + } + + /** + * Retains only the elements in this set that are contained in the specified + * collection. In other words, removes from this set all of its elements + * that are not contained in the specified collection. 
+ * + * @param other collection containing elements to be retained in this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if this set contains a null element and the specified + * collection does not permit null elements (optional), or if + * the specified collection is null + * @see #remove(long) + */ + @SuppressWarnings("null") + public boolean retainAll(LongSet other) + { + if (isEmpty() || this == other) { + return false; + } + if (other == null || other.isEmpty()) { + clear(); + return true; + } + + boolean res = firstIndices.retainAll(other.firstIndices); + if (otherIndices.isEmpty()) { + return res; + } + if (other.otherIndices.isEmpty()) { + otherIndices.clear(); + return true; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return res; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + itr1.remove(); + while (itr1.hasNext()) { + itr1.next(); + itr1.remove(); + } + return true; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c < 0) { + itr1.remove(); + res = true; + } else if (c == 0) { + res |= e1.getValue().retainAll(e2.getValue()); + if (e1.getValue().isEmpty()) { + itr1.remove(); + } + } + } + } + + /** + * Generates the intersection set + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #retainAll(LongSet) + */ + @SuppressWarnings("null") + public LongSet intersection(LongSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return empty(); + } + if (this == other) { + return clone(); + } + + LongSet res = new LongSet(firstIndices.intersection(other.firstIndices), new TreeMap()); + if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) 
{ + return res; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return res; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + return res; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c == 0) { + IntSet s = e1.getValue().intersection(e2.getValue()); + if (!s.isEmpty()) { + res.otherIndices.put(e1.getKey(), s); + } + } + } + } + + /** + * Adds all of the elements in the specified collection to this set if + * they're not already present. + * + * @param other collection containing elements to be added to this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if the specified collection contains one or more null + * elements and this set does not permit null elements, or if + * the specified collection is null + * @throws IllegalArgumentException if some property of an element of the specified collection + * prevents it from being added to this set + * @see #add(long) + */ + @SuppressWarnings("null") + public boolean addAll(LongSet other) + { + if (other == null || other.isEmpty() || this == other) { + return false; + } + + boolean res = firstIndices.addAll(other.firstIndices); + if (other.otherIndices.isEmpty()) { + return res; + } + if (otherIndices.isEmpty()) { + for (Entry e : other.otherIndices.entrySet()) { + otherIndices.put(e.getKey(), e.getValue().clone()); + } + return true; + } + Iterator> itr1 = new ArrayList>(otherIndices.entrySet()).iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + return res; + } + } + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + 
otherIndices.put(e2.getKey(), e2.getValue().clone()); + while (itr2.hasNext()) { + e2 = itr2.next(); + otherIndices.put(e2.getKey(), e2.getValue().clone()); + } + return true; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c > 0) { + otherIndices.put(e2.getKey(), e2.getValue().clone()); + res = true; + } else if (c == 0) { + res |= e1.getValue().addAll(e2.getValue()); + } + } + } + + /** + * Generates the union set + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #addAll(LongSet) + */ + @SuppressWarnings("null") + public LongSet union(LongSet other) + { + if (other == null || other.isEmpty() || this == other) { + return clone(); + } + if (isEmpty()) { + return other.clone(); + } + + LongSet res = new LongSet(firstIndices.union(other.firstIndices), new TreeMap()); + if (other.otherIndices.isEmpty()) { + for (Entry e : otherIndices.entrySet()) { + res.otherIndices.put(e.getKey(), e.getValue().clone()); + } + return res; + } + if (otherIndices.isEmpty()) { + for (Entry e : other.otherIndices.entrySet()) { + res.otherIndices.put(e.getKey(), e.getValue().clone()); + } + return res; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + if (c != 0) { + res.otherIndices.put(e2.getKey(), e2.getValue().clone()); + } + while (itr2.hasNext()) { + e2 = itr2.next(); + res.otherIndices.put(e2.getKey(), e2.getValue().clone()); + } + return res; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + while (itr1.hasNext()) { + e1 = itr1.next(); + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + } + return res; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c < 0) { + 
res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + } else if (c > 0) { + res.otherIndices.put(e2.getKey(), e2.getValue().clone()); + } else { + res.otherIndices.put(e1.getKey(), e1.getValue().union(e2.getValue())); + } + } + } + + /** + * Removes from this set all of its elements that are contained in the + * specified collection. + * + * @param other collection containing elements to be removed from this set + * + * @return true if this set changed as a result of the call + * + * @throws NullPointerException if this set contains a null element and the specified + * collection does not permit null elements (optional), or if + * the specified collection is null + * @see #remove(long) + * @see #contains(long) + */ + @SuppressWarnings("null") + public boolean removeAll(LongSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return false; + } + if (this == other) { + clear(); + return true; + } + + boolean res = firstIndices.removeAll(other.firstIndices); + if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) { + return res; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return res; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + return res; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c == 0) { + res |= e1.getValue().removeAll(e2.getValue()); + if (e1.getValue().isEmpty()) { + itr1.remove(); + } + } + } + } + + /** + * Generates the difference set + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #removeAll(LongSet) + */ + @SuppressWarnings("null") + public LongSet difference(LongSet other) + { + if (other == null || other.isEmpty()) { + return clone(); + } + if (isEmpty() || this == 
other) { + return empty(); + } + + LongSet res = new LongSet(firstIndices.difference(other.firstIndices), new TreeMap()); + if (otherIndices.isEmpty()) { + return res; + } + if (other.otherIndices.isEmpty()) { + for (Entry e : otherIndices.entrySet()) { + res.otherIndices.put(e.getKey(), e.getValue().clone()); + } + return res; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return res; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + while (itr1.hasNext()) { + e1 = itr1.next(); + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + } + return res; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c < 0) { + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + } else if (c == 0) { + IntSet s = e1.getValue().difference(e2.getValue()); + if (!s.isEmpty()) { + res.otherIndices.put(e1.getKey(), s); + } + } + } + } + + /** + * Generates the symmetric difference set + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #flip(long) + */ + @SuppressWarnings("null") + public LongSet symmetricDifference(LongSet other) + { + if (other == null || other.isEmpty() || this == other) { + return clone(); + } + if (isEmpty()) { + return other.clone(); + } + + LongSet res = new LongSet(firstIndices.symmetricDifference(other.firstIndices), new TreeMap()); + if (other.otherIndices.isEmpty()) { + for (Entry e : otherIndices.entrySet()) { + res.otherIndices.put(e.getKey(), e.getValue().clone()); + } + return res; + } + if (otherIndices.isEmpty()) { + for (Entry e : other.otherIndices.entrySet()) { + res.otherIndices.put(e.getKey(), e.getValue().clone()); + } + return res; + } + 
Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + if (c != 0) { + res.otherIndices.put(e2.getKey(), e2.getValue().clone()); + } + while (itr2.hasNext()) { + e2 = itr2.next(); + res.otherIndices.put(e2.getKey(), e2.getValue().clone()); + } + return res; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + while (itr1.hasNext()) { + e1 = itr1.next(); + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + } + return res; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c < 0) { + res.otherIndices.put(e1.getKey(), e1.getValue().clone()); + } else if (c > 0) { + res.otherIndices.put(e2.getKey(), e2.getValue().clone()); + } else { + res.otherIndices.put(e1.getKey(), e1.getValue().symmetricDifference(e2.getValue())); + } + } + } + + /** + * Generates the complement set. The returned set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. + * + * @return the complement set + * + * @see LongSet#complement() + */ + public LongSet complemented() + { + LongSet cloned = clone(); + cloned.complement(); + return cloned; + } + + /** + * Complements the current set. The modified set is represented by all the + * elements strictly less than {@link #last()} that do not exist in the + * current set. 
+ * + * @see LongSet#complemented() + */ + public void complement() + { + if (otherIndices.isEmpty()) { + firstIndices.complement(); + return; + } + + // complement the last block + Iterator> itr = otherIndices.descendingMap().entrySet().iterator(); + Entry e = itr.next(); + e.getValue().complement(); + if (e.getValue().isEmpty()) { + itr.remove(); + } + + // complement other blocks + NavigableMap toAdd = new TreeMap(); // avoid concurrent modification + for (long i = e.getKey().longValue() - SUBSET_SIZE; i > 0L; i -= SUBSET_SIZE) { + while (e != null && e.getKey().longValue() > i) { + e = itr.hasNext() ? itr.next() : null; + } + + if (e != null && e.getKey().longValue() == i) { + if (e.getValue().add(SUBSET_SIZE - 1)) { + e.getValue().complement(); + e.getValue().add(SUBSET_SIZE - 1); + } else { + e.getValue().complement(); + } + if (e.getValue().isEmpty()) { + itr.remove(); + } + } else { + IntSet s = firstIndices.empty(); + s.fill(0, SUBSET_SIZE - 1); + toAdd.put(Long.valueOf(i), s); + } + } + otherIndices.putAll(toAdd); + if (firstIndices.add(SUBSET_SIZE - 1)) { + firstIndices.complement(); + firstIndices.add(SUBSET_SIZE - 1); + } else { + firstIndices.complement(); + } + } + + /** + * Computes the intersection set size. + *

+ * This is faster than calling {@link #intersection(LongSet)} and + * then {@link #size()} + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the size + */ + @SuppressWarnings("null") + public long intersectionSize(LongSet other) + { + if (isEmpty() || other == null || other.isEmpty()) { + return 0L; + } + if (this == other) { + return size(); + } + + long res = firstIndices.intersectionSize(other.firstIndices); + if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) { + return res; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return res; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + return res; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c == 0) { + res += e1.getValue().intersectionSize(e2.getValue()); + } + } + } + + /** + * Computes the union set size. + *

+ * This is faster than calling {@link #union(LongSet)} and then + * {@link #size()} + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the size + */ + public long unionSize(LongSet other) + { + return other == null ? size() : size() + other.size() - intersectionSize(other); + } + + /** + * Computes the symmetric difference set size. + *

+ * This is faster than calling {@link #symmetricDifference(LongSet)} + * and then {@link #size()} + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the size + */ + public long symmetricDifferenceSize(LongSet other) + { + return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); + } + + /** + * Computes the difference set size. + *

+ * This is faster than calling {@link #difference(LongSet)} and then + * {@link #size()} + * + * @param other {@link LongSet} instance that represents the right + * operand + * + * @return the size + */ + public long differenceSize(LongSet other) + { + return other == null ? size() : size() - intersectionSize(other); + } + + /** + * Computes the complement set size. + *

+ * This is faster than calling {@link #complemented()} and then + * {@link #size()} + * + * @return the size + */ + public long complementSize() + { + if (isEmpty()) { + return 0L; + } + return last() - size() + 1L; + } + + /** + * Generates an empty set + * + * @return the empty set + */ + public LongSet empty() + { + return new LongSet(firstIndices.empty(), new TreeMap()); + } + + /** + * See the clone() of {@link Object} + * + * @return cloned object + */ + @Override + public LongSet clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! + NavigableMap otherIndicesClone = new TreeMap(); + for (Entry e : otherIndices.entrySet()) { + otherIndicesClone.put(e.getKey(), e.getValue().clone()); + } + return new LongSet(firstIndices.clone(), otherIndicesClone); + } + + /** + * Computes the compression factor of the equivalent bitmap representation + * (1 means not compressed, namely a memory footprint similar to + * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) + * + * @return the compression factor + */ + public double bitmapCompressionRatio() + { + //TODO + throw new RuntimeException("TODO"); + } + + /** + * Computes the compression factor of the equivalent integer collection (1 + * means not compressed, namely a memory footprint similar to + * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) 
+ * + * @return the compression factor + */ + public double collectionCompressionRatio() + { + //TODO + throw new RuntimeException("TODO"); + } + + /** + * @return a {@link ExtendedLongIterator} instance to iterate over the set + */ + public ExtendedLongIterator longIterator() + { + return new ExtendedLongIterator(); + } + + /** + * @return a {@link ExtendedLongIterator} instance to iterate over the set in + * descending order + */ + public ExtendedLongIterator descendingLongIterator() + { + return new ReverseLongIterator(); + } + + /** + * {@inheritDoc} + */ + @Override + public Iterator iterator() + { + return new Iterator() + { + final ExtendedLongIterator itr = longIterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public Long next() {return Long.valueOf(itr.next());} + + @Override + public void remove() {itr.remove();} + }; + } + + /** + * Prints debug info about the given {@link LongSet} implementation + * + * @return a string that describes the internal representation of the + * instance + */ + public String debugInfo() + { + StringBuilder s = new StringBuilder(); + + s.append("elements: "); + s.append(toString()); + s.append("\nfirstIndices: " + firstIndices); + s.append('\n'); + s.append("otherIndices: " + otherIndices.size()); + s.append('\n'); + for (Entry e : otherIndices.entrySet()) { + s.append('\t'); + s.append(e.getKey()); + s.append(", "); + s.append(e.getValue()); + s.append('\n'); + } + + return s.toString(); + } + + /** + * Adds to the set all the elements between first and + * last, both included. 
+ * + * @param from first element + * @param to last element + */ + public void fill(long from, long to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + if (from == to) { + add(from); + return; + } + + final long firstBlockIndex = (from / SUBSET_SIZE) * SUBSET_SIZE; + final long lastBlockIndex = (to / SUBSET_SIZE) * SUBSET_SIZE; + if (firstBlockIndex == lastBlockIndex) { + // Case 1: One block + if (firstBlockIndex == 0L) { + firstIndices.fill((int) from, (int) to); + } else { + IntSet s = otherIndices.get(firstBlockIndex); + if (s == null) { + otherIndices.put(firstBlockIndex, s = firstIndices.empty()); + } + s.fill((int) (from - firstBlockIndex), (int) (to - firstBlockIndex)); + } + } else { + // Case 2: Multiple blocks + // Handle first block + if (firstBlockIndex == 0L) { + firstIndices.fill((int) from, SUBSET_SIZE - 1); + } else { + IntSet s = otherIndices.get(firstBlockIndex); + if (s == null) { + otherIndices.put(firstBlockIndex, s = firstIndices.empty()); + } + s.fill((int) (from - firstBlockIndex), SUBSET_SIZE - 1); + } + + // Handle intermediate words, if any + for (long i = firstBlockIndex + SUBSET_SIZE; i < lastBlockIndex; i += SUBSET_SIZE) { + IntSet s = firstIndices.empty(); + s.fill(0, SUBSET_SIZE - 1); + otherIndices.put(Long.valueOf(i), s); + } + + // Handle last word + IntSet s = otherIndices.get(lastBlockIndex); + if (s == null) { + otherIndices.put(lastBlockIndex, s = firstIndices.empty()); + } + s.fill(0, (int) (to - lastBlockIndex)); + } + } + + /** + * Removes from the set all the elements between first and + * last, both included. 
+ * + * @param from first element + * @param to last element + */ + public void clear(long from, long to) + { + if (from > to) { + throw new IndexOutOfBoundsException("from: " + from + " > to: " + to); + } + if (from == to) { + remove(from); + return; + } + + final long firstBlockIndex = (from / SUBSET_SIZE) * SUBSET_SIZE; + final long lastBlockIndex = (to / SUBSET_SIZE) * SUBSET_SIZE; + if (firstBlockIndex == lastBlockIndex) { + // Case 1: One block + if (firstBlockIndex == 0L) { + firstIndices.clear((int) from, (int) to); + } else { + IntSet s = otherIndices.get(firstBlockIndex); + if (s != null) { + s.clear((int) (from - firstBlockIndex), (int) (to - firstBlockIndex)); + if (s.isEmpty()) { + otherIndices.remove(firstBlockIndex); + } + } + } + } else { + // Case 2: Multiple blocks + // Handle first block + if (firstBlockIndex == 0L) { + firstIndices.clear((int) from, SUBSET_SIZE - 1); + } else { + IntSet s = otherIndices.get(firstBlockIndex); + if (s != null) { + s.clear((int) (from - firstBlockIndex), SUBSET_SIZE - 1); + if (s.isEmpty()) { + otherIndices.remove(firstBlockIndex); + } + } + } + + // Handle intermediate words, if any + for (long i = firstBlockIndex + SUBSET_SIZE; i < lastBlockIndex; i += SUBSET_SIZE) { + otherIndices.remove(Long.valueOf(i)); + } + + // Handle last word + IntSet s = otherIndices.get(lastBlockIndex); + if (s != null) { + s.clear(0, (int) (to - lastBlockIndex)); + if (s.isEmpty()) { + otherIndices.remove(lastBlockIndex); + } + } + } + } + + /** + * Adds the element if it not existing, or removes it if existing + * + * @param e element to flip + * + * @see #symmetricDifference(LongSet) + */ + public void flip(long e) + { + if (e < SUBSET_SIZE) { + firstIndices.flip((int) e); + return; + } + + final long block = (e / SUBSET_SIZE) * SUBSET_SIZE; + IntSet s = otherIndices.get(block); + if (s == null) { + otherIndices.put(block, s = firstIndices.empty()); + } + s.flip((int) (e - block)); + if (s.isEmpty()) { + otherIndices.remove(block); + 
} + } + + /** + * Gets the ith element of the set + * + * @param index position of the element in the sorted set + * + * @return the ith element of the set + * + * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to + * {@link #size()} + */ + public long get(long index) + { + if (index < firstIndices.size()) { + return firstIndices.get((int) index); + } + + index -= firstIndices.size(); + for (Entry e : otherIndices.entrySet()) { + if (index < e.getValue().size()) { + return e.getKey().longValue() + e.getValue().get((int) index); + } + index -= e.getValue().size(); + } + throw new IndexOutOfBoundsException(Long.toString(index)); + } + + /** + * Provides position of element within the set. + *

+ * It returns -1 if the element does not exist within the set. + * + * @param i element of the set + * + * @return the element position + */ + public long indexOf(long i) + { + if (i < SUBSET_SIZE) { + return firstIndices.indexOf((int) i); + } + long prev = firstIndices.size(); + for (Entry e : otherIndices.entrySet()) { + if (i < e.getKey().longValue() + SUBSET_SIZE) { + return prev + e.getValue().indexOf((int) (i - e.getKey().longValue())); + } + prev += e.getValue().size(); + } + return -1L; + } + + /** + * Converts a given array into an instance of the current class. + * + * @param a array to use to generate the new instance + * + * @return the converted collection + */ + public LongSet convert(long... a) + { + LongSet res = empty(); + if (a != null) { + a = Arrays.copyOf(a, a.length); + Arrays.sort(a); + for (long i : a) { + res.add(i); + } + } + return res; + } + + /** + * Converts a given array into an instance of the current class. + * + * @param a array to use to generate the new instance + * + * @return the converted collection + */ + public LongSet convert(Collection a) + { + LongSet res = empty(); + Collection sorted; + if (a != null) { + if (a instanceof SortedSet && ((SortedSet) a).comparator() == null) { + sorted = a; + } else { + sorted = new ArrayList(a); + Collections.sort((List) sorted); + } + for (long i : sorted) { + res.add(i); + } + } + return res; + } + + /** + * Returns the first (lowest) element currently in this set. + * + * @return the first (lowest) element currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public long first() + { + if (!firstIndices.isEmpty()) { + return firstIndices.first(); + } + if (otherIndices.isEmpty()) { + throw new NoSuchElementException(); + } + Entry e = otherIndices.firstEntry(); + return e.getKey().longValue() + e.getValue().first(); + } + + /** + * Returns the last (highest) element currently in this set. 
+ * + * @return the last (highest) element currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public long last() + { + if (otherIndices.isEmpty() && firstIndices.isEmpty()) { + throw new NoSuchElementException(); + } + if (!otherIndices.isEmpty()) { + Entry e = otherIndices.lastEntry(); + return e.getKey().longValue() + e.getValue().last(); + } + return firstIndices.last(); + } + + /** + * @return the number of elements in this set (its cardinality) + */ + public long size() + { + long res = firstIndices.size(); + for (Entry e : otherIndices.entrySet()) { + res += e.getValue().size(); + } + return res; + } + + /** + * @return true if this set contains no elements + */ + public boolean isEmpty() + { + return firstIndices.isEmpty() && otherIndices.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return 31 * firstIndices.hashCode() + otherIndices.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof LongSet)) { + return false; + } + final LongSet other = (LongSet) obj; + return firstIndices.equals(other.firstIndices) + && otherIndices.equals(other.otherIndices); + } + + /** + * Returns true if this set contains the specified element. + * + * @param i element whose presence in this set is to be tested + * + * @return true if this set contains the specified element + */ + public boolean contains(long i) + { + if (i < SUBSET_SIZE) { + return firstIndices.contains((int) i); + } + long first = (i / SUBSET_SIZE) * SUBSET_SIZE; + IntSet s = otherIndices.get(first); + if (s == null) { + return false; + } + return s.contains((int) (i - first)); + } + + /** + * Adds the specified element to this set if it is not already present. It + * ensures that sets never contain duplicate elements. 
+ * + * @param i element to be added to this set + * + * @return true if this set did not already contain the specified + * element + * + * @throws IllegalArgumentException if some property of the specified element prevents it from + * being added to this set + */ + public boolean add(long i) + { + if (i < SUBSET_SIZE) { + return firstIndices.add((int) i); + } + long first = (i / SUBSET_SIZE) * SUBSET_SIZE; + IntSet s = otherIndices.get(first); + if (s == null) { + otherIndices.put(first, s = firstIndices.empty()); + } + return s.add((int) (i - first)); + } + + /** + * Removes the specified element from this set if it is present. + * + * @param i object to be removed from this set, if present + * + * @return true if this set contained the specified element + * + * @throws UnsupportedOperationException if the remove operation is not supported by this set + */ + public boolean remove(long i) + { + if (i < SUBSET_SIZE) { + return firstIndices.remove((int) i); + } + long first = (i / SUBSET_SIZE) * SUBSET_SIZE; + IntSet s = otherIndices.get(first); + if (s == null) { + return false; + } + boolean res = s.remove((int) (i - first)); + if (res && s.isEmpty()) { + otherIndices.remove(first); + } + return res; + } + + /** + * Returns true if this set contains all of the elements of the + * specified collection. 
+ * + * @param other collection to be checked for containment in this set + * + * @return true if this set contains all of the elements of the + * specified collection + * + * @throws NullPointerException if the specified collection contains one or more null + * elements and this set does not permit null elements + * (optional), or if the specified collection is null + * @see #contains(long) + */ + @SuppressWarnings("null") + public boolean containsAll(LongSet other) + { + if (other == null || other.isEmpty() || other == this) { + return true; + } + if (isEmpty()) { + return false; + } + + if (!firstIndices.containsAll(other.firstIndices)) { + return false; + } + if (other.otherIndices.isEmpty()) { + return true; + } + if (otherIndices.isEmpty()) { + return false; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return c == 0 && !itr2.hasNext(); + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + return true; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c > 0) { + return false; + } else if (c == 0) { + if (!e1.getValue().containsAll(e2.getValue())) { + return false; + } + } + } + } + + /** + * Returns true if the specified {@link LongSet} + * instance contains any elements that are also contained within this + * {@link LongSet} instance + * + * @param other {@link LongSet} to intersect with + * + * @return a boolean indicating whether this {@link LongSet} + * intersects the specified {@link LongSet}. 
+ */ + @SuppressWarnings("null") + public boolean containsAny(LongSet other) + { + if (other == null || other.isEmpty() || other == this) { + return true; + } + if (isEmpty()) { + return false; + } + + if (firstIndices.containsAny(other.firstIndices) && !other.firstIndices.isEmpty()) { + return true; + } + if (other.otherIndices.isEmpty() || otherIndices.isEmpty()) { + return false; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return false; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + return false; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c == 0 && e1.getValue().containsAny(e2.getValue())) { + return true; + } + } + } + + /** + * Returns true if the specified {@link LongSet} + * instance contains at least minElements elements that are + * also contained within this {@link LongSet} instance + * + * @param other {@link LongSet} instance to intersect with + * @param minElements minimum number of elements to be contained within this + * {@link LongSet} instance + * + * @return a boolean indicating whether this {@link LongSet} + * intersects the specified {@link LongSet}. 
+ * + * @throws IllegalArgumentException if minElements < 1 + */ + @SuppressWarnings("null") + public boolean containsAtLeast(LongSet other, long minElements) + { + if (minElements < 1) { + throw new IllegalArgumentException(); + } + if (this == other) { + return size() >= minElements; + } + if (other == null || other.isEmpty() || isEmpty() || size() < minElements) { + return false; + } + + long res = firstIndices.intersectionSize(other.firstIndices); + if (res >= minElements) { + return true; + } + if (otherIndices.isEmpty() || other.otherIndices.isEmpty()) { + return false; + } + Iterator> itr1 = otherIndices.entrySet().iterator(); + Iterator> itr2 = other.otherIndices.entrySet().iterator(); + Entry e1 = null; + Entry e2 = null; + int c = 0; + while (true) { + if (c <= 0) { + if (itr1.hasNext()) { + e1 = itr1.next(); + } else { + return false; + } + } + if (c >= 0) { + if (itr2.hasNext()) { + e2 = itr2.next(); + } else { + return false; + } + } + + c = e1.getKey().compareTo(e2.getKey()); + if (c == 0) { + res += e1.getValue().intersectionSize(e2.getValue()); + if (res >= minElements) { + return true; + } + } + } + } + + /** + * Removes all of the elements from this set. The set will be empty after + * this call returns. + */ + public void clear() + { + firstIndices.clear(); + otherIndices.clear(); + } + + /** + * @return an array containing all the elements in this set, in the same + * order. + */ + public long[] toArray() + { + if (isEmpty()) { + return null; + } + return toArray(new long[(int) size()]); + } + + /** + * Returns an array containing all of the elements in this set. + *

+ * If this set fits in the specified array with room to spare (i.e., the + * array has more elements than this set), the element in the array + * immediately following the end of the set are left unchanged. + * + * @param a the array into which the elements of this set are to be + * stored. + * + * @return the array containing all the elements in this set + * + * @throws NullPointerException if the specified array is null + * @throws IllegalArgumentException if this set does not fit in the specified array + */ + public long[] toArray(long[] a) + { + if (a.length < size()) { + throw new IllegalArgumentException(); + } + if (isEmpty()) { + return a; + } + ExtendedLongIterator itr = longIterator(); + int i = 0; + while (itr.hasNext()) { + a[i++] = itr.next(); + } + return a; + } + + /** + * {@inheritDoc} + */ + @Override + public String toString() + { + ExtendedLongIterator itr = longIterator(); + if (!itr.hasNext()) { + return "[]"; + } + + StringBuilder sb = new StringBuilder(); + sb.append('['); + for (; ; ) { + long e = itr.next(); + sb.append(e); + if (!itr.hasNext()) { + return sb.append(']').toString(); + } + sb.append(", "); + } + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(LongSet o) + { + //TODO + throw new RuntimeException("TODO"); + } + + /** + * A {@link Iterator} -like interface that allows to "skip" some elements of the set + */ + public class ExtendedLongIterator + { + /** + * @uml.property name="itr" + * @uml.associationEnd + */ + protected IntIterator itr; + protected Iterator> otherItrs; + protected long first = 0; + /** + * @uml.property name="current" + * @uml.associationEnd + */ + protected IntSet current = null; + + private ExtendedLongIterator() + { + itr = firstIndices.iterator(); + otherItrs = otherIndices.entrySet().iterator(); + first = 0; + } + + protected void nextItr() + { + Entry e = otherItrs.next(); + current = e.getValue(); + itr = e.getValue().iterator(); + first = e.getKey().longValue(); + } + + /** + * 
@return true if the iterator has more elements. + */ + public boolean hasNext() + { + return otherItrs.hasNext() || itr.hasNext(); + } + + /** + * @return the next element in the iteration. + * + * @throws NoSuchElementException iteration has no more elements. + */ + public long next() + { + if (!itr.hasNext()) { + nextItr(); + } + return first + itr.next(); + } + + /** + * Removes from the underlying collection the last element returned by + * the iterator (optional operation). This method can be called only + * once per call to next. The behavior of an iterator is + * unspecified if the underlying collection is modified while the + * iteration is in progress in any way other than by calling this + * method. + * + * @throws UnsupportedOperationException if the remove operation is not supported by + * this Iterator. + * @throws IllegalStateException if the next method has not yet been called, + * or the remove method has already been called + * after the last call to the next method. + */ + public void remove() + { + itr.remove(); + if (current != null && current.isEmpty()) { + otherItrs.remove(); + } + } + + /** + * Skips all the elements before the the specified element, so that + * {@link #next()} gives the given element or, if it does not exist, the + * element immediately after according to the sorting provided by this + * set. + *

+ * If element is less than the next element, it does + * nothing + * + * @param element first element to not skip + */ + public void skipAllBefore(long element) + { + while (element >= first + SUBSET_SIZE) { + if (otherItrs.hasNext()) { + nextItr(); + } else { + itr.skipAllBefore(SUBSET_SIZE - 1); // no next + assert !itr.hasNext(); + return; + } + } + if (element < first) { + return; + } + itr.skipAllBefore((int) (element - first)); + } + } + + /** + * Iteration over the union of all indices, reverse order + */ + private class ReverseLongIterator extends ExtendedLongIterator + { + private ReverseLongIterator() + { + super(); + otherItrs = otherIndices.descendingMap().entrySet().iterator(); + nextItr(); + } + + @Override + protected void nextItr() + { + if (otherItrs.hasNext()) { + Entry e = otherItrs.next(); + current = e.getValue(); + itr = e.getValue().descendingIterator(); + first = e.getKey().longValue(); + } else { + itr = firstIndices.descendingIterator(); + current = null; + first = 0; + } + } + + @Override + public void skipAllBefore(long element) + { + while (element <= first) { + nextItr(); + } + if (element > first + SUBSET_SIZE) { + return; + } + itr.skipAllBefore((int) (element - first)); + } + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java new file mode 100755 index 00000000000..3c1529e204b --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/BinaryMatrix.java @@ -0,0 +1,2052 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers.matrix; + +import io.druid.extendedset.intset.IntSet; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Formatter; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Very similar to {@link IntSet} but for pairs of ints, that is a binary matrix + * + * @author Alessandro Colantonio + * @version $Id$ + * @see IntSet + */ +public class BinaryMatrix implements Cloneable, Comparable +{ + /** + * set of all rows + */ + private final List rows = new ArrayList(); + + /** + * {@link IntSet} instance to create empty rows + * + * @uml.property name="template" + * @uml.associationEnd + */ + private final IntSet template; + + /** + * used to cache the returned value + */ + private final int[] resultCache = new int[2]; + + /** + * Creates an empty matrix. The matrix is internally represented by putting + * rows (transactions) in sequence. The provided constructor allows to + * specify which {@link IntSet} instance must be used to internally + * represent rows. 
+ * + * @param template {@link IntSet} instance to create empty rows + */ + public BinaryMatrix(IntSet template) + { + this.template = template; + } + + /** + * @return {@link IntSet} instance internally used to represent rows + */ + public IntSet emptyRow() + { + return template.empty(); + } + + /** + * Remove null cells at the end of {@link #rows} + */ + private void fixRows() + { + int last = rows.size() - 1; + while (last >= 0 && rows.get(last) == null) { + rows.remove(last--); + } + } + + /** + * Generates the intersection matrix + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #retainAll(BinaryMatrix) + */ + public BinaryMatrix intersection(BinaryMatrix other) + { + BinaryMatrix res = empty(); + final int rowCount = Math.min(rows.size(), other.rows.size()); + for (int i = 0; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 == null || s2 == null) { + res.rows.add(null); + } else { + IntSet r = s1.intersection(s2); + if (r.isEmpty()) { + res.rows.add(null); + } else { + res.rows.add(r); + } + } + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + res.fixRows(); + return res; + } + + /** + * Generates the union matrix + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #addAll(BinaryMatrix) + */ + public BinaryMatrix union(BinaryMatrix other) + { + BinaryMatrix res = empty(); + final int rowCount = Math.min(rows.size(), other.rows.size()); + int i = 0; + for (; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 == null) { + if (s2 == null) { + res.rows.add(null); + } else { + res.rows.add(s2.clone()); + } + } else { + if (s2 == null) { + res.rows.add(s1.clone()); + } else { + res.rows.add(s1.union(s2)); + } + } + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + 
for (; i < rows.size(); i++) { + IntSet s = rows.get(i); + res.rows.add(s == null ? null : s.clone()); + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + for (; i < other.rows.size(); i++) { + IntSet s = other.rows.get(i); + res.rows.add(s == null ? null : s.clone()); + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + return res; + } + + /** + * Generates the difference matrix + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #removeAll(BinaryMatrix) + */ + public BinaryMatrix difference(BinaryMatrix other) + { + BinaryMatrix res = empty(); + final int rowCount = Math.min(rows.size(), other.rows.size()); + int i = 0; + for (; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 == null) { + res.rows.add(null); + } else { + if (s2 == null) { + res.rows.add(s1.clone()); + } else { + IntSet r = s1.difference(s2); + res.rows.add(r.isEmpty() ? null : r); + } + } + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + for (; i < rows.size(); i++) { + IntSet s = rows.get(i); + res.rows.add(s == null ? 
null : s.clone()); + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + res.fixRows(); + return res; + } + + /** + * Generates the symmetric difference matrix + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the result of the operation + * + * @see #flip(int, int) + */ + public BinaryMatrix symmetricDifference(BinaryMatrix other) + { + BinaryMatrix res = empty(); + final int rowCount = Math.min(rows.size(), other.rows.size()); + int i = 0; + for (; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 == null) { + if (s2 == null) { + res.rows.add(null); + } else { + res.rows.add(s2.clone()); + } + } else { + if (s2 == null) { + res.rows.add(s1.clone()); + } else { + res.rows.add(s1.symmetricDifference(s2)); + } + } + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + for (; i < rows.size(); i++) { + IntSet s = rows.get(i); + res.rows.add(s == null ? null : s.clone()); + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + for (; i < other.rows.size(); i++) { + IntSet s = other.rows.get(i); + res.rows.add(s == null ? null : s.clone()); + assert res.rows.get(i) == null || !res.rows.get(i).isEmpty(); + } + res.fixRows(); + return res; + } + + /** + * Generates the complement matrix, namely flipping all the cells. + * + * @return the complement matrix + * + * @see BinaryMatrix#complement() + */ + public BinaryMatrix complemented() + { + BinaryMatrix res = empty(); + + final int maxCol = maxCol(); + + for (int i = 0; i < rows.size(); i++) { + IntSet s = rows.get(i); + + if (s == null) { + s = template.empty(); + s.fill(0, maxCol); + } else { + s.add(maxCol + 1); + s.complemented(); + if (s.isEmpty()) { + s = null; + } + } + + res.rows.add(s); + } + + res.fixRows(); + return res; + } + + /** + * Complements the current matrix. 
+ * + * @see BinaryMatrix#complemented() + */ + public void complement() + { + final int maxCol = maxCol(); + + for (int i = 0; i < rows.size(); i++) { + IntSet s = rows.get(i); + + if (s == null) { + s = template.empty(); + s.fill(0, maxCol - 1); + rows.set(i, s); + } else { + s.add(maxCol + 1); + s.complement(); + if (s.isEmpty()) { + rows.set(i, null); + } + } + } + + fixRows(); + } + + /** + * Returns true if the specified {@link BinaryMatrix} instance + * contains any cell that is also contained within this {@link BinaryMatrix} + * instance + * + * @param other {@link BinaryMatrix} to intersect with + * + * @return a boolean indicating whether this {@link BinaryMatrix} intersects + * the specified {@link BinaryMatrix}. + */ + public boolean containsAny(BinaryMatrix other) + { + final int rowCount = Math.min(rows.size(), other.rows.size()); + for (int i = 0; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 != null && s2 != null) { + if (s1.containsAny(s2)) { + return true; + } + } + } + return false; + } + + /** + * Returns true if the specified {@link BinaryMatrix} instance + * contains at least minElements cells that are also contained + * within this {@link BinaryMatrix} instance + * + * @param other {@link BinaryMatrix} instance to intersect with + * @param minCells minimum number of cells to be contained within this + * {@link BinaryMatrix} instance + * + * @return a boolean indicating whether this {@link BinaryMatrix} intersects + * the specified {@link BinaryMatrix}. 
+ * + * @throws IllegalArgumentException if minElements < 1 + */ + public boolean containsAtLeast(BinaryMatrix other, int minCells) + { + // special cases + if (minCells < 1) { + throw new IllegalArgumentException(); + } + int size = size(); + if ((size < minCells) || other == null || other.isEmpty() || isEmpty()) { + return false; + } + if (this == other) { + return size >= minCells; + } + + // exact count before the last row + int res = 0; + final int last = Math.min(rows.size(), other.rows.size()) - 1; + for (int i = 0; i < last; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 != null && s2 != null) { + res += s1.intersectionSize(s2); + if (res >= minCells) { + return true; + } + } + } + + // last row more efficient! + IntSet l1 = rows.get(last); + IntSet l2 = other.rows.get(last); + if (l1 == null || l2 == null) { + return false; + } + return l1.containsAtLeast(l2, minCells - res); + } + + /** + * Computes the intersection matrix size. + *

+ * This is faster than calling {@link #intersection(BinaryMatrix)} and then + * {@link #size()} + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the size + */ + public int intersectionSize(BinaryMatrix other) + { + int res = 0; + final int rowCount = Math.min(rows.size(), other.rows.size()); + for (int i = 0; i < rowCount; i++) { + IntSet s1 = rows.get(i); + IntSet s2 = other.rows.get(i); + if (s1 != null && s2 != null) { + res += s1.intersectionSize(s2); + } + } + return res; + } + + /** + * Computes the union matrix size. + *

+ * This is faster than calling {@link #union(BinaryMatrix)} and then + * {@link #size()} + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the size + */ + public int unionSize(BinaryMatrix other) + { + return other == null ? size() : size() + other.size() - intersectionSize(other); + } + + /** + * Computes the symmetric difference matrix size. + *

+ * This is faster than calling {@link #symmetricDifference(BinaryMatrix)} + * and then {@link #size()} + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the size + */ + public int symmetricDifferenceSize(BinaryMatrix other) + { + return other == null ? size() : size() + other.size() - 2 * intersectionSize(other); + } + + /** + * Computes the difference matrix size. + *

+ * This is faster than calling {@link #difference(BinaryMatrix)} and then + * {@link #size()} + * + * @param other {@link BinaryMatrix} instance that represents the right + * operand + * + * @return the size + */ + public int differenceSize(BinaryMatrix other) + { + return other == null ? size() : size() - intersectionSize(other); + } + + /** + * Computes the complement set size. + *

+ * This is faster than calling {@link #complemented()} and then + * {@link #size()} + * + * @return the size + */ + public int complementSize() + { + final int maxCol = maxCol(); + int res = 0; + for (int i = 0; i < rows.size(); i++) { + IntSet s = rows.get(i); + res += maxCol + 1; + if (s != null) { + res -= s.size(); + } + } + return res; + } + + /** + * Generates an empty matrix of the same dimension + * + * @return the empty matrix + */ + public BinaryMatrix empty() + { + return new BinaryMatrix(template); + } + + /** + * See the clone() of {@link Object} + * + * @return cloned object + */ + @Override + public BinaryMatrix clone() + { + BinaryMatrix res = empty(); + for (IntSet r : rows) { + res.rows.add(r == null ? null : r.clone()); + } + return res; + } + + /** + * Computes the compression factor of the equivalent bitmap representation + * (1 means not compressed, namely a memory footprint similar to + * {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.) + * + * @return the compression factor + */ + public double bitmapCompressionRatio() + { + throw new UnsupportedOperationException("TODO"); //TODO + } + + /** + * Computes the compression factor of the equivalent integer collection (1 + * means not compressed, namely a memory footprint similar to + * {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.) 
+   *
+   * @return the compression factor
+   */
+  public double collectionCompressionRatio()
+  {
+    throw new UnsupportedOperationException("TODO"); //TODO
+  }
+
+  /**
+   * The array returned by {@code next()} is reused across calls: index 0 is
+   * the row, index 1 the column.
+   *
+   * @return a {@link CellIterator} instance to iterate over the matrix
+   */
+  public CellIterator iterator()
+  {
+    if (isEmpty()) {
+      return new CellIterator()
+      {
+        @Override
+        public boolean hasNext() {return false;}
+
+        @Override
+        public int[] next() {throw new NoSuchElementException();}
+
+        @Override
+        public void remove() {throw new IllegalStateException();}
+
+        @Override
+        public void skipAllBefore(int row, int col) {return;}
+      };
+    }
+
+    return new CellIterator()
+    {
+      private final int[] itrResultCache = new int[2];
+      int curRow = 0;
+      IntSet.IntIterator curRowItr;
+
+      {
+        // position on the first non-null row
+        while (rows.get(curRow) == null) {
+          curRow++;
+        }
+        curRowItr = rows.get(curRow).iterator();
+        itrResultCache[0] = curRow;
+      }
+
+      @Override
+      public int[] next()
+      {
+        if (!curRowItr.hasNext()) {
+          // current row exhausted: advance to the next non-null row
+          IntSet s;
+          while ((s = rows.get(++curRow)) == null) {/**/}
+          curRowItr = s.iterator();
+          itrResultCache[0] = curRow;
+        }
+        itrResultCache[1] = curRowItr.next();
+        return itrResultCache;
+      }
+
+      @Override
+      public boolean hasNext()
+      {
+        // assumes the last entry of rows is never null or empty (fixRows()
+        // trims trailing nulls) - TODO confirm for every mutator
+        return curRow < rows.size() - 1 || curRowItr.hasNext();
+      }
+
+      @Override
+      public void skipAllBefore(int row, int col)
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+
+      @Override
+      public void remove()
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+    };
+  }
+
+  /**
+   * The array returned by {@code next()} is reused across calls: index 0 is
+   * the row, index 1 the column.
+   *
+   * @return a {@link CellIterator} instance to iterate over the matrix in
+   * descending order
+   */
+  public CellIterator descendingIterator()
+  {
+    if (isEmpty()) {
+      return new CellIterator()
+      {
+        @Override
+        public boolean hasNext() {return false;}
+
+        @Override
+        public int[] next() {throw new NoSuchElementException();}
+
+        @Override
+        public void remove() {throw new IllegalStateException();}
+
+        @Override
+        public void skipAllBefore(int row, int col) {return;}
+      };
+    }
+
+    return new CellIterator()
+    {
+      // index of the first non-null row, where the descending walk ends
+      final int minRow;
+      private final int[] itrResultCache = new int[2];
+      int curRow = rows.size() - 1;
+      IntSet.IntIterator curRowItr;
+
+      {
+        int m = 0;
+        while (rows.get(m) == null) {
+          m++;
+        }
+        minRow = m;
+        // assumes the last entry of rows is non-null - TODO confirm
+        curRowItr = rows.get(curRow).descendingIterator();
+        itrResultCache[0] = curRow;
+      }
+
+      @Override
+      public int[] next()
+      {
+        if (!curRowItr.hasNext()) {
+          // current row exhausted: step down to the previous non-null row
+          IntSet s;
+          while ((s = rows.get(--curRow)) == null) {/**/}
+          curRowItr = s.descendingIterator();
+          itrResultCache[0] = curRow;
+        }
+        itrResultCache[1] = curRowItr.next();
+        return itrResultCache;
+      }
+
+      @Override
+      public boolean hasNext()
+      {
+        return curRow > minRow || curRowItr.hasNext();
+      }
+
+      @Override
+      public void skipAllBefore(int row, int col)
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+
+      @Override
+      public void remove()
+      {
+        throw new UnsupportedOperationException("TODO"); //TODO
+      }
+    };
+  }
+
+  /**
+   * Prints debug info about the given {@link BinaryMatrix} implementation:
+   * one line per row, prefixed by the row index, with "-" for absent rows.
+   *
+   * @return a string that describes the internal representation of the
+   * instance
+   */
+  public String debugInfo()
+  {
+    if (isEmpty()) {
+      return "empty";
+    }
+
+    StringBuilder s = new StringBuilder();
+    Formatter f = new Formatter(s);
+
+    // index width derived from the number of digits of the row count
+    String format = String.format("%%%dd) ", (int) Math.log10(rows.size()) + 1);
+    for (int i = 0; i < rows.size(); i++) {
+      f.format(format, i);
+      s.append(rows.get(i) == null ? "-" : rows.get(i).toString());
+      s.append('\n');
+    }
+
+    return s.toString();
+  }
+
+  /**
+   * Adds to the matrix all the cells of the specified sub-matrix, both
+   * corners included.
+ * + * @param fromRow first row of the sub-matrix + * @param fromCol first column of the sub-matrix + * @param toRow last row of the sub-matrix + * @param toCol last column of the sub-matrix + */ + public void fill(int fromRow, int fromCol, int toRow, int toCol) + { + if (fromRow > toRow) { + throw new IndexOutOfBoundsException("fromRow: " + fromRow + " > toRow: " + toRow); + } + if (fromCol > toCol) { + throw new IndexOutOfBoundsException("fromCol: " + fromCol + " > toCol: " + toCol); + } + + for (int r = rows.size(); r <= toRow; r++) { + rows.add(null); + } + + for (int r = fromRow; r <= toRow; r++) { + IntSet s = rows.get(r); + if (s == null) { + rows.set(r, s = template.empty()); + } + s.fill(fromCol, toCol); + } + } + + /** + * Removes from the set all the cells of the specified sub-matrix, both + * corners included. + * + * @param fromRow first row of the sub-matrix + * @param fromCol first column of the sub-matrix + * @param toRow last row of the sub-matrix + * @param toCol last column of the sub-matrix + */ + public void clear(int fromRow, int fromCol, int toRow, int toCol) + { + if (fromRow > toRow) { + throw new IndexOutOfBoundsException("fromRow: " + fromRow + " > toRow: " + toRow); + } + if (fromCol > toCol) { + throw new IndexOutOfBoundsException("fromCol: " + fromCol + " > toCol: " + toCol); + } + + for (int r = Math.min(toRow, rows.size() - 1); r >= fromRow; r--) { + IntSet s = rows.get(r); + if (s == null) { + continue; + } + s.clear(fromCol, toCol); + if (s.isEmpty()) { + rows.set(r, null); + } + } + fixRows(); + } + + /** + * Adds the cell if it not existing, or removes it if existing + * + * @param row row of the cell to flip + * @param col column of the cell to flip + * + * @see #symmetricDifference(BinaryMatrix) + */ + public void flip(int row, int col) + { + while (row >= rows.size()) { + rows.add(null); + } + IntSet r = rows.get(row); + if (r == null) { + rows.set(row, r = template.empty()); + } + r.flip(col); + if (r.isEmpty()) { + 
rows.set(row, null); + fixRows(); + } + } + + /** + * Gets the ith cell of the matrix. + * IMPORTANT: each call returns an array of two elements, where the + * first element is the row, while the second element is the column of the + * current cell. In order to reduce the produced heap garbage, there is only + * one array instantiated for each {@link BinaryMatrix} instance, + * whose content is overridden at each method call. + * + * @param i position of the cell in the sorted matrix + * + * @return the ith cell of the matrix, as a pair + * <row,column> + * + * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to + * {@link #size()} + */ + public int[] get(int i) + { + for (int r = 0; r < rows.size(); r++) { + IntSet s = rows.get(r); + if (s == null) { + continue; + } + int ss = s.size(); + if (ss <= i) { + i -= ss; + } else { + resultCache[0] = r; + resultCache[1] = s.get(i); + return resultCache; + } + } + throw new NoSuchElementException(); + } + + /** + * Provides position of cell within the matrix. + *

+ * It returns -1 if the cell does not exist within the set. + * + * @param row row of the cell + * @param col column of the cell + * + * @return the cell position + */ + public int indexOf(int row, int col) + { + if (row >= rows.size() || rows.get(row) == null) { + return -1; + } + int res = rows.get(row).indexOf(col); + if (res == -1) { + return -1; + } + for (int r = 0; r < row; r++) { + IntSet s = rows.get(r); + if (s == null) { + continue; + } + res += s.size(); + } + return res; + } + + /** + * Converts a given matrix of boolean n x m into an instance + * of the current class. + * + * @param a array to use to generate the new instance + * + * @return the converted collection + */ + public BinaryMatrix convert(boolean[][] a) + { + throw new UnsupportedOperationException("TODO"); //TODO + } + + /** + * Returns the first (lowest) cell currently in this set. IMPORTANT: + * each call returns an array of two elements, where the first element is + * the row, while the second element is the column of the current cell. In + * order to reduce the produced heap garbage, there is only one array + * instantiated for each {@link BinaryMatrix} instance, whose content is + * overridden at each method call. + * + * @return the first (lowest) cell currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public int[] first() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + + // find the first non-empty row + int i = 0; + IntSet s; + while ((s = rows.get(i)) == null) { + i++; + } + + // prepare the result + resultCache[0] = i; + resultCache[1] = s.first(); + return resultCache; + } + + /** + * Returns the last (highest) cell currently in this set. IMPORTANT: + * each call returns an array of two elements, where the first element is + * the row, while the second element is the column of the current cell. 
In + * order to reduce the produced heap garbage, there is only one array + * instantiated for each {@link BinaryMatrix} instance, whose content is + * overridden at each method call. + * + * @return the last (highest) cell currently in this set + * + * @throws NoSuchElementException if this set is empty + */ + public int[] last() + { + if (isEmpty()) { + throw new NoSuchElementException(); + } + resultCache[0] = rows.size() - 1; + resultCache[1] = rows.get(rows.size() - 1).last(); + return resultCache; + } + + /** + * @return the number of cells in this matrix (its cardinality) + */ + public int size() + { + int res = 0; + for (IntSet s : rows) { + if (s != null) { + res += s.size(); + } + } + return res; + } + + /** + * @return true if this matrix contains no cells + */ + public boolean isEmpty() + { + return rows.isEmpty(); + } + + /** + * Returns true if this set contains the specified cell. + * + * @param row row of the cell + * @param col column of the cell + * + * @return true if this matrix contains the specified cell + */ + public boolean contains(int row, int col) + { + return row >= 0 && col >= 0 && row < rows.size() + && rows.get(row) != null && rows.get(row).contains(col); + } + + /** + * Adds the specified cell to this matrix if it is not already present. It + * ensures that matrices never contain duplicate cells. + * + * @param row row of the cell + * @param col column of the cell + * + * @return true if this matrix did not already contain the + * specified cell + * + * @throws IllegalArgumentException if some property of the specified cell prevents it from being + * added to this matrix + */ + public boolean add(int row, int col) + { + while (row >= rows.size()) { + rows.add(null); + } + IntSet r = rows.get(row); + if (r == null) { + rows.set(row, r = template.empty()); + } + return r.add(col); + } + + /** + * Adds the specified cells to this matrix, if not already present. The + * cells are represented by a given row and a set of columns. 
+   *
+   * @param row  index of the row
+   * @param cols indices of the columns
+   *
+   * @return {@code true} if this matrix did not already contain the
+   * specified cells; {@code false} when {@code cols} is null or empty
+   *
+   * @throws IllegalArgumentException if some property of the specified cell prevents it from being
+   *                                  added to this matrix
+   */
+  public boolean addAll(int row, IntSet cols)
+  {
+    // Nothing to add: return before touching "rows". Otherwise the padding
+    // below would leave null/empty trailing rows behind, and isEmpty() and
+    // last() (which only look at the row list) would see a non-empty matrix.
+    if (cols == null || cols.isEmpty()) {
+      return false;
+    }
+    while (row >= rows.size()) {
+      rows.add(null);
+    }
+    IntSet r = rows.get(row);
+    if (r == null) {
+      rows.set(row, r = template.empty());
+    }
+    return r.addAll(cols);
+  }
+
+  /**
+   * Adds the specified cells to this matrix, if not already present. The
+   * cells are represented by a given set of rows and a given column
+   *
+   * @param rowSet indices of the rows
+   * @param col    index of the column
+   *
+   * @return {@code true} if this matrix did not already contain the
+   * specified cells
+   *
+   * @throws IllegalArgumentException if some property of the specified cell prevents it from being
+   *                                  added to this matrix
+   */
+  public boolean addAll(IntSet rowSet, int col)
+  {
+    if (rowSet == null || rowSet.isEmpty()) {
+      return false;
+    }
+
+    // prepare the space: make sure the highest requested row exists
+    final int l = rowSet.last();
+    while (l >= rows.size()) {
+      rows.add(null);
+    }
+
+    boolean res = false;
+    IntSet.IntIterator itr = rowSet.iterator();
+    while (itr.hasNext()) {
+      int r = itr.next();
+      IntSet s = rows.get(r);
+      if (s == null) {
+        // row did not exist yet: create it holding just this column
+        rows.set(r, template.convert(col));
+        res = true;
+      } else {
+        res |= s.add(col);
+      }
+    }
+    return res;
+  }
+
+  /**
+   * Adds the specified cells to this matrix, if not already present.
The
+   * cells are represented by the Cartesian product of a given set of rows and
+   * columns
+   *
+   * @param rowSet indices of the rows
+   * @param colSet indices of the columns
+   *
+   * @return {@code true} if this matrix did not already contain the
+   * specified cells
+   *
+   * @throws IllegalArgumentException if some property of the specified cell prevents it from being
+   *                                  added to this matrix
+   */
+  public boolean addAll(IntSet rowSet, IntSet colSet)
+  {
+    if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) {
+      return false;
+    }
+
+    // grow the row list so that the highest requested row is addressable
+    final int highest = rowSet.last();
+    while (rows.size() <= highest) {
+      rows.add(null);
+    }
+
+    boolean changed = false;
+    for (IntSet.IntIterator it = rowSet.iterator(); it.hasNext(); ) {
+      final int row = it.next();
+      final IntSet existing = rows.get(row);
+      if (existing != null) {
+        changed |= existing.addAll(colSet);
+      } else {
+        // brand-new row: start from an empty set and copy the columns in
+        final IntSet fresh = template.empty();
+        fresh.addAll(colSet);
+        rows.set(row, fresh);
+        changed = true;
+      }
+    }
+    return changed;
+  }
+
+  /**
+   * Removes the specified cell from this matrix if it is present.
+   *
+   * @param row row of the cell
+   * @param col column of the cell
+   *
+   * @return {@code true} if this matrix contained the specified cell
+   *
+   * @throws UnsupportedOperationException if the remove operation is not supported by this
+   *                                       matrix
+   */
+  public boolean remove(int row, int col)
+  {
+    if (row < 0 || col < 0 || row >= rows.size()) {
+      return false;
+    }
+    final IntSet cols = rows.get(row);
+    if (cols == null || !cols.remove(col)) {
+      return false;
+    }
+    // a row that lost its last cell is stored as null
+    if (cols.isEmpty()) {
+      rows.set(row, null);
+      fixRows();
+    }
+    return true;
+  }
+
+  /**
+   * Removes the specified cells from this matrix. The cells are represented by
+   * a given row and a set of columns.
+   *
+   * @param row  index of the row
+   * @param cols indices of the columns
+   *
+   * @return {@code true} if this matrix contains at least one of the
+   * specified cells
+   *
+   * @throws IllegalArgumentException if some property of the specified cell prevents it from being
+   *                                  removed from this matrix
+   */
+  public boolean removeAll(int row, IntSet cols)
+  {
+    if (row < 0 || row >= rows.size()) {
+      return false;
+    }
+    IntSet r = rows.get(row);
+    if (r == null) {
+      return false;
+    }
+    if (r.removeAll(cols)) {
+      // a row that lost all its cells is stored as null
+      if (r.isEmpty()) {
+        rows.set(row, null);
+        fixRows();
+      }
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Removes the specified cells from this matrix. The cells are represented
+   * by a given set of rows and a given column
+   *
+   * @param rowSet indices of the rows; indices that do not address an
+   *               existing row are ignored
+   * @param col    index of the column
+   *
+   * @return {@code true} if this matrix contains at least one of the
+   * specified cells
+   *
+   * @throws IllegalArgumentException if some property of the specified cell prevents it from being
+   *                                  removed from this matrix
+   */
+  public boolean removeAll(IntSet rowSet, int col)
+  {
+    if (rowSet == null || rowSet.isEmpty()) {
+      return false;
+    }
+
+    boolean res = false;
+    IntSet.IntIterator itr = rowSet.iterator();
+    while (itr.hasNext()) {
+      int r = itr.next();
+      // A requested row beyond the stored rows holds no cells; skip it
+      // rather than letting rows.get(r) throw an index exception.
+      if (r < 0 || r >= rows.size()) {
+        continue;
+      }
+      IntSet s = rows.get(r);
+      if (s == null) {
+        continue;
+      }
+      res |= s.remove(col);
+      if (s.isEmpty()) {
+        rows.set(r, null);
+      }
+    }
+    if (res) {
+      fixRows();
+    }
+    return res;
+  }
+
+  /**
+   * Removes the specified cells from this matrix.
The cells are represented
+   * by the Cartesian product of a given set of rows and columns
+   *
+   * @param rowSet indices of the rows; indices that do not address an
+   *               existing row are ignored
+   * @param colSet indices of the columns
+   *
+   * @return {@code true} if this matrix contains at least one of the
+   * specified cells
+   *
+   * @throws IllegalArgumentException if some property of the specified cell prevents it from being
+   *                                  removed from this matrix
+   */
+  public boolean removeAll(IntSet rowSet, IntSet colSet)
+  {
+    if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) {
+      return false;
+    }
+
+    boolean res = false;
+    IntSet.IntIterator itr = rowSet.iterator();
+    while (itr.hasNext()) {
+      int r = itr.next();
+      // A requested row beyond the stored rows holds no cells; skip it
+      // rather than letting rows.get(r) throw an index exception.
+      if (r < 0 || r >= rows.size()) {
+        continue;
+      }
+      IntSet s = rows.get(r);
+      if (s == null) {
+        continue;
+      }
+      res |= s.removeAll(colSet);
+      if (s.isEmpty()) {
+        rows.set(r, null);
+      }
+    }
+    if (res) {
+      fixRows();
+    }
+    return res;
+  }
+
+  /**
+   * Retains only the specified cells of this matrix. The cells are
+   * represented by a given row and a set of columns: every other row is
+   * dropped, and within {@code row} only the columns in {@code cols} survive.
+   *
+   * @param row  index of the row
+   * @param cols indices of the columns
+   *
+   * @return {@code true} if this matrix changed as a result of the call
+   */
+  public boolean retainAll(int row, IntSet cols)
+  {
+    if (isEmpty()) {
+      return false;
+    }
+    if (row < 0 || row >= rows.size()) {
+      clear();
+      return true;
+    }
+
+    IntSet r = rows.get(row);
+    if (r == null) {
+      clear();
+      return true;
+    }
+    boolean res = false;
+    for (int i = 0; i < rows.size(); i++) {
+      if (i == row) {
+        continue;
+      }
+      final IntSet r1 = rows.get(i);
+      if (r1 != null) {
+        res = true;
+        rows.set(i, null);
+      }
+    }
+    res |= r.retainAll(cols);
+    // Keep the "no empty IntSet inside rows" convention used by the other
+    // mutators: a row that lost all its cells must be stored as null.
+    if (r.isEmpty()) {
+      rows.set(row, null);
+    }
+    fixRows();
+    return res;
+  }
+
+  /**
+   * Removes the specified cells from this matrix.
The cells are represented
+   * by a given set of rows and a given column
+   *
+   * NOTE: this is a retain operation. The cells identified by
+   * {@code rowSet} and {@code col} are the ones that are kept; every other
+   * cell is removed from this matrix.
+   *
+   * @param rowSet indices of the rows
+   * @param col    index of the column
+   *
+   * @return {@code true} if this matrix changed as a result of the call
+   */
+  public boolean retainAll(IntSet rowSet, int col)
+  {
+    if (isEmpty()) {
+      return false;
+    }
+    if (rowSet == null || rowSet.isEmpty()) {
+      // nothing may be retained: a non-empty matrix is wiped, which is a
+      // change (same answer as retainAll(int, IntSet) gives after clear())
+      clear();
+      return true;
+    }
+
+    // Visit every stored row once and ask rowSet whether it is listed.
+    // The previous hand-rolled merge of "i" against the rowSet iterator
+    // called next() on an exhausted iterator and discarded the last listed
+    // row without ever comparing it.
+    boolean res = false;
+    for (int i = 0; i < rows.size(); i++) {
+      final IntSet rr = rows.get(i);
+      if (rr == null) {
+        continue;
+      }
+      if (!rowSet.contains(i) || !rr.contains(col)) {
+        // row not listed, or listed but without the column: drop it
+        rows.set(i, null);
+        res = true;
+      } else if (rr.size() > 1) {
+        // keep the single requested column only
+        rr.clear();
+        rr.add(col);
+        res = true;
+      }
+    }
+    if (res) {
+      fixRows();
+    }
+    return res;
+  }
+
+  /**
+   * Removes the specified cells from this matrix.
The cells are represented
+   * by the Cartesian product of a given set of rows and columns
+   *
+   * NOTE: this is a retain operation. The cells in the Cartesian product
+   * of {@code rowSet} and {@code colSet} are the ones that are kept; every
+   * other cell is removed from this matrix.
+   *
+   * @param rowSet indices of the rows
+   * @param colSet indices of the columns
+   *
+   * @return {@code true} if this matrix changed as a result of the call
+   */
+  public boolean retainAll(IntSet rowSet, IntSet colSet)
+  {
+    if (isEmpty()) {
+      return false;
+    }
+    if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) {
+      // nothing may be retained: a non-empty matrix is wiped, which is a
+      // change (same answer as retainAll(int, IntSet) gives after clear())
+      clear();
+      return true;
+    }
+
+    // Visit every stored row once and ask rowSet whether it is listed.
+    // The previous hand-rolled merge of "i" against the rowSet iterator
+    // called next() on an exhausted iterator and discarded the last listed
+    // row without ever comparing it.
+    boolean res = false;
+    for (int i = 0; i < rows.size(); i++) {
+      final IntSet rr = rows.get(i);
+      if (rr == null) {
+        continue;
+      }
+      if (!rowSet.contains(i)) {
+        rows.set(i, null);
+        res = true;
+      } else {
+        res |= rr.retainAll(colSet);
+        if (rr.isEmpty()) {
+          rows.set(i, null);
+        }
+      }
+    }
+    if (res) {
+      fixRows();
+    }
+    return res;
+  }
+
+  /**
+   * Returns true if this matrix contains all of the cells of the
+   * specified collection.
+   *
+   * @param other matrix to be checked for containment in this matrix
+   *
+   * @return {@code true} if this matrix contains all of the cells of the
+   * specified collection; a null or empty {@code other} is trivially
+   * contained
+   *
+   * @see #contains(int, int)
+   */
+  public boolean containsAll(BinaryMatrix other)
+  {
+    if (other == null || other.isEmpty() || other == this) {
+      return true;
+    }
+    // fewer stored rows than "other" means some row of "other" is missing here
+    if (isEmpty() || rows.size() < other.rows.size()) {
+      return false;
+    }
+
+    for (int i = 0; i < other.rows.size(); i++) {
+      IntSet s1 = rows.get(i);
+      IntSet s2 = other.rows.get(i);
+      if (s2 == null) {
+        continue;
+      }
+      if (s1 == null || !s1.containsAll(s2)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns true if this matrix contains all of the cells of the
+   * specified collection.
+   *
+   * @param rowSet indices of the rows
+   * @param colSet indices of the columns
+   *
+   * @return {@code true} if this matrix contains all of the cells of the
+   * specified collection
+   */
+  public boolean containsAll(IntSet rowSet, IntSet colSet)
+  {
+    if (rowSet == null || rowSet.isEmpty() || colSet == null || colSet.isEmpty()) {
+      return true;
+    }
+    if (isEmpty()) {
+      return false;
+    }
+
+    IntSet.IntIterator itr = rowSet.iterator();
+    while (itr.hasNext()) {
+      int i = itr.next();
+      // a row that is not stored cannot contain the requested cells;
+      // answer false instead of letting rows.get(i) throw
+      if (i < 0 || i >= rows.size()) {
+        return false;
+      }
+      IntSet cols = rows.get(i);
+      if (cols == null || !cols.containsAll(colSet)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns true if this matrix contains all of the cells of the
+   * specified collection.
+   *
+   * @param row    index of the row
+   * @param colSet indices of the columns
+   *
+   * @return {@code true} if this matrix contains all of the cells of the
+   * specified collection
+   */
+  public boolean containsAll(int row, IntSet colSet)
+  {
+    if (colSet == null || colSet.isEmpty()) {
+      return true;
+    }
+    if (isEmpty() || row < 0 || row >= rows.size()) {
+      return false;
+    }
+    IntSet cols = rows.get(row);
+    return cols != null && cols.containsAll(colSet);
+  }
+
+  /**
+   * Returns true if this matrix contains all of the cells of the
+   * specified collection.
+   *
+   * @param rowSet indices of the rows
+   * @param col    index of the column
+   *
+   * @return {@code true} if this matrix contains all of the cells of the
+   * specified collection
+   */
+  public boolean containsAll(IntSet rowSet, int col)
+  {
+    if (rowSet == null || rowSet.isEmpty()) {
+      return true;
+    }
+    if (isEmpty() || col < 0) {
+      return false;
+    }
+
+    IntSet.IntIterator itr = rowSet.iterator();
+    while (itr.hasNext()) {
+      int i = itr.next();
+      // a row that is not stored cannot contain the requested cell;
+      // answer false instead of letting rows.get(i) throw
+      if (i < 0 || i >= rows.size()) {
+        return false;
+      }
+      IntSet cols = rows.get(i);
+      if (cols == null || !cols.contains(col)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Adds all of the cells in the specified collection to this matrix if
+   * they're not already present.
+   *
+   * @param other matrix containing cells to be added to this matrix
+   *
+   * @return {@code true} if this matrix changed as a result of the call
+   *
+   * @throws NullPointerException     if the specified collection is null
+   * @throws IllegalArgumentException if some property of a cell of the specified collection
+   *                                  prevents it from being added to this matrix
+   * @see #add(int, int)
+   */
+  public boolean addAll(BinaryMatrix other)
+  {
+    final int otherSize = other.rows.size();
+    final int shared = Math.min(rows.size(), otherSize);
+    boolean changed = false;
+
+    // rows present in both matrices: merge column sets
+    for (int i = 0; i < shared; i++) {
+      final IntSet theirs = other.rows.get(i);
+      if (theirs != null) {
+        final IntSet mine = rows.get(i);
+        if (mine != null) {
+          changed |= mine.addAll(theirs);
+        } else {
+          rows.set(i, theirs.clone());
+          changed = true;
+        }
+        assert rows.get(i) == null || !rows.get(i).isEmpty();
+      }
+    }
+
+    // rows that only the other matrix has: append copies of them
+    if (shared < otherSize) {
+      changed = true;
+      for (int i = shared; i < otherSize; i++) {
+        final IntSet theirs = other.rows.get(i);
+        rows.add(theirs == null ? null : theirs.clone());
+        assert rows.get(i) == null || !rows.get(i).isEmpty();
+      }
+    }
+    return changed;
+  }
+
+  /**
+   * Retains only the cells in this matrix that are contained in the specified
+   * collection. In other words, removes from this matrix all of its cells
+   * that are not contained in the specified collection.
+   *
+   * @param other matrix containing cells to be retained in this matrix
+   *
+   * @return {@code true} if this matrix changed as a result of the call
+   *
+   * @throws NullPointerException if the specified collection is null
+   * @see #remove(int, int)
+   */
+  public boolean retainAll(BinaryMatrix other)
+  {
+    final int shared = Math.min(rows.size(), other.rows.size());
+    boolean changed = false;
+
+    // rows present in both matrices: intersect column sets
+    for (int i = 0; i < shared; i++) {
+      final IntSet mine = rows.get(i);
+      if (mine != null) {
+        final IntSet theirs = other.rows.get(i);
+        if (theirs != null) {
+          changed |= mine.retainAll(theirs);
+          if (mine.isEmpty()) {
+            rows.set(i, null);
+          }
+        } else {
+          rows.set(i, null);
+          changed = true;
+        }
+        assert rows.get(i) == null || !rows.get(i).isEmpty();
+      }
+    }
+
+    // rows that only this matrix has cannot survive the intersection
+    if (shared < rows.size()) {
+      changed = true;
+      for (int i = shared; i < rows.size(); i++) {
+        rows.set(i, null);
+      }
+    }
+
+    if (changed) {
+      fixRows();
+    }
+    return changed;
+  }
+
+  /**
+   * Removes from this matrix all of its cells that are contained in the
+   * specified collection.
+   *
+   * @param other matrix containing cells to be removed from this matrix
+   *
+   * @return {@code true} if this matrix changed as a result of the call
+   *
+   * @throws NullPointerException if the specified collection is null
+   * @see #remove(int, int)
+   * @see #contains(int, int)
+   */
+  public boolean removeAll(BinaryMatrix other)
+  {
+    final int shared = Math.min(rows.size(), other.rows.size());
+    boolean changed = false;
+    for (int i = 0; i < shared; i++) {
+      final IntSet mine = rows.get(i);
+      final IntSet theirs = other.rows.get(i);
+      if (mine != null && theirs != null) {
+        changed |= mine.removeAll(theirs);
+        if (mine.isEmpty()) {
+          rows.set(i, null);
+        }
+        assert rows.get(i) == null || !rows.get(i).isEmpty();
+      }
+    }
+    // fixRows() is only invoked when the loop reached the last stored row;
+    // when this matrix has more rows than "other" it is skipped
+    if (changed && shared >= rows.size()) {
+      fixRows();
+    }
+    return changed;
+  }
+
+  /**
+   * Removes all of the cells from this matrix. The matrix will be empty after
+   * this call returns.
+   */
+  public void clear()
+  {
+    rows.clear();
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @return an array containing all the cells in this matrix
+   *
+   * @throws UnsupportedOperationException always
+   */
+  public boolean[][] toArray()
+  {
+    throw new UnsupportedOperationException("TODO"); //TODO
+  }
+
+  /**
+   * Returns an array containing all of the cells in this matrix.
+   *

+   * If this matrix fits in the specified array with room to spare (i.e., the
+   * array has more cells than this matrix), the cell in the array immediately
+   * following the end of the matrix are left unchanged.
+   *
+   * @param a the array into which the cells of this matrix are to be
+   *          stored.
+   *
+   * @return the array containing all the cells in this matrix
+   *
+   * @throws NullPointerException     if the specified array is null
+   * @throws IllegalArgumentException if this matrix does not fit in the specified array
+   */
+  public boolean[][] toArray(boolean[][] a)
+  {
+    throw new UnsupportedOperationException("TODO"); //TODO
+  }
+
+  /**
+   * Not implemented yet.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public int compareTo(BinaryMatrix o)
+  {
+    throw new UnsupportedOperationException("TODO"); //TODO
+  }
+
+  /**
+   * Gets a copy of the row with the given index
+   *
+   * @param row the row index
+   *
+   * @return a copy of the row's column set, or a new empty set when the row
+   * is not stored
+   *
+   * @throws IllegalArgumentException if {@code row} is negative
+   */
+  public IntSet getRow(int row)
+  {
+    if (row < 0) {
+      throw new IllegalArgumentException("negative row index: " + row);
+    }
+    final IntSet stored = row < rows.size() ? rows.get(row) : null;
+    return stored == null ? template.empty() : stored.clone();
+  }
+
+  // /**
+  // * Computes the power-set of the current matrix.
+  // *

+ // * It is a particular implementation of the algorithm Apriori (see: + // * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + // * Association Rules in Large Databases, in Proceedings of the + // * 20th International Conference on Very Large Data Bases, + // * p.487-499, 1994). The returned power-set does not contain the + // * empty matrix. + // *

+ // * The sub-matrices composing the power-set are returned in a list that is + // * sorted according to the lexicographical order provided by the integer + // * matrix. + // * + // * @return the power-set + // * @see #powerSet(int, int) + // * @see #powerSetSize() + // */ + // public List powerSet(); + // + // /** + // * Computes a subset of the power-set of the current matrix, composed by + // * those sub-matrices that have cardinality between min and + // * max. + // *

+ // * It is a particular implementation of the algorithm Apriori (see: + // * Rakesh Agrawal, Ramakrishnan Srikant, Fast Algorithms for Mining + // * Association Rules in Large Databases, in Proceedings of the + // * 20th International Conference on Very Large Data Bases, + // * p.487-499, 1994). The power-set does not contains the empty + // * matrix. + // *

+ // * The sub-matrices composing the power-set are returned in a list that is + // * sorted according to the lexicographical order provided by the integer + // * matrix. + // * + // * @param min + // * minimum sub-matrix size (greater than zero) + // * @param max + // * maximum sub-matrix size + // * @return the power-set + // * @see #powerSet() + // * @see #powerSetSize(int, int) + // */ + // public List powerSet(int min, int max); + // + // /** + // * Computes the power-set size of the current matrix. + // *

+ // * The power-set does not contains the empty matrix. + // * + // * @return the power-set size + // * @see #powerSet() + // */ + // public int powerSetSize(); + // + // /** + // * Computes the power-set size of the current matrix, composed by those + // * sub-matrices that have cardinality between min and + // * max. + // *

+ // * The returned power-set does not contain the empty matrix. + // * + // * @param min + // * minimum sub-matrix size (greater than zero) + // * @param max + // * maximum sub-matrix size + // * @return the power-set size + // * @see #powerSet(int, int) + // */ + // public int powerSetSize(int min, int max); + // + // /** + // * Computes the Jaccard similarity coefficient between this matrix and the + // * given matrix. + // *

+ // * The coefficient is defined as + // * |A intersection B| / |A union B|. + // * + // * @param other + // * the other matrix + // * @return the Jaccard similarity coefficient + // * @see #jaccardDistance(BinaryMatrix) + // */ + // public double jaccardSimilarity(BinaryMatrix other); + // + // /** + // * Computes the Jaccard distance between this matrix and the given matrix. + // *

+ // * The coefficient is defined as 1 - + // * {@link #jaccardSimilarity(BinaryMatrix)}. + // * + // * @param other + // * the other matrix + // * @return the Jaccard distance + // * @see #jaccardSimilarity(BinaryMatrix) + // */ + // public double jaccardDistance(BinaryMatrix other); + // + // /** + // * Computes the weighted version of the Jaccard similarity coefficient + // * between this matrix and the given matrix. + // *

+ // * The coefficient is defined as + // * sum of min(A_i, B_i) / sum of max(A_i, B_i). + // * + // * @param other + // * the other matrix + // * @return the weighted Jaccard similarity coefficient + // * @see #weightedJaccardDistance(BinaryMatrix) + // */ + // public double weightedJaccardSimilarity(BinaryMatrix other); + // + // /** + // * Computes the weighted version of the Jaccard distance between this + // matrix + // * and the given matrix. + // *

+  // * The coefficient is defined as 1 -
+  // * {@link #weightedJaccardSimilarity(BinaryMatrix)}.
+  // *
+  // * @param other
+  // * the other matrix
+  // * @return the weighted Jaccard distance
+  // * @see #weightedJaccardSimilarity(BinaryMatrix)
+  // */
+  // public double weightedJaccardDistance(BinaryMatrix other);
+
+  /**
+   * Gets a copy of the column with the given index
+   *
+   * @param col the column index
+   *
+   * @return the indices of the rows that contain {@code col}
+   *
+   * @throws IllegalArgumentException if {@code col} is negative
+   */
+  public IntSet getCol(int col)
+  {
+    if (col < 0) {
+      throw new IllegalArgumentException("negative column index: " + col);
+    }
+    final IntSet hits = template.empty();
+    final int rowCount = rows.size();
+    for (int row = 0; row < rowCount; row++) {
+      final IntSet cols = rows.get(row);
+      if (cols == null || !cols.contains(col)) {
+        continue;
+      }
+      hits.add(row);
+    }
+    return hits;
+  }
+
+  /**
+   * Generates a transposed matrix
+   *
+   * @return the transposed matrix
+   */
+  public BinaryMatrix transposed()
+  {
+    final BinaryMatrix flipped = empty();
+    for (int row = 0; row < rows.size(); row++) {
+      final IntSet cols = rows.get(row);
+      if (cols != null) {
+        // cell (row, col) becomes cell (col, row)
+        for (IntSet.IntIterator it = cols.iterator(); it.hasNext(); ) {
+          flipped.add(it.next(), row);
+        }
+      }
+    }
+    return flipped;
+  }
+
+  /**
+   * Generates an ASCII-art matrix representation
+   */
+  @Override
+  public String toString()
+  {
+    final int maxCol = maxCol();
+
+    // top and bottom border: '+', one '-' per column 0..maxCol, "+\n"
+    final StringBuilder border = new StringBuilder();
+    border.append('+');
+    for (int i = 0; i <= maxCol; i++) {
+      border.append('-');
+    }
+    border.append("+\n");
+
+    final StringBuilder out = new StringBuilder();
+    out.append(border);
+
+    // cells: '*' for a stored cell, ' ' otherwise
+    for (IntSet row : rows) {
+      out.append('|');
+      int col = 0;
+      if (row != null) {
+        IntSet.IntIterator itr = row.iterator();
+        while (itr.hasNext()) {
+          int c = itr.next();
+          while (col++ < c) {
+            out.append(' ');
+          }
+          out.append('*');
+        }
+      }
+      while (col++ <= maxCol) {
+        out.append(' ');
+      }
+      out.append("|\n");
+    }
+
+    out.append(border);
+    return out.toString();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean
equals(Object obj)
+  {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof BinaryMatrix)) {
+      return false;
+    }
+    // two matrices are equal when their row lists are equal
+    return rows.equals(((BinaryMatrix) obj).rows);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode()
+  {
+    int h = 1;
+    for (IntSet s : rows) {
+      h = (h << 5) - h; // h * 31
+      if (s != null) {
+        h += s.hashCode();
+      }
+    }
+    return h;
+  }
+
+  /**
+   * @return the index of the last stored row, i.e. {@code rows.size() - 1}
+   * (-1 when no row is stored)
+   */
+  public int maxRow()
+  {
+    return rows.size() - 1;
+  }
+
+  /**
+   * @return the greatest non-empty column (0 when no row is stored)
+   */
+  public int maxCol()
+  {
+    int res = 0;
+    for (IntSet row : rows) {
+      if (row != null) {
+        assert !row.isEmpty();
+        res = Math.max(res, row.last());
+      }
+    }
+    return res;
+  }
+
+  /**
+   * @return the index set of non-empty rows
+   */
+  public IntSet involvedRows()
+  {
+    IntSet res = template.empty();
+    for (int i = 0; i < rows.size(); i++) {
+      if (rows.get(i) != null) {
+        res.add(i);
+      }
+    }
+    return res;
+  }
+
+  /**
+   * @return the index set of non-empty columns
+   */
+  public IntSet involvedCols()
+  {
+    IntSet res = template.empty();
+    for (int i = 0; i < rows.size(); i++) {
+      // empty rows are stored as null; skip them instead of handing null
+      // to addAll (same null check as involvedRows() and maxCol())
+      final IntSet cols = rows.get(i);
+      if (cols != null) {
+        res.addAll(cols);
+      }
+    }
+    return res;
+  }
+
+  /**
+   * An {@link Iterator}-like interface
+   */
+  public interface CellIterator
+  {
+    /**
+     * @return true if the iterator has more cells.
+     */
+    boolean hasNext();
+
+    /**
+     * Returns the next cell in the iteration. IMPORTANT: each
+     * iteration returns an array of two elements, where the first element
+     * is the row, while the second element is the column of the current
+     * cell. In order to reduce the produced heap garbage, there is only
+     * one array instantiated for each iterator, whose content is
+     * overridden at each iteration.
+     *
+     * @return the next cell in the iteration.
+     *
+     * @throws NoSuchElementException iteration has no more cells.
+     */
+    int[] next();
+
+    /**
+     * Removes from the underlying matrix the last cell returned by the
+     * iterator (optional operation).
This method can be called only once + * per call to next. The behavior of an iterator is unspecified + * if the underlying collection is modified while the iteration is in + * progress in any way other than by calling this method. + * + * @throws UnsupportedOperationException if the remove operation is not supported by + * this Iterator. + * @throws IllegalStateException if the next method has not yet been called, + * or the remove method has already been called + * after the last call to the next method. + */ + void remove(); + + /** + * Skips all the cells before the specified cell, so that + * {@link #next()} gives the given cell or, if it does not exist, the + * cell immediately after according to the sorting provided by this set. + *

+ * If cell is less than the next cell, it does nothing + * + * @param row row of the cell + * @param col column of the cell + */ + public void skipAllBefore(int row, int col); + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java new file mode 100755 index 00000000000..dcdb34205a1 --- /dev/null +++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/Pair.java @@ -0,0 +1,106 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.wrappers.matrix; + +/** + * A class for representing a single transaction-item relationship. This class + * is mainly used in {@link PairSet} to iterate over the cells of a + * binary matrix. 
+ *
+ * @param <T> transaction type
+ * @param <I> item type
+ *
+ * @author Alessandro Colantonio
+ * @version $Id: Pair.java 140 2011-02-07 21:30:29Z cocciasik $
+ * @see PairSet
+ */
+public class Pair<T, I> implements java.io.Serializable
+{
+  /**
+   * generated ID
+   */
+  private static final long serialVersionUID = 328985131584539749L;
+
+  /**
+   * the transaction
+   */
+  public final T transaction;
+
+  /**
+   * the item
+   */
+  public final I item;
+
+  /**
+   * Creates a new transaction-item pair
+   *
+   * @param transaction the transaction of the pair
+   * @param item        the item of the pair
+   */
+  public Pair(T transaction, I item)
+  {
+    this.transaction = transaction;
+    this.item = item;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode()
+  {
+    // 524287 * i = (i << 19) - i, where 524287 is prime.
+    // This hash function avoids transactions and items to overlap,
+    // since "item" can often stay in 32 - 19 = 13 bits. Therefore, it is
+    // better than multiplying by 31.
+    // A null component contributes 0 instead of raising an exception.
+    final int hi = item == null ? 0 : item.hashCode();
+    final int ht = transaction == null ? 0 : transaction.hashCode();
+    return (hi << 19) - hi + ht;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (this == obj) {
+      return true;
+    }
+    // instanceof is false for null, so no separate null check is needed
+    if (!(obj instanceof Pair)) {
+      return false;
+    }
+    final Pair<?, ?> other = (Pair<?, ?>) obj;
+    final boolean sameTransaction =
+        transaction == null ? other.transaction == null : transaction.equals(other.transaction);
+    final boolean sameItem =
+        item == null ? other.item == null : item.equals(other.item);
+    return sameTransaction && sameItem;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public String toString()
+  {
+    return "(" + transaction + ", " + item + ")";
+  }
+}
diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java
new file mode 100755
index 00000000000..c68ffd1607c
--- /dev/null
+++ b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairMap.java
@@ -0,0 +1,448 @@
+/*
+ * (c) 2010 Alessandro Colantonio
+ *
+ *
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset.wrappers.matrix; + +import java.io.Serializable; +import java.util.AbstractCollection; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Set; + +/** + * An class that associates a value to each pair within a {@link PairSet} instance. It is not as fast as {@link HashMap} , but requires much less memory. + * + * @param < T > transaction type + * @param < I > item type + * @param < V > type of the value to associate + * + * @author Alessandro Colantonio + * @version $Id: PairMap.java 153 2011-05-30 16:39:57Z cocciasik $ + * @see PairSet + */ +public class PairMap extends AbstractMap, V> implements Serializable, Cloneable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 4699094886888004702L; + + /** + * all existing keys + * + * @uml.property name="keys" + * @uml.associationEnd + */ + private final PairSet keys; + + /** + * values related to existing keys, according to the ordering provided by {@link #keys} + */ + private final ArrayList values; + + /** + * Creates an empty map + * + * @param keys {@link PairSet} instance internally used to store indices. If + * not empty, {@link #get(Object)} will return null + * for each existing pair if we do not also put a value. 
+ */ + public PairMap(PairSet keys) + { + this.keys = keys; + values = new ArrayList(keys.size()); + for (int i = 0; i < keys.size(); i++) { + values.add(null); + } + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + keys.clear(); + values.clear(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsKey(Object key) + { + return keys.contains(key); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsValue(Object value) + { + return values.contains(value); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public V get(Object key) + { + if (key == null || !(key instanceof Pair)) { + return null; + } + int index = keys.indexOf((Pair) key); + if (index < 0) { + return null; + } + return values.get(index); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public V put(Pair key, V value) + { + boolean isNew = keys.add(key); + int index = keys.indexOf(key); + Object old = null; + if (isNew) { + values.add(index, value); + } else { + old = values.set(index, value); + } + return (V) old; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public V remove(Object key) + { + if (key == null || !(key instanceof Pair)) { + return null; + } + int index = keys.indexOf((Pair) key); + if (index < 0) { + return null; + } + keys.remove(key); + return values.remove(index); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return keys.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public PairMap clone() + { + // NOTE: do not use super.clone() since it is 10 times slower! 
+ PairMap cloned = new PairMap(keys.clone()); + cloned.values.clear(); + cloned.values.addAll(values); + return cloned; + } + + /** + * {@inheritDoc} + */ + @Override + public Set> keySet() + { + return new AbstractSet>() + { + @Override + public boolean add(Pair e) + { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() + { + PairMap.this.clear(); + } + + @Override + public boolean contains(Object o) + { + return keys.contains(o); + } + + @Override + public boolean containsAll(Collection c) + { + return keys.containsAll(c); + } + + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + @Override + public Iterator> iterator() + { + return new Iterator>() + { + Iterator> itr = keys.iterator(); + + @Override + public boolean hasNext() + { + return itr.hasNext(); + } + + @Override + public Pair next() + { + return itr.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public boolean remove(Object o) + { + throw new UnsupportedOperationException(); + } + + @Override + public int size() + { + return keys.size(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public Collection values() + { + return new AbstractCollection() + { + + @Override + public boolean add(V e) + { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() + { + PairMap.this.clear(); + } + + @Override + public boolean contains(Object o) + { + return values.contains(o); + } + + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + @Override + public Iterator iterator() + { + return new Iterator() + { + Iterator itr = values.iterator(); + + @Override + public boolean hasNext() + { + return itr.hasNext(); + } + + @Override + public V next() + { + return itr.next(); + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public boolean remove(Object o) + { + 
throw new UnsupportedOperationException(); + } + + @Override + public int size() + { + return values.size(); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public Set, V>> entrySet() + { + return new AbstractSet, V>>() + { + @Override + public boolean add(Entry, V> e) + { + V res = PairMap.this.put(e.getKey(), e.getValue()); + return res != e.getValue(); + } + + @Override + public void clear() + { + PairMap.this.clear(); + } + + @Override + public boolean contains(Object o) + { + return o != null + && o instanceof Entry + && PairMap.this.containsKey(((Entry) o).getKey()) + && PairMap.this.containsValue(((Entry) o).getValue()); + } + + @Override + public boolean isEmpty() + { + return keys.isEmpty(); + } + + @Override + public Iterator, V>> iterator() + { + return new Iterator, V>>() + { + final Iterator> keyItr = keys.iterator(); + int valueIndex = -1; + + @Override + public boolean hasNext() + { + return keyItr.hasNext(); + } + + @Override + public Entry, V> next() + { + final Pair key = keyItr.next(); + valueIndex++; + + return new Entry, V>() + { + @Override + public Pair getKey() + { + return key; + } + + @Override + public V getValue() + { + return values.get(valueIndex); + } + + @Override + public V setValue(V value) + { + return values.set(valueIndex, value); + } + + @Override + public String toString() + { + return "{" + getKey() + "=" + getValue() + "}"; + } + }; + } + + @Override + public void remove() + { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public boolean remove(Object o) + { + throw new UnsupportedOperationException(); + } + + @Override + public int size() + { + return keys.size(); + } + }; + } +} diff --git a/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java new file mode 100755 index 00000000000..41cf34b507e --- /dev/null +++ 
b/extendedset/src/main/java/io/druid/extendedset/wrappers/matrix/PairSet.java @@ -0,0 +1,1403 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset.wrappers.matrix; + +import io.druid.extendedset.AbstractExtendedSet; +import io.druid.extendedset.ExtendedSet; +import io.druid.extendedset.intset.IntSet; +import io.druid.extendedset.wrappers.IndexedSet; +import io.druid.extendedset.wrappers.IntegerSet; +import io.druid.extendedset.wrappers.matrix.BinaryMatrix.CellIterator; + +import java.io.Serializable; +import java.util.AbstractCollection; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * A set of pairs internally represented by a binary matrix.

This class can be used to represent a set of transactions, where each transaction is a set of items. Rows are transactions, columns are the items involved with each transaction. + * + * @param < T > transaction type + * @param < I > item type + * + * @author Alessandro Colantonio + * @version $Id: PairSet.java 153 2011-05-30 16:39:57Z cocciasik $ + * @see Pair + * @see IntSet + */ +public class PairSet extends AbstractExtendedSet> implements Serializable +{ + /** + * generated serial ID + */ + private static final long serialVersionUID = 7902458899512666217L; + + /** + * binary matrix + * + * @uml.property name="matrix" + * @uml.associationEnd + */ + private final BinaryMatrix matrix; + + /** + * all possible transactions + * + * @uml.property name="allTransactions" + * @uml.associationEnd + */ + private final IndexedSet allTransactions; + + /** + * all possible items + * + * @uml.property name="allItems" + * @uml.associationEnd + */ + private final IndexedSet allItems; + + /** + * Initializes the set by specifying all possible transactions and items. + * + * @param matrix {@link BinaryMatrix} instance used to internally represent the matrix + * @param transactions collection of all possible transactions. The specified + * order will be preserved within when iterating over the + * {@link PairSet} instance. + * @param items collection of all possible items. The specified order + * will be preserved within each transaction {@link PairSet}. 
+ */ + public PairSet(BinaryMatrix matrix, Collection transactions, Collection items) + { + if (transactions == null || items == null) { + throw new NullPointerException(); + } + this.matrix = matrix; + + IntSet tmp = matrix.emptyRow(); + if (transactions instanceof IndexedSet) { + allTransactions = (IndexedSet) transactions; + } else { + allTransactions = new IndexedSet(tmp.empty(), transactions).universe(); //.unmodifiable(); + } + if (items instanceof IndexedSet) { + allItems = (IndexedSet) items; + } else { + allItems = new IndexedSet(tmp.empty(), items).universe(); //.unmodifiable(); + } + } + + /** + * Initializes the set by specifying all possible transactions and items. + * + * @param matrix {@link BinaryMatrix} instance used to internally represent the + * matrix + * @param pairs arrays n x 2 of pairs of transactions (first) and items (second). + */ + public PairSet(BinaryMatrix matrix, final Object[][] pairs) + { + this(matrix, new AbstractCollection>() + { + @Override + public Iterator> iterator() + { + return new Iterator>() + { + int i = 0; + + @SuppressWarnings("unchecked") + @Override + public Pair next() {return new Pair(pairs[i][0], pairs[i++][1]);} + + @Override + public boolean hasNext() {return i < pairs.length;} + + @Override + public void remove() {throw new UnsupportedOperationException();} + }; + } + + @Override + public int size() {return pairs.length;} + }); + } + + /** + * Converts a generic collection of transaction-item pairs to a + * {@link PairSet} instance. 
+ * + * @param matrix {@link IntSet} instance used to internally represent the set + * @param pairs collection of {@link Pair} instances + */ + public PairSet(BinaryMatrix matrix, Collection> pairs) + { + if (pairs == null) { + throw new RuntimeException("null pair set"); + } + if (pairs.isEmpty()) { + throw new RuntimeException("empty pair set"); + } + + // identify all possible transactions and items and their frequencies + final Map ts = new HashMap(); + final Map is = new HashMap(); + for (Pair p : pairs) { + Integer f; + + f = ts.get(p.transaction); + f = f == null ? 1 : f + 1; + ts.put(p.transaction, f); + + f = is.get(p.item); + f = f == null ? 1 : f + 1; + is.put(p.item, f); + } + + // sort transactions and items by descending frequencies + List> sortedPairs = new ArrayList>(pairs); + Collections.sort(sortedPairs, new Comparator>() + { + @Override + public int compare(Pair o1, Pair o2) + { + int r = ts.get(o2.transaction).compareTo(ts.get(o1.transaction)); + if (r == 0) { + r = is.get(o2.item).compareTo(is.get(o1.item)); + } + return r; + } + }); + List sortedTransactions = new ArrayList(ts.keySet()); + Collections.sort(sortedTransactions, new Comparator() + { + @Override + public int compare(T o1, T o2) + { + return ts.get(o2).compareTo(ts.get(o1)); + } + }); + List sortedItems = new ArrayList(is.keySet()); + Collections.sort(sortedItems, new Comparator() + { + @Override + public int compare(I o1, I o2) + { + return is.get(o2).compareTo(is.get(o1)); + } + }); + + // identify all transactions and items + this.matrix = matrix; + matrix.add(0, 0); + allTransactions = new IndexedSet(matrix.getRow(0), sortedTransactions).universe(); // .unmodifiable(); + allItems = new IndexedSet(matrix.getRow(0), sortedItems).universe(); // .unmodifiable(); + matrix.clear(); + + // create the matrix + for (Pair p : sortedPairs) { + add(p); + } + } + + /** + * Wraps a {@link BinaryMatrix} instance with a {@link PairSet} instance. + *

+ * NOTE: the maximum item and transaction IDs are those existing in + * the binary matrix when the wrapping take place + * + * @param b a {@link BinaryMatrix} instance to wrap + * + * @return a new {@link PairSet} instance, indexed by the given matrix + */ + public static PairSet createFromBinaryMatrix(BinaryMatrix b) + { + // TODO this is a little bit costly since PairSet will allocate an array + // and a HashMap of Integers to map elements of BinaryMatrix... + // Think about a IntegerPairSet class or to an "fake" IntegerIndexedSet + // just for this purpose. + + IntegerSet t = new IntegerSet(b.emptyRow()); + t.intSet().add(b.maxRow() + 1); + t.intSet().complement(); + + IntegerSet i = new IntegerSet(b.emptyRow()); + i.intSet().add(b.maxCol() + 1); + i.intSet().complement(); + + return new PairSet(b, t, i); + } + + /** + * maps a transaction to its index and returns -1 if not found + */ + private int transactionToIndex(T t) + { + Integer r = allTransactions.absoluteIndexOf(t); + return r == null ? -1 : r.intValue(); + } + + /** + * maps an item to its index and returns -1 if not found + */ + private int itemToIndex(I i) + { + Integer r = allItems.absoluteIndexOf(i); + return r == null ? 
-1 : r.intValue(); + } + + /** + * maps a pair of indices to the corresponding {@link Pair} + */ + private Pair indexToPair(int[] i) + { + return new Pair(allTransactions.absoluteGet(i[0]), allItems.absoluteGet(i[1])); + } + + /** + * A shortcut for new PairSet<T, I>(matrix, mapping) + * + * @param bm {@link BinaryMatrix} instance to link + * + * @return the new {@link PairSet} with the given {@link BinaryMatrix} + * instance and the same mapping of this + */ + private PairSet createFromIndices(BinaryMatrix bm) + { + return new PairSet(bm, allTransactions, allItems); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet clone() + { + return createFromIndices(matrix.clone()); + } + + /** + * Checks if the given collection is a instance of {@link PairSet} with + * the same index mappings + * + * @param c collection to check + * + * @return true if the given collection is a instance of + * {@link PairSet} with the same index mappings + */ + private boolean hasSameIndices(Collection c) + { + return c != null + && (c instanceof PairSet) + && (allTransactions == ((PairSet) c).allTransactions) + && (allItems == ((PairSet) c).allItems); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean add(Pair e) + { + return add(e.transaction, e.item); + } + + /** + * Adds a single transaction-item pair + * + * @param transaction the transaction of the pair + * @param item the item of the pair + * + * @return true if the set has been changed + */ + public boolean add(T transaction, I item) + { + return matrix.add(transactionToIndex(transaction), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean addAll(Collection> c) + { + return matrix.addAll(convert(c).matrix); + } + + /** + * Add the pairs obtained from the Cartesian product of transactions + * and items + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean addAll(Collection 
trans, Collection items) + { + if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { + return false; + } + return matrix.addAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Add the pairs obtained from the Cartesian product of transactions + * and items + * + * @param trans the given transaction + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean addAll(T trans, Collection items) + { + if (trans == null || items == null || items.isEmpty()) { + return false; + } + return matrix.addAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Add the pairs obtained from the Cartesian product of transactions + * and items + * + * @param trans collection of transactions + * @param item the given item + * + * @return true if the set set has been changed + */ + public boolean addAll(Collection trans, I item) + { + if (trans == null || trans.isEmpty() || item == null) { + return false; + } + return matrix.addAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear() + { + matrix.clear(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean contains(Object o) + { + return o != null + && o instanceof Pair + && contains(((Pair) o).transaction, ((Pair) o).item); + } + + /** + * Checks if the given transaction-item pair is contained within the set + * + * @param transaction the transaction of the pair + * @param item the item of the pair + * + * @return true if the given transaction-item pair is contained + * within the set + */ + public boolean contains(T transaction, I item) + { + int t = transactionToIndex(transaction); + if (t < 0) { + return false; + } + int i = itemToIndex(item); + if (i < 0) { + return false; + } + return matrix.contains(t, i); + } + + /** + * {@inheritDoc} + */ + @Override + public 
boolean containsAll(Collection c) + { + return matrix.containsAll(convert(c).matrix); + } + + /** + * Checks if the pairs obtained from the Cartesian product of + * transactions and items are contained + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if the pairs set set has been changed + */ + public boolean containsAll(Collection trans, Collection items) + { + if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { + return true; + } + if (isEmpty()) { + return false; + } + return matrix.containsAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Checks if the pairs obtained from the Cartesian product of + * transactions and items are contained + * + * @param trans the transaction + * @param items collection of items + * + * @return true if the pairs set set has been changed + */ + public boolean containsAll(T trans, Collection items) + { + if (trans == null || items == null || items.isEmpty()) { + return true; + } + if (isEmpty()) { + return false; + } + return matrix.containsAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Checks if the pairs obtained from the Cartesian product of + * transactions and items are contained + * + * @param trans collection of transactions + * @param item the item + * + * @return true if the pairs set set has been changed + */ + public boolean containsAll(Collection trans, I item) + { + if (trans == null || trans.isEmpty() || item == null) { + return true; + } + if (isEmpty()) { + return false; + } + return matrix.containsAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean isEmpty() + { + return matrix.isEmpty(); + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator> iterator() + { + return new ExtendedIterator>() + { + CellIterator itr = matrix.iterator(); + + @Override + public 
Pair next() {return indexToPair(itr.next());} + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public void remove() {itr.remove();} + + @Override + public void skipAllBefore(Pair element) + { + itr.skipAllBefore( + transactionToIndex(element.transaction), + itemToIndex(element.item) + ); + } + }; + } + + /** + * {@inheritDoc} + */ + @Override + public ExtendedIterator> descendingIterator() + { + return new ExtendedIterator>() + { + CellIterator itr = matrix.descendingIterator(); + + @Override + public Pair next() {return indexToPair(itr.next());} + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public void remove() {itr.remove();} + + @Override + public void skipAllBefore(Pair element) + { + itr.skipAllBefore( + transactionToIndex(element.transaction), + itemToIndex(element.item) + ); + } + }; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public boolean remove(Object o) + { + return o instanceof Pair + && remove(((Pair) o).transaction, ((Pair) o).item); + } + + /** + * Removes a single transaction-item pair + * + * @param transaction the transaction of the pair + * @param item the item of the pair + * + * @return true if the pair set has been changed + */ + public boolean remove(T transaction, I item) + { + return matrix.remove(transactionToIndex(transaction), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean removeAll(Collection c) + { + return matrix.removeAll(convert(c).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean retainAll(Collection c) + { + return matrix.retainAll(convert(c).matrix); + } + + /** + * Removes the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean removeAll(Collection trans, Collection items) + { + if (trans 
== null || trans.isEmpty() || items == null || items.isEmpty()) { + return false; + } + return matrix.removeAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Removes the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans a transaction + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean removeAll(T trans, Collection items) + { + if (trans == null || items == null || items.isEmpty()) { + return false; + } + return matrix.removeAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Removes the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param item collection of items + * + * @return true if the set set has been changed + */ + public boolean removeAll(Collection trans, I item) + { + if (trans == null || trans.isEmpty() || item == null) { + return false; + } + return matrix.removeAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * Retains the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean retainAll(Collection trans, Collection items) + { + if (isEmpty()) { + return false; + } + if (trans == null || trans.isEmpty() || items == null || items.isEmpty()) { + clear(); + return true; + } + return matrix.retainAll(allTransactions.convert(trans).indices(), allItems.convert(items).indices()); + } + + /** + * Retains the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans the transaction + * @param items collection of items + * + * @return true if the set set has been changed + */ + public boolean retainAll(T trans, Collection items) + { + if (isEmpty()) { + return false; + 
} + if (trans == null || items == null || items.isEmpty()) { + clear(); + return true; + } + return matrix.retainAll(transactionToIndex(trans), allItems.convert(items).indices()); + } + + /** + * Retains the pairs obtained from the Cartesian product of transactions and + * items + * + * @param trans collection of transactions + * @param item the item + * + * @return true if the set set has been changed + */ + public boolean retainAll(Collection trans, I item) + { + if (isEmpty()) { + return false; + } + if (trans == null || trans.isEmpty() || item == null) { + clear(); + return true; + } + return matrix.retainAll(allTransactions.convert(trans).indices(), itemToIndex(item)); + } + + /** + * {@inheritDoc} + */ + @Override + public int size() + { + return matrix.size(); + } + + /** + * Gets the set of all possible transactions that can be contained within + * the set + * + * @return the set of all possible transactions that can be contained within + * the set + */ + public IndexedSet allTransactions() + { + return allTransactions; + } + + /** + * Gets the set of all possible items that can be contained within each + * transaction + * + * @return the set of all possible items that can be contained within each + * transaction + */ + public IndexedSet allItems() + { + return allItems; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return matrix.hashCode(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (!(obj instanceof PairSet)) { + return false; + } + final PairSet other = (PairSet) obj; + return hasSameIndices(other) && matrix.equals(other.matrix); + } + + /** + * Lists all items contained within a given transaction + * + * @param transaction the given transaction + * + * @return items contained within the given transaction + */ + public IndexedSet itemsOf(T transaction) + { + IndexedSet res = allItems.empty(); + 
res.indices().addAll(matrix.getRow(transactionToIndex(transaction))); + return res; + } + + /** + * Lists all transactions involved with a specified item + * + * @param item the given item + * + * @return transactions involved with a specified item + */ + public IndexedSet transactionsOf(I item) + { + IndexedSet res = allTransactions.empty(); + res.indices().addAll(matrix.getCol(itemToIndex(item))); + return res; + } + + /** + * Gets the set of transactions in {@link #allTransactions()} that contains + * at least one item + * + * @return the set of transactions in {@link #allTransactions()} that + * contains at least one item + */ + public IndexedSet involvedTransactions() + { + IndexedSet res = allTransactions.empty(); + res.indices().addAll(matrix.involvedRows()); + return res; + } + + /** + * Gets the set of items in {@link #allItems()} that are contained in at + * least one transaction + * + * @return the set of items in {@link #allItems()} that are contained in at + * least one transaction + */ + public IndexedSet involvedItems() + { + IndexedSet res = allItems.empty(); + res.indices().addAll(matrix.involvedCols()); + return res; + } + + /** + * Gets the ith element of the set + * + * @param index position of the element in the sorted set + * + * @return the ith element of the set + * + * @throws IndexOutOfBoundsException if i is less than zero, or greater or equal to + * {@link #size()} + */ + @Override + public Pair get(int index) + { + return indexToPair(matrix.get(index)); + } + + /** + * Provides position of element within the set. + *

+ * It returns -1 if the element does not exist within the set. + * + * @param element element of the set + * + * @return the element position + */ + @Override + public int indexOf(Pair element) + { + return matrix.indexOf( + transactionToIndex(element.transaction), + itemToIndex(element.item) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public String debugInfo() + { + StringBuilder s = new StringBuilder(); + + s.append("possible transactions: "); + s.append(allTransactions); + s.append('\n'); + s.append("possible items: "); + s.append(allItems); + s.append('\n'); + + s.append("pairs:\n"); + s.append(matrix.toString()); + s.append("info: " + matrix.debugInfo()); + + return s.toString(); + } + + /** + * {@inheritDoc} + */ + @Override + public double bitmapCompressionRatio() + { + return matrix.bitmapCompressionRatio(); + } + + /** + * {@inheritDoc} + */ + @Override + public double collectionCompressionRatio() + { + return matrix.collectionCompressionRatio(); + } + + /** + * Returns the set of indices. Modifications to this set are reflected to + * this {@link PairSet} instance. Trying to perform operation on + * out-of-bound indices will throw an {@link IllegalArgumentException} + * exception. + * + * @return the index set + */ + public BinaryMatrix matrix() + { + return matrix; + } + +// /** +// * Extracts a subset represented by a certain range of transactions and +// * items, according to the ordering provided by {@link #allTransactions()} +// * and {@link #allItems()}. 
+// * +// * @param fromTransaction +// * the first transaction of the range (if null it +// * represents the first one) +// * @param toTransaction +// * the last transaction of the range (if null it +// * represents the last one) +// * @param fromItem +// * the first item of the range (if null it +// * represents the first one) +// * @param toItem +// * the last item of the range (if null it represents +// * the last one) +// * @return the specified subset +// */ +// public PairSet subSet(T fromTransaction, T toTransaction, I fromItem, I toItem) { +// BinaryMatrix mask = matrix.empty(); +// mask.fill( +// transactionToIndex(fromTransaction), +// itemToIndex(fromItem), +// transactionToIndex(toTransaction), +// itemToIndex(toItem)); +// return new PairSet(matrix.intersection(mask), allTransactions, allItems); +// } +// +// /** +// * Extracts a subset represented by a collection of transactions and items +// * +// * @param involvedTransactions +// * involved transactions (if null, it represents all +// * transactions in {@link #allTransactions()}) +// * @param involvedItems +// * involved items (if null, it represents all items +// * in {@link #allItems()}) +// * @return all the transaction-item pairs that represent the specified +// * subset +// */ +// public PairSet subSet(Collection involvedTransactions, Collection involvedItems) { +// BinaryMatrix mask = matrix.empty(); +// mask.addAll( +// allTransactions.convert(involvedTransactions).indices(), +// allItems.convert(involvedItems).indices()); +// return new PairSet(matrix.intersection(mask), allTransactions, allItems); +// } + + /** + * {@inheritDoc} + */ + @Override + public PairSet empty() + { + return createFromIndices(matrix.empty()); + } + + /** + * {@inheritDoc} + */ + @Override + public void complement() + { + matrix.complement(); + } + + /** + * {@inheritDoc} + */ + @Override + public Comparator> comparator() + { + return new Comparator>() + { + @Override + public int compare(Pair o1, Pair o2) + { + int 
t1 = transactionToIndex(o1.transaction); + int t2 = transactionToIndex(o2.transaction); + int r = t1 < t2 ? -1 : (t1 == t2 ? 0 : 1); + if (r == 0) { + int i1 = itemToIndex(o1.item); + int i2 = itemToIndex(o2.item); + r = i1 < i2 ? -1 : (i1 == i2 ? 0 : 1); + } + return r; + } + }; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet convert(Collection c) + { + if (c == null) { + return empty(); + } + + // useless to convert... + if (hasSameIndices(c)) { + return (PairSet) c; + } + + // convert + PairSet res = empty(); + for (Pair p : (Collection>) c) { + res.matrix.add(transactionToIndex(p.transaction), itemToIndex(p.item)); + } + return res; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet convert(Object... e) + { + return (PairSet) super.convert(e); + } + + /** + * {@inheritDoc} + */ + @Override + public void clear(Pair from, Pair to) + { + matrix.clear( + transactionToIndex(from.transaction), + itemToIndex(from.item), + transactionToIndex(to.transaction), + itemToIndex(to.item) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public int complementSize() + { + return matrix.complementSize(); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet complemented() + { + return createFromIndices(matrix.complemented()); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet difference(Collection> other) + { + return other == null ? 
clone() : createFromIndices(matrix.difference(convert(other).matrix)); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAny(Collection> other) + { + return other == null || matrix.containsAny(convert(other).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean containsAtLeast(Collection> other, int minElements) + { + return other != null && !other.isEmpty() && matrix.containsAtLeast(convert(other).matrix, minElements); + } + + /** + * {@inheritDoc} + */ + @Override + public int differenceSize(Collection> other) + { + return other == null ? (int) size() : (int) matrix.differenceSize(convert(other).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public void fill(Pair from, Pair to) + { + matrix.fill( + transactionToIndex(from.transaction), + itemToIndex(from.item), + transactionToIndex(to.transaction), + itemToIndex(to.item) + ); + } + + /** + * {@inheritDoc} + */ + @Override + public void flip(Pair e) + { + matrix.flip( + transactionToIndex(e.transaction), + itemToIndex(e.item) + ); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet subSet(Pair fromElement, Pair toElement) + { + return (PairSet) super.subSet(fromElement, toElement); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet headSet(Pair toElement) + { + return (PairSet) super.headSet(toElement); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public PairSet tailSet(Pair fromElement) + { + return (PairSet) super.tailSet(fromElement); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet intersection(Collection> c) + { + return c == null ? 
empty() : createFromIndices(matrix.intersection(convert(c).matrix)); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public List> powerSet() + { + return (List>) super.powerSet(); + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + @Override + public List> powerSet(int min, int max) + { + return (List>) super.powerSet(min, max); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet symmetricDifference(Collection> other) + { + return other == null ? clone() : createFromIndices(matrix.symmetricDifference(convert(other).matrix)); + } + + /** + * {@inheritDoc} + */ + @Override + public int symmetricDifferenceSize(Collection> other) + { + return other == null ? (int) size() : (int) matrix.symmetricDifferenceSize(convert(other).matrix); + } + + /** + * {@inheritDoc} + */ + @Override + public PairSet union(Collection> other) + { + return other == null ? clone() : createFromIndices(matrix.union(convert(other).matrix)); + } + + /** + * {@inheritDoc} + */ + @Override + public int unionSize(Collection> other) + { + return other == null ? 
(int) size() : (int) matrix.unionSize(convert(other).matrix); + } + +// /** +// * {@inheritDoc} +// */ +// @Override +// public PairSet unmodifiable() { +// return new PairSet(allTransactions, allItems, maxTransactionCount, maxItemCount, indices.unmodifiable()); +// } + + /** + * {@inheritDoc} + */ + @Override + public Pair first() + { + return indexToPair(matrix.first()); + } + + /** + * {@inheritDoc} + */ + @Override + public Pair last() + { + return indexToPair(matrix.last()); + } + + /** + * {@inheritDoc} + */ + @Override + public int compareTo(ExtendedSet> o) + { + return matrix.compareTo(convert(o).matrix); + } + + /** + * @return a transposed {@link PairSet} instance + */ + public PairSet transposed() + { + return new PairSet(matrix.transposed(), allItems, allTransactions); + } + + /** + * Creates a new {@link PairSet} instance with the union of all possible + * transactions and items as result for {@link #allTransactions()} and + * {@link #allItems()}, respectively, and the union of pairs. + * + * @param other the other {@link PairSet} instance to merge + * + * @return the merged {@link PairSet} instance + */ + public PairSet merged(PairSet other) + { + if (other == null) { + return clone(); + } + + // compute the new universe + Set newAllTransactions = new LinkedHashSet(allTransactions); + Set newAllItems = new LinkedHashSet(allItems); + newAllTransactions.addAll(other.allTransactions); + newAllItems.addAll(other.allItems); + + // compute the union of pairs + PairSet res = new PairSet( + matrix.clone(), + newAllTransactions, + newAllItems + ); + if (!other.isEmpty()) { + res.addAll(other); + } + return res; + } + + /** + * Creates a new {@link PairSet} instance with only non-empty transactions + * and items. 
+ * + * @return the compacted {@link PairSet} instance + */ + public PairSet compacted() + { + // trivial case + if (isEmpty()) { + return empty(); + } + + // compute the new universe + final Set newAllTransactions = new LinkedHashSet(involvedTransactions()); + final Set newAllItems = new LinkedHashSet(involvedItems()); + if (newAllTransactions.size() == allTransactions.size() + && newAllItems.size() == allItems.size()) { + return clone(); + } + + // compute the union of pairs + PairSet res = new PairSet( + matrix.empty(), + newAllTransactions, + newAllItems + ); + res.addAll(this); + return res; + } + + +// // +// // COMPRESSED OBJECT SERIALIZATION +// // +// +// private static class ZipObjectOutputStream extends ObjectOutputStream { +// private GZIPOutputStream out; +// ZipObjectOutputStream(ObjectOutputStream out) throws IOException {this(new GZIPOutputStream(out));} +// ZipObjectOutputStream(GZIPOutputStream out) throws IOException {super(out); this.out = out;} +// @Override public void close() throws IOException {out.flush(); out.finish();} +// } +// +// private static class ZipObjectInputStream extends ObjectInputStream { +// ZipObjectInputStream(ObjectInputStream in) throws IOException {super(new GZIPInputStream(in));} +// } +// +// private void writeObject(ObjectOutputStream out) throws IOException { +// if (out instanceof ZipObjectOutputStream) { +// out.defaultWriteObject(); +// } else { +// ObjectOutputStream oos = new ZipObjectOutputStream(out); +// oos.writeObject(this); +// oos.close(); +// } +// } +// +// private transient Object serialize; +// +// @SuppressWarnings("unused") +// private Object readResolve() throws ObjectStreamException { +// if (serialize == null) +// serialize = this; +// return serialize; +// } +// +// private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { +// if (in instanceof ZipObjectInputStream) { +// in.defaultReadObject(); +// } else { +// ObjectInputStream ois = new 
ZipObjectInputStream(in); +// serialize = ois.readObject(); +// } +// } +} diff --git a/extendedset/src/test/java/io/druid/extendedset/Debug.java b/extendedset/src/test/java/io/druid/extendedset/Debug.java new file mode 100755 index 00000000000..7344af17e94 --- /dev/null +++ b/extendedset/src/test/java/io/druid/extendedset/Debug.java @@ -0,0 +1,1858 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.druid.extendedset; + +import io.druid.extendedset.ExtendedSet.ExtendedIterator; +import io.druid.extendedset.intset.AbstractIntSet; +import io.druid.extendedset.intset.ArraySet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.FastSet; +import io.druid.extendedset.intset.HashIntSet; +import io.druid.extendedset.intset.IntSet; +import io.druid.extendedset.utilities.IntSetStatistics; +import io.druid.extendedset.utilities.random.MersenneTwister; +import io.druid.extendedset.wrappers.GenericExtendedSet; +import io.druid.extendedset.wrappers.IndexedSet; +import io.druid.extendedset.wrappers.IntegerSet; +import io.druid.extendedset.wrappers.matrix.BinaryMatrix; +import io.druid.extendedset.wrappers.matrix.BinaryMatrix.CellIterator; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import 
java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + +//import it.uniroma3.mat.extendedset.intset.Concise2Set; + + +/** + * Test class for {@link ConciseSet}, {@link FastSet}, and {@link IndexedSet}. + * + * @author Alessandro Colantonio + * @version $Id: Debug.java 155 2011-05-30 22:27:00Z cocciasik $ + */ +public class Debug +{ + /** + * Checks if a {@link ExtendedSet} instance and a {@link TreeSet} instance + * contains the same elements. {@link TreeSet} is used because it is the + * most similar class to {@link ExtendedSet}. + * + * @param type of elements within the set + * @param bits bit-set to check + * @param items {@link TreeSet} instance that must contain the same elements + * of the bit-set + * + * @return true if the given {@link ConciseSet} and + * {@link TreeSet} are equals in terms of contained elements + */ + private static boolean checkContent(ExtendedSet bits, SortedSet items) + { + if (bits.size() != items.size()) { + return false; + } + if (bits.isEmpty() && items.isEmpty()) { + return true; + } + for (T i : bits) { + if (!items.contains(i)) { + return false; + } + } + for (T i : items) { + if (!bits.contains(i)) { + return false; + } + } + if (!bits.last().equals(items.last())) { + return false; + } + if (!bits.first().equals(items.first())) { + return false; + } + return true; + } + + /** + * Generates an empty set of the specified class + * + * @param c the given class + * + * @return the empty set + */ + private static > X empty(Class c) + { + try { + return c.newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Stress test for {@link ConciseSet#add(Integer)} + *

+ * It starts from a very sparse set (most of the words will be 0's + * sequences) and progressively become very dense (words first + * become 0's sequences with 1 set bit and there will be almost one + * word per item, then words become literals, and finally they + * become 1's sequences and drastically reduce in number) + */ + private static void testForAdditionStress(Class> c) + { + ExtendedSet previousBits = empty(c); + ExtendedSet currentBits = empty(c); + TreeSet currentItems = new TreeSet(); + + Random rnd = new MersenneTwister(); + + // add 100000 random numbers + for (int i = 0; i < 100000; i++) { + // random number to add + int item = rnd.nextInt(10000 + 1); + + // keep the previous results + previousBits = currentBits; + currentBits = currentBits.clone(); + + // add the element + System.out.format("Adding %d...\n", item); + boolean itemExistsBefore = currentItems.contains(item); + boolean itemAdded = currentItems.add(item); + boolean itemExistsAfter = currentItems.contains(item); + boolean bitExistsBefore = currentBits.contains(item); + boolean bitAdded = currentBits.add(item); + boolean bitExistsAfter = currentBits.contains(item); + if (itemAdded ^ bitAdded) { + System.out.println("wrong add() result"); + return; + } + if (itemExistsBefore ^ bitExistsBefore) { + System.out.println("wrong contains() before"); + return; + } + if (itemExistsAfter ^ bitExistsAfter) { + System.out.println("wrong contains() after"); + return; + } + + // check the list of elements + if (!checkContent(currentBits, currentItems)) { + System.out.println("add() error"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("\tcorrect: " + currentItems.toString()); + System.out.println("\twrong: " + currentBits.toString()); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the representation + 
ExtendedSet otherBits = previousBits.convert(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation error"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the union size + ExtendedSet singleBitSet = empty(c); + singleBitSet.add(item); + if (currentItems.size() != currentBits.unionSize(singleBitSet)) { + System.out.println("Size error"); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + } + + System.out.println("Final"); + System.out.println(currentBits.debugInfo()); + + System.out.println(); + System.out.println(IntSetStatistics.summary()); + } + + /** + * Stress test for {@link ConciseSet#remove(Object)} + *

+ * It starts from a very dense set (most of the words will be 1's + * sequences) and progressively become very sparse (words first + * become 1's sequences with 1 unset bit and there will be few + * words per item, then words become literals, and finally they + * become 0's sequences and drastically reduce in number) + * + * @param c class to test + */ + private static void testForRemovalStress(Class> c) + { + ExtendedSet previousBits = empty(c); + ExtendedSet currentBits = empty(c); + TreeSet currentItems = new TreeSet(); + + Random rnd = new MersenneTwister(); + + // create a 1-filled bitset + currentBits.add((1 << MatrixIntSet.COL_POW) * 5 - 1); + currentBits.complement(); + currentItems.addAll(currentBits); + if (currentItems.size() != (1 << MatrixIntSet.COL_POW) * 5 - 1) { + System.out.println("Unexpected error!"); + System.out.println(currentBits.size()); + System.out.println(currentItems.size()); + return; + } + + // remove 100000 random numbers + for (int i = 0; i < 100000 & !currentBits.isEmpty(); i++) { + // random number to remove + int item = rnd.nextInt(10000 + 1); + + // keep the previous results + previousBits = currentBits; + currentBits = currentBits.clone(); + + // remove the element + System.out.format("Removing %d...\n", item); + boolean itemExistsBefore = currentItems.contains(item); + boolean itemRemoved = currentItems.remove(item); + boolean itemExistsAfter = currentItems.contains(item); + boolean bitExistsBefore = currentBits.contains(item); + boolean bitRemoved = currentBits.remove(item); + boolean bitExistsAfter = currentBits.contains(item); + if (itemRemoved ^ bitRemoved) { + System.out.println("wrong remove() result"); + return; + } + if (itemExistsBefore ^ bitExistsBefore) { + System.out.println("wrong contains() before"); + return; + } + if (itemExistsAfter ^ bitExistsAfter) { + System.out.println("wrong contains() after"); + return; + } + + // check the list of elements + if (!checkContent(currentBits, currentItems)) { + 
System.out.println("remove() error"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + + return; + } + + // check the representation + ExtendedSet otherBits = empty(c); + otherBits.addAll(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation error"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + + return; + } + + // check the union size + ExtendedSet singleBitSet = empty(c); + singleBitSet.add(item); + if (currentItems.size() != currentBits.differenceSize(singleBitSet)) { + System.out.println("Size error"); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + + return; + } + } + + System.out.println("Final"); + System.out.println(currentBits.debugInfo()); + + System.out.println(); + System.out.println(IntSetStatistics.summary()); + } + + /** + * Random operations on random sets. + *

+ * It randomly chooses among {@link ConciseSet#addAll(Collection)}, + * {@link ConciseSet#removeAll(Collection)}, and + * {@link ConciseSet#retainAll(Collection)}, and perform the operation over + * random sets + * + * @param c class to test + */ + private static void testForRandomOperationsStress(Class> c, boolean testFillAndClear) + { + ExtendedSet bitsLeft = empty(c); + ExtendedSet bitsRight = empty(c); + SortedSet itemsLeft = new TreeSet(); + SortedSet itemsRight = new TreeSet(); + + Random r = new MersenneTwister(); + final int maxCardinality = 1000; + + // random operation loop + for (int i = 0; i < 1000000; i++) { + System.out.format("Test %,d (%,d): ", i, System.currentTimeMillis()); + + RandomNumbers rn; + switch (r.nextInt(3)) { + case 0: + rn = new RandomNumbers.Uniform( + r.nextInt(maxCardinality), + r.nextDouble() * 0.999, + r.nextInt(maxCardinality / 10) + ); + break; + case 1: + rn = new RandomNumbers.Zipfian( + r.nextInt(maxCardinality), + r.nextDouble() * 0.9, + r.nextInt(maxCardinality / 10), + 2 + ); + break; + case 2: + rn = new RandomNumbers.Markovian( + r.nextInt(maxCardinality), + r.nextDouble() * 0.999, + r.nextInt(maxCardinality / 10) + ); + break; + default: + throw new RuntimeException("unexpected"); + } + + /* + * fill() and clear() + */ + if (testFillAndClear) { + bitsRight.clear(); + itemsRight.clear(); + Iterator itr1 = rn.generate().iterator(); + Iterator itr2 = rn.generate().iterator(); + while (itr1.hasNext() && itr2.hasNext()) { + ExtendedSet clone = bitsRight.clone(); + Integer from = itr1.next(); + Integer to = itr2.next(); + if (from.compareTo(to) > 0) { + Integer s = from; + from = to; + to = s; + } + + boolean fill = r.nextBoolean(); + if (fill) { + for (int j = from; j <= to; j++) { + itemsRight.add(j); + } + bitsRight.fill(from, to); + } else { + for (int j = from; j <= to; j++) { + itemsRight.remove(j); + } + bitsRight.clear(from, to); + } + + if (!checkContent(bitsLeft, itemsLeft)) { + System.out.println("FILL/CLEAR 
ERROR!"); + System.out.println("Same elements: " + (itemsLeft.toString().equals(bitsLeft.toString()))); + System.out.println("itemsLeft:"); + System.out.println(itemsLeft); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + + System.out.println("itemsLeft.size(): " + itemsLeft.size() + " ?= bitsLeft.size(): " + bitsLeft.size()); + for (Integer x : bitsLeft) { + if (!itemsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + for (Integer x : itemsLeft) { + if (!bitsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + System.out.println("bitsLeft.last(): " + bitsLeft.last() + " ?= itemsLeft.last(): " + itemsLeft.last()); + System.out.println("bitsLeft.first(): " + bitsLeft.first() + " ?= itemsLeft.first(): " + itemsLeft.first()); + + return; + } + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("FILL/CLEAR FORMAT ERROR!"); + System.out.println("fill: " + fill); + System.out.println("from " + from + " to " + to); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + } + } + + + /* + * contains(), add(), and remove() + */ + bitsRight.clear(); + itemsRight.clear(); + for (Integer e : rn.generate()) { + if (itemsRight.contains(e) ^ bitsRight.contains(e)) { + System.out.println("CONTAINS ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + return; + } + ExtendedSet clone = 
bitsRight.clone(); + boolean resItems = itemsRight.add(e); + boolean resBits = bitsRight.add(e); + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("ADD ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + if (resItems != resBits) { + System.out.println("ADD BOOLEAN ERROR!"); + System.out.println("itemsRight.add(" + e + "): " + resItems); + System.out.println("bitsRight.add(" + e + "): " + resBits); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + return; + } + } + for (Integer e : rn.generate()) { + ExtendedSet clone = bitsRight.clone(); + boolean resItems = itemsRight.remove(e); + boolean resBits = bitsRight.remove(e); + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("REMOVE ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + if (resItems != resBits) { + System.out.println("REMOVE BOOLEAN ERROR!"); + System.out.println("itemsRight.remove(" + e + "): " + 
resItems); + System.out.println("bitsRight.remove(" + e + "): " + resBits); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + } + for (Integer e : rn.generate()) { + ExtendedSet clone = bitsRight.clone(); + if (!itemsRight.remove(e)) { + itemsRight.add(e); + } + bitsRight.flip(e); + ExtendedSet app = empty(c); + app.addAll(itemsRight); + if (bitsRight.hashCode() != app.hashCode()) { + System.out.println("FLIP ERROR!"); + System.out.println("itemsRight.contains(" + e + "): " + itemsRight.contains(e)); + System.out.println("bitsRight.contains(" + e + "): " + bitsRight.contains(e)); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("Append:"); + System.out.println(app.debugInfo()); + System.out.println("Clone:"); + System.out.println(clone.debugInfo()); + return; + } + } + + // new right operand + itemsRight = rn.generate(); + bitsRight.clear(); + bitsRight.addAll(itemsRight); + + /* + * check for content correctness, first(), and last() + */ + if (!checkContent(bitsRight, itemsRight)) { + System.out.println("RIGHT OPERAND ERROR!"); + System.out.println("Same elements: " + (itemsRight.toString().equals(bitsRight.toString()))); + System.out.println("itemsRight:"); + System.out.println(itemsRight); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + + System.out.println("itemsRight.size(): " + itemsRight.size() + " ?= bitsRight.size(): " + bitsRight.size()); + for (Integer x : bitsRight) { + if (!itemsRight.contains(x)) { + System.out.println("itemsRight does not contain " + x); + } + } + for (Integer x : itemsRight) { + if (!bitsRight.contains(x)) { + System.out.println("itemsRight does not contain " + x); + } + } + 
System.out.println("bitsRight.last(): " + bitsRight.last() + " ?= itemsRight.last(): " + itemsRight.last()); + System.out.println("bitsRight.first(): " + bitsRight.first() + " ?= itemsRight.first(): " + itemsRight.first()); + + return; + } + + /* + * containsAll() + */ + boolean bitsRes = bitsLeft.containsAll(bitsRight); + boolean itemsRes = itemsLeft.containsAll(itemsRight); + if (bitsRes != itemsRes) { + System.out.println("CONTAINS_ALL ERROR!"); + System.out.println("bitsLeft.containsAll(bitsRight): " + bitsRes); + System.out.println("itemsLeft.containsAll(itemsRight): " + itemsRes); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("bitsLeft.intersection(bitsRight)"); + System.out.println(bitsLeft.intersection(bitsRight)); + System.out.println("itemsLeft.retainAll(itemsRight)"); + itemsLeft.retainAll(itemsRight); + System.out.println(itemsLeft); + return; + } + + /* + * containsAny() + */ + bitsRes = bitsLeft.containsAny(bitsRight); + itemsRes = true; + for (Integer x : itemsRight) { + itemsRes = itemsLeft.contains(x); + if (itemsRes) { + break; + } + } + if (bitsRes != itemsRes) { + System.out.println("bitsLeft.containsAny(bitsRight): " + bitsRes); + System.out.println("itemsLeft.containsAny(itemsRight): " + itemsRes); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("bitsLeft.intersection(bitsRight)"); + System.out.println(bitsLeft.intersection(bitsRight)); + System.out.println("itemsLeft.retainAll(itemsRight)"); + itemsLeft.retainAll(itemsRight); + System.out.println(itemsLeft); + return; + } + + /* + * containsAtLeast() + */ + int l = 1 + r.nextInt(bitsRight.size() + 1); + bitsRes = bitsLeft.containsAtLeast(bitsRight, l); + int itemsResCnt = 0; + for (Integer x : itemsRight) { + if 
(itemsLeft.contains(x)) { + itemsResCnt++; + } + if (itemsResCnt >= l) { + break; + } + } + if (bitsRes != (itemsResCnt >= l)) { + System.out.println("bitsLeft.containsAtLeast(bitsRight, " + l + "): " + bitsRes); + System.out.println("itemsLeft.containsAtLeast(itemsRight, " + l + "): " + (itemsResCnt >= l)); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("bitsRight:"); + System.out.println(bitsRight.debugInfo()); + System.out.println("bitsLeft.intersection(bitsRight)"); + System.out.println(bitsLeft.intersection(bitsRight)); + System.out.println("itemsLeft.retainAll(itemsRight)"); + itemsLeft.retainAll(itemsRight); + System.out.println(itemsLeft); + return; + } + + /* + * Perform a random operation with the previous set: + * addAll() and unionSize() + * removeAll() and differenceSize() + * retainAll() and intersectionSize() + * symmetricDifference() and symmetricDifferenceSize() + * complement() and complementSize() + */ + ExtendedSet alternative = null; + int operationSize = 0; + boolean resItems = true, resBits = true; + switch (1 + r.nextInt(5)) { + case 1: + System.out.format(" union of %d elements with %d elements... ", itemsLeft.size(), itemsRight.size()); + System.out.flush(); + operationSize = bitsLeft.unionSize(bitsRight); + resItems = itemsLeft.addAll(itemsRight); + alternative = bitsLeft.union(bitsRight); + resBits = bitsLeft.addAll(bitsRight); + break; + + case 2: + System.out.format(" difference of %d elements with %d elements... ", itemsLeft.size(), itemsRight.size()); + System.out.flush(); + operationSize = bitsLeft.differenceSize(bitsRight); + resItems = itemsLeft.removeAll(itemsRight); + alternative = bitsLeft.difference(bitsRight); + resBits = bitsLeft.removeAll(bitsRight); + break; + + case 3: + System.out.format(" intersection of %d elements with %d elements... 
", itemsLeft.size(), itemsRight.size()); + System.out.flush(); + operationSize = bitsLeft.intersectionSize(bitsRight); + resItems = itemsLeft.retainAll(itemsRight); + alternative = bitsLeft.intersection(bitsRight); + resBits = bitsLeft.retainAll(bitsRight); + break; + + case 4: + System.out.format( + " symmetric difference of %d elements with %d elements... ", + itemsLeft.size(), + itemsRight.size() + ); + System.out.flush(); + operationSize = bitsLeft.symmetricDifferenceSize(bitsRight); + TreeSet temp = new TreeSet(itemsRight); + temp.removeAll(itemsLeft); + itemsLeft.removeAll(itemsRight); + itemsLeft.addAll(temp); + bitsLeft = bitsLeft.symmetricDifference(bitsRight); + alternative = bitsLeft; + break; + + case 5: + System.out.format(" complement of %d elements... ", itemsLeft.size()); + System.out.flush(); + operationSize = bitsLeft.complementSize(); + if (!itemsLeft.isEmpty()) { + if ((bitsLeft instanceof IntegerSet) && (((IntegerSet) bitsLeft).intSet() instanceof MatrixIntSet)) { + BinaryMatrix m = ((MatrixIntSet) ((IntegerSet) bitsLeft).intSet()).matrix; + int x = m.maxCol(); + for (int rx = m.maxRow(); rx >= 0; rx--) { + for (int cx = x; cx >= 0; cx--) { + if (!itemsLeft.add(MatrixIntSet.toInt(rx, cx))) { + itemsLeft.remove(MatrixIntSet.toInt(rx, cx)); + } + } + } + } else { + for (int j = itemsLeft.last(); j >= 0; j--) { + if (!itemsLeft.add(j)) { + itemsLeft.remove(j); + } + } + } + } + bitsLeft.complement(); + alternative = bitsLeft; + break; + default: + throw new RuntimeException("Unexpected error!"); + } + + // check the list of elements + if (!checkContent(bitsLeft, itemsLeft)) { + System.out.println("OPERATION ERROR!"); + System.out.println("Same elements: " + (itemsLeft.toString().equals(bitsLeft.toString()))); + System.out.println("itemsLeft:"); + System.out.println(itemsLeft); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + + System.out.println("itemsLeft.size(): " + itemsLeft.size() + " ?= bitsLeft.size(): " + 
bitsLeft.size()); + for (Integer x : bitsLeft) { + if (!itemsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + for (Integer x : itemsLeft) { + if (!bitsLeft.contains(x)) { + System.out.println("itemsLeft does not contain " + x); + } + } + System.out.println("bitsLeft.last(): " + bitsLeft.last() + " ?= itemsLeft.last(): " + itemsLeft.last()); + System.out.println("bitsLeft.first(): " + bitsLeft.first() + " ?= itemsLeft.first(): " + itemsLeft.first()); + + return; + } + + // check the size + if (itemsLeft.size() != operationSize) { + System.out.println("OPERATION SIZE ERROR"); + System.out.println("Wrong size: " + operationSize); + System.out.println("Correct size: " + itemsLeft.size()); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + return; + } + + // check the boolean result + if (resItems != resBits) { + System.out.println("OPERATION BOOLEAN ERROR!"); + System.out.println("resItems: " + resItems); + System.out.println("resBits: " + resBits); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + return; + } + + // check the internal representation of the result + ExtendedSet x = bitsLeft.empty(); + x.addAll(itemsLeft); + if (x.hashCode() != bitsLeft.hashCode()) { + System.out.println("Internal representation error!"); + System.out.println("FROM APPEND:"); + System.out.println(x.debugInfo()); + System.out.println("FROM OPERATION:"); + System.out.println(bitsLeft.debugInfo()); + return; + } + + // check similar results + if (!bitsLeft.equals(alternative)) { + System.out.println("ALTERNATIVE OPERATION ERROR!"); + System.out.println("bitsLeft:"); + System.out.println(bitsLeft.debugInfo()); + System.out.println("alternative:"); + System.out.println(alternative.debugInfo()); + return; + } + + System.out.println("done."); + } + } + + /** + * Stress test (addition) for {@link #subSet(Integer, Integer)} + */ + private static void testForSubSetAdditionStress() + { + IntegerSet 
previousBits = new IntegerSet(new ConciseSet()); + IntegerSet currentBits = new IntegerSet(new ConciseSet()); + TreeSet currentItems = new TreeSet(); + + Random rnd = new MersenneTwister(); + + for (int j = 0; j < 100000; j++) { + // keep the previous result + previousBits = currentBits; + currentBits = currentBits.clone(); + + // generate a new subview + int min = rnd.nextInt(10000); + int max = min + 1 + rnd.nextInt(10000 - (min + 1) + 1); + int item = min + rnd.nextInt((max - 1) - min + 1); + System.out.println("Adding " + item + " to the subview from " + min + " to " + max + " - 1"); + SortedSet subBits = currentBits.subSet(min, max); + SortedSet subItems = currentItems.subSet(min, max); + boolean subBitsResult = subBits.add(item); + boolean subItemsResult = subItems.add(item); + + if (subBitsResult != subItemsResult + || subBits.size() != subItems.size() + || !subBits.toString().equals(subItems.toString())) { + System.out.println("Subset error!"); + return; + } + + if (!checkContent(currentBits, currentItems)) { + System.out.println("Subview not correct!"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the representation + IntegerSet otherBits = new IntegerSet(new ConciseSet()); + otherBits.addAll(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation not correct!"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + } + + System.out.println(currentBits.debugInfo()); + System.out.println(IntSetStatistics.summary()); + } + + /** + * Stress test (addition) for {@link ConciseSet#subSet(Integer, Integer)} + */ + private static void testForSubSetRemovalStress() + { + IntegerSet 
previousBits = new IntegerSet(new ConciseSet()); + IntegerSet currentBits = new IntegerSet(new ConciseSet()); + TreeSet currentItems = new TreeSet(); + + // create a 1-filled bitset + currentBits.add(10001); + currentBits.complement(); + currentItems.addAll(currentBits); + if (currentItems.size() != 10001) { + System.out.println("Unexpected error!"); + return; + } + + Random rnd = new MersenneTwister(); + + for (int j = 0; j < 100000; j++) { + // keep the previous result + previousBits = currentBits; + currentBits = currentBits.clone(); + + // generate a new subview + int min = rnd.nextInt(10000); + int max = min + 1 + rnd.nextInt(10000 - (min + 1) + 1); + int item = rnd.nextInt(10000 + 1); + System.out.println("Removing " + item + " from the subview from " + min + " to " + max + " - 1"); + SortedSet subBits = currentBits.subSet(min, max); + SortedSet subItems = currentItems.subSet(min, max); + boolean subBitsResult = subBits.remove(item); + boolean subItemsResult = subItems.remove(item); + + if (subBitsResult != subItemsResult + || subBits.size() != subItems.size() + || !subBits.toString().equals(subItems.toString())) { + System.out.println("Subset error!"); + return; + } + + if (!checkContent(currentBits, currentItems)) { + System.out.println("Subview not correct!"); + System.out.println("Same elements: " + (currentItems.toString().equals(currentBits.toString()))); + System.out.println("Original: " + currentItems); + System.out.println(currentBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + + // check the representation + IntegerSet otherBits = new IntegerSet(new ConciseSet()); + otherBits.addAll(currentItems); + if (otherBits.hashCode() != currentBits.hashCode()) { + System.out.println("Representation not correct!"); + System.out.println(currentBits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(previousBits.debugInfo()); + return; + } + } + + System.out.println(currentBits.debugInfo()); + 
System.out.println(IntSetStatistics.summary()); + } + + /** + * Random operations on random sub sets. + *

+ * It randomly chooses among all operations and performs the operation over + * random sets + */ + private static void testForSubSetRandomOperationsStress() + { + IntegerSet bits = new IntegerSet(new ConciseSet()); + IntegerSet bitsPrevious = new IntegerSet(new ConciseSet()); + TreeSet items = new TreeSet(); + + Random rnd = new MersenneTwister(); + + // random operation loop + for (int i = 0; i < 100000; i++) { + System.out.print("Test " + i + ": "); + + // new set + bitsPrevious = bits.clone(); + if (!bitsPrevious.toString().equals(bits.toString())) { + throw new RuntimeException("clone() error!"); + } + bits.clear(); + items.clear(); + final int size = 1 + rnd.nextInt(10000); + final int min = 1 + rnd.nextInt(10000 - 1); + final int max = min + rnd.nextInt(10000 - min + 1); + final int minSub = 1 + rnd.nextInt(10000 - 1); + final int maxSub = minSub + rnd.nextInt(10000 - minSub + 1); + for (int j = 0; j < size; j++) { + int item = min + rnd.nextInt(max - min + 1); + bits.add(item); + items.add(item); + } + + // perform base checks + SortedSet bitsSubSet = bits.subSet(minSub, maxSub); + SortedSet itemsSubSet = items.subSet(minSub, maxSub); + if (!bitsSubSet.toString().equals(itemsSubSet.toString())) { + System.out.println("toString() difference!"); + System.out.println("value: " + bitsSubSet.toString()); + System.out.println("actual: " + itemsSubSet.toString()); + return; + } + if (bitsSubSet.size() != itemsSubSet.size()) { + System.out.println("size() difference!"); + System.out.println("value: " + bitsSubSet.size()); + System.out.println("actual: " + itemsSubSet.size()); + System.out.println("bits: " + bits.toString()); + System.out.println("items: " + items.toString()); + System.out.println("bitsSubSet: " + bitsSubSet.toString()); + System.out.println("itemsSubSet: " + itemsSubSet.toString()); + return; + } + if (!itemsSubSet.isEmpty() && (!bitsSubSet.first().equals(itemsSubSet.first()))) { + System.out.println("first() difference!"); + 
System.out.println("value: " + bitsSubSet.first()); + System.out.println("actual: " + itemsSubSet.first()); + System.out.println("bits: " + bits.toString()); + System.out.println("items: " + items.toString()); + System.out.println("bitsSubSet: " + bitsSubSet.toString()); + System.out.println("itemsSubSet: " + itemsSubSet.toString()); + return; + } + if (!itemsSubSet.isEmpty() && (!bitsSubSet.last().equals(itemsSubSet.last()))) { + System.out.println("last() difference!"); + System.out.println("value: " + bitsSubSet.last()); + System.out.println("actual: " + itemsSubSet.last()); + System.out.println("bits: " + bits.toString()); + System.out.println("items: " + items.toString()); + System.out.println("bitsSubSet: " + bitsSubSet.toString()); + System.out.println("itemsSubSet: " + itemsSubSet.toString()); + return; + } + + // perform the random operation + boolean resBits = false; + boolean resItems = false; + boolean exceptionBits = false; + boolean exceptionItems = false; + switch (1 + rnd.nextInt(4)) { + case 1: + System.out.format(" addAll() of %d elements on %d elements... ", bitsPrevious.size(), bits.size()); + try { + resBits = bitsSubSet.addAll(bitsPrevious); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + resItems = itemsSubSet.addAll(bitsPrevious); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + + case 2: + System.out.format(" removeAll() of %d elements on %d elements... 
", bitsPrevious.size(), bits.size()); + try { + resBits = bitsSubSet.removeAll(bitsPrevious); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + resItems = itemsSubSet.removeAll(bitsPrevious); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + + case 3: + System.out.format(" retainAll() of %d elements on %d elements... ", bitsPrevious.size(), bits.size()); + try { + resBits = bitsSubSet.retainAll(bitsPrevious); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + resItems = itemsSubSet.retainAll(bitsPrevious); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + + case 4: + System.out.format(" clear() of %d elements on %d elements... 
", bitsPrevious.size(), bits.size()); + try { + bitsSubSet.clear(); + } + catch (Exception e) { + bits.clear(); + System.out.print("\n\tEXCEPTION on bitsSubSet: " + e.getClass() + " "); + exceptionBits = true; + } + try { + itemsSubSet.clear(); + } + catch (Exception e) { + items.clear(); + System.out.print("\n\tEXCEPTION on itemsSubSet: " + e.getClass() + " "); + exceptionItems = true; + } + break; + } + + if (exceptionBits != exceptionItems) { + System.out.println("Incorrect exception!"); + return; + } + + if (resBits != resItems) { + System.out.println("Incorrect results!"); + System.out.println("resBits: " + resBits); + System.out.println("resItems: " + resItems); + return; + } + + if (!checkContent(bits, items)) { + System.out.println("Subview not correct!"); + System.out.format("min: %d, max: %d, minSub: %d, maxSub: %d\n", min, max, minSub, maxSub); + System.out.println("Same elements: " + (items.toString().equals(bits.toString()))); + System.out.println("Original: " + items); + System.out.println(bits.debugInfo()); + System.out.println(bitsPrevious.debugInfo()); + return; + } + + // check the representation + IntegerSet otherBits = new IntegerSet(new ConciseSet()); + otherBits.addAll(items); + if (otherBits.hashCode() != bits.hashCode()) { + System.out.println("Representation not correct!"); + System.out.format("min: %d, max: %d, minSub: %d, maxSub: %d\n", min, max, minSub, maxSub); + System.out.println(bits.debugInfo()); + System.out.println(otherBits.debugInfo()); + System.out.println(bitsPrevious.debugInfo()); + return; + } + + System.out.println("done."); + } + } + + /** + * Test the method {@link ExtendedSet#compareTo(ExtendedSet)} + * + * @param c class to test + */ + private static void testForComparatorSimple(Class> c) + { + ExtendedSet bitsLeft = empty(c); + ExtendedSet bitsRight = empty(c); + + bitsLeft.add(1); + bitsLeft.add(2); + bitsLeft.add(3); + bitsLeft.add(100); + bitsRight.add(1000000); + System.out.println("A: " + bitsLeft); + 
System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsLeft.add(1000000); + bitsRight.add(1); + bitsRight.add(2); + bitsRight.add(3); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsLeft.remove(100); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsRight.remove(1); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + + bitsLeft.remove(1); + bitsLeft.remove(2); + System.out.println("A: " + bitsLeft); + System.out.println("B: " + bitsRight); + System.out.println("A.compareTo(B): " + bitsLeft.compareTo(bitsRight)); + System.out.println(); + } + + /** + * Another test for {@link ExtendedSet#compareTo(ExtendedSet)} + * + * @param c class to test + */ + private static void testForComparatorComplex(Class> c) + { + ExtendedSet bitsLeft = empty(c); + ExtendedSet bitsRight = empty(c); + + Random rnd = new MersenneTwister(); + for (int i = 0; i < 10000; i++) { + // empty numbers + BigInteger correctLeft = BigInteger.ZERO; + BigInteger correctRight = BigInteger.ZERO; + bitsLeft.clear(); + bitsRight.clear(); + + int size = 10 + rnd.nextInt(10000); + RandomNumbers rn; + if (rnd.nextBoolean()) { + rn = new RandomNumbers.Uniform(rnd.nextInt(size), rnd.nextDouble() * 0.999, rnd.nextInt(size / 10)); + } else { + rn = new RandomNumbers.Markovian(rnd.nextInt(size), rnd.nextDouble() * 0.999, rnd.nextInt(size / 10)); + } + bitsLeft.addAll(rn.generate()); + if (rnd.nextBoolean()) { + bitsRight.addAll(bitsLeft); + bitsRight.add(rnd.nextInt(size)); + } else { + bitsRight.addAll(rn.generate()); + } + for 
(int x : bitsLeft.descending()) { + correctLeft = correctLeft.setBit(x); + } + for (int x : bitsRight) { + correctRight = correctRight.setBit(x); + } + + // compare them! + boolean correct = bitsLeft.compareTo(bitsRight) == correctLeft.compareTo(correctRight); + System.out.println(i + ": " + correct); + if (!correct) { + System.out.println("ERROR!"); + System.out.println("bitsLeft: " + bitsLeft); + System.out.println(" " + bitsLeft.debugInfo()); + System.out.println("bitsRight: " + bitsRight); + System.out.println(" " + bitsRight.debugInfo()); + int maxLength = Math.max(correctLeft.bitLength(), correctRight.bitLength()); + System.out.format("correctLeft.toString(2): %" + maxLength + "s\n", correctLeft.toString(2)); + System.out.format("correctRight.toString(2): %" + maxLength + "s\n", correctRight.toString(2)); + System.out.println("correctLeft.compareTo(correctRight): " + correctLeft.compareTo(correctRight)); + System.out.println("bitsLeft.compareTo(bitsRight): " + bitsLeft.compareTo(bitsRight)); + + Iterator itrLeft = bitsLeft.descendingIterator(); + Iterator itrRight = bitsRight.descendingIterator(); + while (itrLeft.hasNext() && itrRight.hasNext()) { + int l = itrLeft.next(); + int r = itrRight.next(); + if (l != r) { + System.out.println("l != r --> " + l + ", " + r); + break; + } + } + return; + } + } + System.out.println("Done!"); + } + + /** + * Stress test for {@link ExtendedSet#descendingIterator()} + * + * @param c class to test + */ + private static void testForDescendingIterator(Class> c) + { + ExtendedSet bits = empty(c); + + Random rnd = new MersenneTwister(); + for (int i = 0; i < 100000; i++) { + int n = rnd.nextInt(10000); + System.out.print(i + ": add " + n); + bits.add(n); + + Set x = new HashSet(bits); + Set y = new HashSet(); + try { + for (Integer e : bits.descending()) { + y.add(e); + } + } + catch (Exception e) { + System.out.println("\nERROR!"); + System.out.println(e.getMessage()); + System.out.println(bits.debugInfo()); + break; + } + 
boolean correct = x.equals(y); + System.out.println(" --> " + correct); + if (!correct) { + System.out.println(bits.debugInfo()); + System.out.print("result: "); + for (Integer e : bits.descending()) { + System.out.print(e + ", "); + } + System.out.println(); + break; + } + } + + System.out.println("Done!"); + } + + /** + * Stress test for {@link ConciseSet#get(int)} + * + * @param c class to test + */ + private static void testForPosition(Class> c) + { + ExtendedSet bits = empty(c); + + Random rnd = new MersenneTwister(31); + for (int i = 0; i < 1000; i++) { + // new set + bits.clear(); + final int size = 1 + rnd.nextInt(10000); + final int min = 1 + rnd.nextInt(10000 - 1); + final int max = min + rnd.nextInt(10000 - min + 1); + for (int j = 0; j < size; j++) { + int item = min + rnd.nextInt(max - min + 1); + bits.add(item); + } + + // check correctness + String good = bits.toString(); + StringBuilder other = new StringBuilder(); + int s = bits.size(); + other.append('['); + for (int j = 0; j < s; j++) { + other.append(bits.get(j)); + if (j < s - 1) { + other.append(", "); + } + } + other.append(']'); + + if (good.equals(other.toString())) { + System.out.println(i + ") OK"); + } else { + System.out.println("ERROR"); + System.out.println(bits.debugInfo()); + System.out.println(bits); + System.out.println(other); + return; + } + + int pos = 0; + for (Integer x : bits) { + if (bits.indexOf(x) != pos) { + System.out.println("ERROR! 
" + pos + " != " + bits.indexOf(x) + " for element " + x); + System.out.println(bits.debugInfo()); + return; + } + pos++; + } + } + } + + /** + * Test for {@link ExtendedIterator#skipAllBefore(Object)} + * + * @param c class to test + */ + private static void testForSkip(Class> c) + { + ExtendedSet bits = empty(c); + + Random rnd = new MersenneTwister(31); + for (int i = 0; i < 10000; i++) { + int max = rnd.nextInt(10000); + bits = bits.convert(new RandomNumbers.Uniform( + rnd.nextInt(1000), + rnd.nextDouble() * 0.999, + rnd.nextInt(100) + ).generate()); + + for (int j = 0; j < 100; j++) { + int skip = rnd.nextInt(max + 1); + boolean reverse = rnd.nextBoolean(); + System.out.format("%d) size=%d, skip=%d, reverse=%b ---> ", (i * 100) + j + 1, bits.size(), skip, reverse); + + ExtendedIterator itr1, itr2; + if (!reverse) { + itr1 = bits.iterator(); + itr2 = bits.iterator(); + while (itr1.hasNext() && itr1.next() < skip) {/* nothing */} + } else { + itr1 = bits.descendingIterator(); + itr2 = bits.descendingIterator(); + while (itr1.hasNext() && itr1.next() > skip) {/* nothing */} + } + if (!itr1.hasNext()) { + System.out.println("Skipped!"); + continue; + } + itr2.skipAllBefore(skip); + itr2.next(); + Integer i1, i2; + if (!(i1 = itr1.next()).equals(i2 = itr2.next())) { + System.out.println("Error!"); + System.out.println("i1 = " + i1); + System.out.println("i2 = " + i2); + System.out.println(bits.debugInfo()); + return; + } + System.out.println("OK!"); + } + } + System.out.println("Done!"); + } + + /** + * Test launcher + * + * @param args ID of the test to execute + */ + public static void main(String[] args) + { + // NOTE: the most complete test is TestCase.RANDOM_OPERATION_STRESS +// TestCase testCase = TestCase.ADDITION_STRESS; +// TestCase testCase = TestCase.REMOVAL_STRESS; +// TestCase testCase = TestCase.RANDOM_OPERATION_STRESS; +// TestCase testCase = TestCase.FILL_CLEAR_STRESS; +// TestCase testCase = TestCase.SKIP; + TestCase testCase = TestCase.POSITION; 
+// TestCase testCase = TestCase.COMPARATOR_COMPLEX; +// TestCase testCase = TestCase.DESCENDING_ITERATOR; + +// Class> classToTest = IntegerHashSet.class; +// Class> classToTest = IntegerFastSet.class; +// Class> classToTest = IntegerConciseSet.class; +// Class> classToTest = IntegerConcise2Set.class; +// Class> classToTest = IntegerConcisePlusSet.class; +// Class> classToTest = IntegerWAHSet.class; +// Class> classToTest = ListSet.class; +// Class> classToTest = LinkedSet.class; + Class> classToTest = MatrixSet.class; + + if (args != null && args.length > 0) { + try { + testCase = TestCase.values()[Integer.parseInt(args[0])]; + } + catch (NumberFormatException ignore) { + // nothing to do + } + } + + switch (testCase) { + case ADDITION_STRESS: + testForAdditionStress(classToTest); + break; + case REMOVAL_STRESS: + testForRemovalStress(classToTest); + break; + case RANDOM_OPERATION_STRESS: + testForRandomOperationsStress(classToTest, false); + break; + case FILL_CLEAR_STRESS: + testForRandomOperationsStress(classToTest, true); + break; + case SUBSET_ADDITION_STRESS_CONCISESET: + testForSubSetAdditionStress(); + break; + case SUBSET_REMOVAL_STRESS_CONCISESET: + testForSubSetRemovalStress(); + break; + case SUBSET_RANDOM_OPERATION_STRESS_CONCISESET: + testForSubSetRandomOperationsStress(); + break; + case COMPARATOR_SIMPLE: + testForComparatorSimple(classToTest); + break; + case COMPARATOR_COMPLEX: + testForComparatorComplex(classToTest); + break; + case DESCENDING_ITERATOR: + testForDescendingIterator(classToTest); + break; + case POSITION: + testForPosition(classToTest); + break; + case SKIP: + testForSkip(classToTest); + } + } + + /** + * @author alessandrocolantonio + */ + private enum TestCase + { + /** + * @uml.property name="aDDITION_STRESS" + * @uml.associationEnd + */ + ADDITION_STRESS, + /** + * @uml.property name="rEMOVAL_STRESS" + * @uml.associationEnd + */ + REMOVAL_STRESS, + /** + * @uml.property name="rANDOM_OPERATION_STRESS" + * @uml.associationEnd + 
*/ + RANDOM_OPERATION_STRESS, + /** + * @uml.property name="fILL_CLEAR_STRESS" + * @uml.associationEnd + */ + FILL_CLEAR_STRESS, + /** + * @uml.property name="sUBSET_ADDITION_STRESS_CONCISESET" + * @uml.associationEnd + */ + SUBSET_ADDITION_STRESS_CONCISESET, + /** + * @uml.property name="sUBSET_REMOVAL_STRESS_CONCISESET" + * @uml.associationEnd + */ + SUBSET_REMOVAL_STRESS_CONCISESET, + /** + * @uml.property name="sUBSET_RANDOM_OPERATION_STRESS_CONCISESET" + * @uml.associationEnd + */ + SUBSET_RANDOM_OPERATION_STRESS_CONCISESET, + /** + * @uml.property name="cOMPARATOR_SIMPLE" + * @uml.associationEnd + */ + COMPARATOR_SIMPLE, + /** + * @uml.property name="cOMPARATOR_COMPLEX" + * @uml.associationEnd + */ + COMPARATOR_COMPLEX, + /** + * @uml.property name="dESCENDING_ITERATOR" + * @uml.associationEnd + */ + DESCENDING_ITERATOR, + /** + * @uml.property name="pOSITION" + * @uml.associationEnd + */ + POSITION, + /** + * @uml.property name="sKIP" + * @uml.associationEnd + */ + SKIP,; + } + + @SuppressWarnings("unused") + private static class ListSet extends GenericExtendedSet + { + ListSet() + { + super(ArrayList.class); + } + } + + @SuppressWarnings("unused") + private static class LinkedSet extends GenericExtendedSet + { + LinkedSet() + { + super(LinkedList.class); + } + } + + @SuppressWarnings("unused") + private static class IntegerHashSet extends IntegerSet + { + IntegerHashSet() {super(new IntSetStatistics(new HashIntSet()));} + } + + @SuppressWarnings("unused") + private static class IntegerFastSet extends IntegerSet + { + IntegerFastSet() {super(new IntSetStatistics(new FastSet()));} + } + + @SuppressWarnings("unused") + private static class IntegerConciseSet extends IntegerSet + { + IntegerConciseSet() {super(new IntSetStatistics(new ConciseSet()));} + } + + // @SuppressWarnings("unused") +// private static class IntegerConcise2Set extends IntegerSet {IntegerConcise2Set() {super(new IntSetStatistics(new Concise2Set()));}} + @SuppressWarnings("unused") + private 
static class IntegerWAHSet extends IntegerSet + { + IntegerWAHSet() {super(new IntSetStatistics(new ConciseSet(true)));} + } + + @SuppressWarnings("unused") + private static class IntegerArraySet extends IntegerSet + { + IntegerArraySet() {super(new IntSetStatistics(new ArraySet()));} + } + + // @SuppressWarnings("unused") + private static class MatrixSet extends IntegerSet + { + MatrixSet() {super(new MatrixIntSet());} + } + + /** + * @author alessandrocolantonio + */ + final static class MatrixIntSet extends AbstractIntSet + { + final static int COL_POW = 10; + /** + * @uml.property name="matrix" + * @uml.associationEnd + */ + BinaryMatrix matrix = new BinaryMatrix(new FastSet()); + + final static int toInt(int row, int col) {return (row << COL_POW) + col;} + + final static int toRow(int index) {return index >>> COL_POW;} + + final static int toCol(int index) {return index & (0xFFFFFFFF >>> -COL_POW);} + + IntSet convert(BinaryMatrix m) + { + MatrixIntSet res = new MatrixIntSet(); + res.matrix = m; + return res; + } + + BinaryMatrix convert(IntSet s) + { + return ((MatrixIntSet) s).matrix; + } + + @Override + public IntSet convert(int... 
a) + { + MatrixIntSet res = new MatrixIntSet(); + for (int i : a) { + res.add(i); + } + return res; + } + + @Override + public IntSet convert(Collection c) + { + MatrixIntSet res = new MatrixIntSet(); + for (int i : c) { + res.add(i); + } + return res; + } + + @Override + public boolean add(int i) {return matrix.add(toRow(i), toCol(i));} + + @Override + public boolean addAll(IntSet c) {return matrix.addAll(convert(c));} + + @Override + public double bitmapCompressionRatio() {return matrix.bitmapCompressionRatio();} + + @Override + public void clear(int from, int to) {matrix.clear(toRow(from), toCol(from), toRow(to), toCol(to));} + + @Override + public void clear() {matrix.clear();} + + @Override + public double collectionCompressionRatio() {return matrix.collectionCompressionRatio();} + + @Override + public void complement() {matrix.complement();} + + @Override + public int complementSize() {return matrix.complementSize();} + + @Override + public IntSet complemented() {return convert(matrix.complemented());} + + @Override + public boolean contains(int i) {return matrix.contains(toRow(i), toCol(i));} + + @Override + public boolean containsAll(IntSet c) {return matrix.containsAll(convert(c));} + + @Override + public boolean containsAny(IntSet other) {return matrix.containsAny(convert(other));} + + @Override + public boolean containsAtLeast(IntSet other, int minElements) + { + return matrix.containsAtLeast( + convert(other), + minElements + ); + } + + @Override + public IntSet difference(IntSet other) {return convert(matrix.difference(convert(other)));} + + @Override + public int differenceSize(IntSet other) {return matrix.differenceSize(convert(other));} + + @Override + public IntSet empty() {return new MatrixIntSet();} + + @Override + public void fill(int from, int to) {matrix.fill(toRow(from), toCol(from), toRow(to), toCol(to));} + + @Override + public int first() {return toInt(matrix.first()[0], matrix.first()[1]);} + + @Override + public void flip(int e) 
{matrix.flip(toRow(e), toCol(e));} + + @Override + public int get(int i) {return toInt(matrix.get(i)[0], matrix.get(i)[1]);} + + @Override + public int indexOf(int e) {return matrix.indexOf(toRow(e), toCol(e));} + + @Override + public IntSet intersection(IntSet other) {return convert(matrix.intersection(convert(other)));} + + @Override + public int intersectionSize(IntSet other) {return matrix.intersectionSize(convert(other));} + + @Override + public boolean isEmpty() {return matrix.isEmpty();} + + @Override + public int last() {return toInt(matrix.last()[0], matrix.last()[1]);} + + @Override + public boolean remove(int i) {return matrix.remove(toRow(i), toCol(i));} + + @Override + public boolean removeAll(IntSet c) {return matrix.removeAll(convert(c));} + + @Override + public boolean retainAll(IntSet c) {return matrix.retainAll(convert(c));} + + @Override + public int size() {return matrix.size();} + + @Override + public IntSet symmetricDifference(IntSet other) {return convert(matrix.symmetricDifference(convert(other)));} + + @Override + public int symmetricDifferenceSize(IntSet other) {return matrix.symmetricDifferenceSize(convert(other));} + + @Override + public IntSet union(IntSet other) {return convert(matrix.union(convert(other)));} + + @Override + public int unionSize(IntSet other) {return matrix.unionSize(convert(other));} + + @Override + public int compareTo(IntSet o) {return matrix.compareTo(convert(o));} + + @Override + public double jaccardDistance(IntSet other) {return 0;} + + @Override + public double jaccardSimilarity(IntSet other) {return 0;} + + @Override + public double weightedJaccardDistance(IntSet other) {return 0;} + + @Override + public double weightedJaccardSimilarity(IntSet other) {return 0;} + + @Override + public List powerSet() {return null;} + + @Override + public List powerSet(int min, int max) {return null;} + + @Override + public int powerSetSize() {return 0;} + + @Override + public int powerSetSize(int min, int max) {return 0;} + + 
@Override + public IntIterator iterator() + { + return new IntIterator() + { + CellIterator itr = matrix.iterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public int next() + { + int[] c = itr.next(); + return toInt(c[0], c[1]); + } + + @Override + public void skipAllBefore(int element) {itr.skipAllBefore(toRow(element), toCol(element));} + + @Override + public void remove() {itr.remove();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + + @Override + public IntIterator descendingIterator() + { + return new IntIterator() + { + CellIterator itr = matrix.descendingIterator(); + + @Override + public boolean hasNext() {return itr.hasNext();} + + @Override + public int next() + { + int[] c = itr.next(); + return toInt(c[0], c[1]); + } + + @Override + public void skipAllBefore(int element) {itr.skipAllBefore(toRow(element), toCol(element));} + + @Override + public void remove() {itr.remove();} + + @Override + public IntIterator clone() {throw new UnsupportedOperationException();} + }; + } + + @Override + public IntSet clone() + { + MatrixIntSet res = new MatrixIntSet(); + res.matrix = matrix.clone(); + return res; + } + + @Override + public int hashCode() {return matrix.hashCode();} + + @Override + public boolean equals(Object obj) {return matrix.equals(((MatrixIntSet) obj).matrix);} + + @Override + public String debugInfo() + { + return super.toString() + "\n" + matrix.debugInfo(); + } + } +} + diff --git a/extendedset/src/test/java/io/druid/extendedset/Performance.java b/extendedset/src/test/java/io/druid/extendedset/Performance.java new file mode 100755 index 00000000000..9aa99c40da1 --- /dev/null +++ b/extendedset/src/test/java/io/druid/extendedset/Performance.java @@ -0,0 +1,496 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset; + +import io.druid.extendedset.intset.ArraySet; +import io.druid.extendedset.intset.ConciseSet; +import io.druid.extendedset.intset.FastSet; +import io.druid.extendedset.wrappers.GenericExtendedSet; +import io.druid.extendedset.wrappers.IntegerSet; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Locale; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + +//import it.uniroma3.mat.extendedset.intset.Concise2Set; + +/** + * Class for performance evaluation. 
+ * + * @author Alessandro Colantonio + * @version $Id: Performance.java 155 2011-05-30 22:27:00Z cocciasik $ + */ +public class Performance +{ + /** + * number of times to repeat each test + */ + private final static int REPETITIONS = 5; + /** + * minimum element + */ + private final static int SHIFT = 1000; + /** + * test results + */ + private final static Map, Double>> TIME_VALUES = new TreeMap, Double>>(); + /** + * time measurement, in nanoseconds + */ + private static long lastExecTime = -1; +// private static class IntegerConcise2Set extends IntegerSet {IntegerConcise2Set() {super(new Concise2Set());}} +// private static class IntegerWAHSet extends IntegerSet {IntegerWAHSet() {super(new WAHSet());}} + + /** + * Start time measurement + */ + private static void startTimer() + { + lastExecTime = System.nanoTime(); + } + + /** + * Stop time measurement + * + * @param c class being tested + * @param name method name + * @param div division factor (elapsed time and allocated memory will be + * divided by this number) + */ + private static void endTimer(Class c, String name, long div) + { + // final time + double t = ((double) (System.nanoTime() - lastExecTime)) / div; + Map, Double> measure = TIME_VALUES.get(name); + if (measure == null) { + TIME_VALUES.put(name, measure = new HashMap, Double>()); + } + + Double old = measure.get(c); + if (old == null || old > t) { + measure.put(c, t); + } + } + + /** + * Perform the time test + * + * @param classToTest class of the {@link Collection} instance to test + * @param leftOperand collection of integers representing the left operand + * {@link Collection} + * @param rightOperand collection of integers representing the right operand + * {@link Collection} + */ + @SuppressWarnings("unchecked") + private static void testClass( + Class classToTest, + Collection leftOperand, + Collection rightOperand + ) + { + // collections used for the test cases + Collection[] cAddAndRemove = new Collection[REPETITIONS]; + Collection[] 
cAddAll = new Collection[REPETITIONS]; + Collection[] cRemoveAll = new Collection[REPETITIONS]; + Collection[] cRetainAll = new Collection[REPETITIONS]; + Collection[] cRighOperand = new Collection[REPETITIONS]; + IntegerSet[] cLeftOperand = new IntegerSet[REPETITIONS]; + IntegerSet[] cUnionResults = new IntegerSet[REPETITIONS]; + IntegerSet[] cDifferenceResults = new IntegerSet[REPETITIONS]; + IntegerSet[] cIntersectionResults = new IntegerSet[REPETITIONS]; + + // CREATION + for (int i = 0; i < REPETITIONS; i++) { + try { + cAddAndRemove[i] = (Collection) classToTest.newInstance(); + cAddAll[i] = (Collection) classToTest.newInstance(); + cRemoveAll[i] = (Collection) classToTest.newInstance(); + cRetainAll[i] = (Collection) classToTest.newInstance(); + cRighOperand[i] = (Collection) classToTest.newInstance(); + cLeftOperand[i] = (IntegerSet) classToTest.newInstance(); + } + catch (Exception e) { + throw new RuntimeException(e); + } + } + + // APPEND + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + for (Integer x : rightOperand) { + cRighOperand[i].add(x); + } + for (Integer x : leftOperand) { + cAddAndRemove[i].add(x); + cLeftOperand[i].add(x); + cAddAll[i].add(x); + cRetainAll[i].add(x); + cRemoveAll[i].add(x); + } + endTimer(classToTest, "00) append()", (5 * leftOperand.size() + rightOperand.size())); + } + +// List xxx = new ArrayList(rightOperand); +// List yyy = new ArrayList(leftOperand); +// Collections.shuffle(xxx); +// Collections.shuffle(yyy); +// for (int i = 0; i < REPETITIONS; i++) { +// cRighOperand[i].clear(); +// cAddAndRemove[i].clear(); +// cLeftOperand[i].clear(); +// cAddAll[i].clear(); +// cRetainAll[i].clear(); +// cRemoveAll[i].clear(); +// } +// +// // ADDITION +// for (int i = 0; i < REPETITIONS; i++) { +// startTimer(); +// for (Integer x : xxx) +// cRighOperand[i].add(x); +// for (Integer x : yyy) { +// cAddAndRemove[i].add(x); +// cLeftOperand[i].add(x); +// cAddAll[i].add(x); +// cRetainAll[i].add(x); +// 
cRemoveAll[i].add(x); +// } +// endTimer(classToTest, "01) add()", (5 * leftOperand.size() + rightOperand.size())); +// } + + // REMOVAL + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + for (Integer x : rightOperand) { + cAddAndRemove[i].remove(x); + } + endTimer(classToTest, "02) remove()", rightOperand.size()); + } + + // CONTAINS + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + for (Integer x : rightOperand) { + cAddAll[i].contains(x); + } + endTimer(classToTest, "03) contains()", rightOperand.size()); + } + + // CONTAINS ALL + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cAddAll[i].containsAll(cRighOperand[i]); + endTimer(classToTest, "04) containsAll()", 1); + } + + // UNION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cAddAll[i].addAll(cRighOperand[i]); + endTimer(classToTest, "05) addAll()", 1); + } + + // DIFFERENCE + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cRemoveAll[i].removeAll(cRighOperand[i]); + endTimer(classToTest, "06) removeAll()", 1); + } + + // INTERSECTION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cRetainAll[i].retainAll(cRighOperand[i]); + endTimer(classToTest, "07) retainAll()", 1); + } + + // UNION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cUnionResults[i] = cLeftOperand[i].union(cRighOperand[i]); + endTimer(classToTest, "08) union()", 1); + } + + // DIFFERENCE + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cDifferenceResults[i] = cLeftOperand[i].difference(cRighOperand[i]); + endTimer(classToTest, "09) difference()", 1); + } + + // INTERSECTION + for (int i = 0; i < REPETITIONS; i++) { + startTimer(); + cIntersectionResults[i] = cLeftOperand[i].intersection(cRighOperand[i]); + endTimer(classToTest, "10) intersection()", 1); + } + } + + /** + * Summary information + */ + private static void printSummary(int cardinality, double density, Class[] classes) + { + for (Entry, Double>> e : TIME_VALUES.entrySet()) { + // method name + 
System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density); + System.out.print(e.getKey()); + for (Class c : classes) { + Double op = e.getValue().get(c); + System.out.format("\t%12d", (op == null ? 0 : op.intValue())); + } + System.out.println(); + } + } + + /** + * TEST + * + * @param args + */ + public static void main(String[] args) + { + boolean calcMemory = false; + boolean calcTime = true; + + boolean calcUniform = true; + boolean calcMarkovian = false; + boolean calcZipfian = false; + + int minCardinality = 10000; + int maxCardinality = 10000; + + /* + * MEMORY + */ + for (int i = 0; calcMemory && i < 3; i++) { + System.out.println(); + switch (i) { + case 0: + if (!calcUniform) { + continue; + } + System.out.println("#MEMORY UNIFORM"); + break; + case 1: + if (!calcMarkovian) { + continue; + } + System.out.println("#MEMORY MARKOVIAN"); + break; + case 2: + if (!calcZipfian) { + continue; + } + System.out.println("#MEMORY ZIPFIAN"); + break; + default: + throw new RuntimeException("unexpected"); + } + System.out.println("#cardinality\tdensity\tFastSet\tConciseSet\tWAHSet\tConcise2Set"); + for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) { + for (double density = .0001; density < 1D; density *= 1.7) { + System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density); + + Collection integers; + switch (i) { + case 0: + integers = new RandomNumbers.Uniform(cardinality, density, SHIFT).generate(); + break; + case 1: + integers = new RandomNumbers.Markovian(cardinality, density, SHIFT).generate(); + break; + case 2: + integers = new RandomNumbers.Zipfian(cardinality, density, SHIFT, 2).generate(); + break; + default: + throw new RuntimeException("unexpected"); + } + + IntegerSet s0 = new IntegerSet(new FastSet()); + s0.addAll(integers); + System.out.format("%7d\t", (int) (s0.collectionCompressionRatio() * cardinality)); + + IntegerSet s1 = new IntegerSet(new ConciseSet()); + s1.addAll(integers); + 
System.out.format("%7d\t", (int) (s1.collectionCompressionRatio() * cardinality)); + + IntegerSet s2 = new IntegerSet(new WAHSet()); + s2.addAll(integers); + System.out.format("%7d\t", (int) (s2.collectionCompressionRatio() * cardinality)); + +// IntegerSet s3 = new IntegerSet(new Concise2Set()); +// s3.addAll(integers); +// System.out.format("%7d\n", (int) (s3.collectionCompressionRatio() * cardinality)); + } + } + } + + Class[] classes = new Class[]{ +// ArrayList.class, +// LinkedList.class, +// ArrayListSet.class, +// LinkedListSet.class, +// HashSet.class, +// TreeSet.class, +IntegerArraySet.class, +IntegerFastSet.class, +// IntegerHashSet.class, +// IntegerWAHSet.class, +IntegerConciseSet.class, +// IntegerConcise2Set.class, + }; + + /* + * TIME + */ + for (int i = 0; calcTime && i < 3; i++) { + System.out.println(); + switch (i) { + case 0: + if (!calcUniform) { + continue; + } + System.out.println("#TIME UNIFORM"); + break; + case 1: + if (!calcMarkovian) { + continue; + } + System.out.println("#TIME MARKOVIAN"); + break; + case 2: + if (!calcZipfian) { + continue; + } + System.out.println("#TIME ZIPFIAN"); + break; + default: + throw new RuntimeException("unexpected"); + } + System.out.print("#cardinality\tdensity\toperation"); + for (Class c : classes) { + System.out.print("\t" + c.getSimpleName()); + } + System.out.println(); + for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) { + RandomNumbers r; + switch (i) { + case 0: + r = new RandomNumbers.Uniform(cardinality, 0.5, SHIFT); + break; + case 1: + r = new RandomNumbers.Markovian(cardinality, 0.5, SHIFT); + break; + case 2: + r = new RandomNumbers.Zipfian(cardinality, 0.5, SHIFT, 2); + break; + default: + throw new RuntimeException("unexpected"); + } + Collection x = r.generate(), y = r.generate(); + for (Class c : classes) { + testClass(c, x, y); + testClass(c, x, y); + } + for (double density = .0001; density < 1D; density *= 1.2) { +// for (double density = 
.0001; density < 1D; density *= 1.7) { +// for (double density = .0041; density < 1D; density *= 1.7) { +// for (double density = 0.8272; density > 0.00005; density /= 1.7) { + switch (i) { + case 0: + r = new RandomNumbers.Uniform(cardinality, density, SHIFT); + break; + case 1: + r = new RandomNumbers.Markovian(cardinality, density, SHIFT); + break; + case 2: + r = new RandomNumbers.Zipfian(cardinality, density, SHIFT, 2); + break; + default: + throw new RuntimeException("unexpected"); + } + x = r.generate(); + y = r.generate(); + for (Class c : classes) { + testClass(c, x, y); + } + printSummary(cardinality, density, classes); + TIME_VALUES.clear(); + } + } + } + + System.out.println("\nDone!"); + } + + /* test classes */ + private static class WAHSet extends ConciseSet + { + private static final long serialVersionUID = -5048707825606872979L; + + WAHSet() {super(true);} + } + + private static class IntegerArraySet extends IntegerSet + { + IntegerArraySet() {super(new ArraySet());} + } + + // private static class IntegerHashSet extends IntegerSet {IntegerHashSet() {super(new HashIntSet());}} + private static class IntegerFastSet extends IntegerSet + { + IntegerFastSet() {super(new FastSet());} + } + + private static class IntegerConciseSet extends IntegerSet + { + IntegerConciseSet() {super(new ConciseSet());} + } + + /** + * Class to test the sorted array + */ + @SuppressWarnings("unused") + private static class ArrayListSet extends GenericExtendedSet + { + ArrayListSet() + { + super(ArrayList.class); + } + } + + /** + * Class to test the sorted linked lists + */ + @SuppressWarnings("unused") + private static class LinkedListSet extends GenericExtendedSet + { + LinkedListSet() + { + super(LinkedList.class); + } + } +} diff --git a/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java b/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java new file mode 100755 index 00000000000..d4b85f1a43a --- /dev/null +++ 
b/extendedset/src/test/java/io/druid/extendedset/RandomNumbers.java @@ -0,0 +1,242 @@ +/* + * (c) 2010 Alessandro Colantonio + * + * + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package io.druid.extendedset; + + +import io.druid.extendedset.utilities.random.MersenneTwister; + +import java.util.Collection; +import java.util.Random; +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * Generation of random integer sets + * + * @author Alessandro Colantonio + * @version $Id: RandomNumbers.java 142 2011-02-15 23:12:28Z cocciasik $ + */ +public abstract class RandomNumbers +{ + /** + * pseudo-random number generator + */ + final private static Random RND = new MersenneTwister(); + + /** + * the smallest integer + */ + protected final int min; + + /** + * number of elements within the set + */ + protected final int cardinality; + + /** + * cardinality to range (i.e., {@link #max} - {@link #min} + 1) ratio + */ + protected final double density; + + /** + * Initializes internal data + * + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + */ + private RandomNumbers(int cardinality, double density, int min) + { + // parameter check + if (cardinality < 0) { + throw new IllegalArgumentException("cardinality < 0: " + cardinality); + } + if (density < 0D) { + throw new IllegalArgumentException("density < 0: " + 
density); + } + if (density > 1D) { + throw new IllegalArgumentException("density > 1: " + density); + } + + this.cardinality = cardinality; + this.density = density; + this.min = min; + } + + /** + * Test + * + * @param args + */ + public static void main(String[] args) + { + int size = 100; + System.out.println(new Uniform(size, 0.1, 0).generate()); + System.out.println(new Uniform(size, 0.9, 0).generate()); + System.out.println(new Zipfian(size, 0.1, 0, 2).generate()); + System.out.println(new Zipfian(size, 0.9, 0, 2).generate()); + System.out.println(new Markovian(size, 0.1, 0).generate()); + System.out.println(new Markovian(size, 0.9, 0).generate()); + } + + /** + * Next integer, according to the given probability distribution + * + * @return next pseudo-random integer + */ + protected abstract int next(); + + /** + * Generates the integer set of pseudo-random numbers + * + * @return the integer set + */ + public SortedSet generate() + { + SortedSet res = new TreeSet(); + while (res.size() < cardinality) { + res.add(next()); + } + return res; + } + + /** + * Integral numbers with uniform distribution. + *

+ * The maximum number will be (cardinality / density) - 1, + * while the average gap between two consecutive numbers will be + * density * cardinality. + */ + public static class Uniform extends RandomNumbers + { + /** + * the greatest integer + */ + private final int max; + + /** + * Initializes internal data + * + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + */ + public Uniform(int cardinality, double density, int min) + { + super(cardinality, density, min); + max = min + (int) (Math.round(cardinality / density)) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + return min + RND.nextInt(max - min + 1); + } + } + + /** + * Integral numbers with Zipfian (power-law) distribution. + *

+ * The maximum number will be (cardinality / density) - 1, + * while the average gap between two consecutive numbers will be + * density * cardinality. However, integers will be + * concentrated around the minimum value. + */ + public static class Zipfian extends RandomNumbers + { + /** + * the greatest integer + */ + private final int max; + + /** + * power-law exponent + */ + private final int k; + + /** + * Initializes internal data + * + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + * @param k power-law exponent + */ + public Zipfian(int cardinality, double density, int min, int k) + { + super(cardinality, density, min); + this.k = k; + max = min + (int) (Math.round(cardinality / density)) - 1; + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + return min + (int) ((max - min + 1) * Math.pow(RND.nextDouble(), k)); + } + } + + /** + * Integral numbers with Markovian distribution. The data will present + * sequences of subsequent integers followed by "gaps". In this case, + * density indicates the probability of switching from a + * sequence to a gap, and vice-versa. For example, density = 0 + * means a set made up of one long sequence of numbers, while + * density = 1 means a set made up of all odd (or even) + * integers. 
+ */ + public static class Markovian extends RandomNumbers + { + private boolean skip = false; + private int next = min; + + /** + * @param cardinality number of elements of the set (i.e., result of + * {@link Collection#size()} ) + * @param density cardinality to range ratio + * @param min the smallest integer + */ + public Markovian(int cardinality, double density, int min) + { + super(cardinality, density, min); + } + + /** + * {@inheritDoc} + */ + @Override + public int next() + { + while (skip ^= RND.nextDouble() < density) { + next++; + } + return min + next++; + } + } +} diff --git a/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java b/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java new file mode 100755 index 00000000000..f2542c1067d --- /dev/null +++ b/extendedset/src/test/java/io/druid/extendedset/intset/ImmutableConciseSetTest.java @@ -0,0 +1,1972 @@ +/* +* Copyright 2012 Metamarkets Group Inc. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package io.druid.extendedset.intset; + +import com.google.common.collect.Lists; +import junit.framework.Assert; +import org.junit.Test; + +import java.nio.IntBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Random; +import java.util.Set; + +/** + */ +public class ImmutableConciseSetTest +{ + public static final int NO_COMPLEMENT_LENGTH = -1; + + @Test + public void testWordIteratorNext1() + { + final int[] ints = {1, 2, 3, 4, 5}; + ConciseSet set = new ConciseSet(); + for (int i : ints) { + set.add(i); + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + Assert.assertEquals(new Integer(0x8000003E), itr.next()); + + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testWordIteratorNext2() + { + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 100000; i++) { + set.add(i); + + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + Assert.assertEquals(new Integer(0x40000C98), itr.next()); + Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + /** + * Advance to middle of a fill + */ + @Test + public void testWordIteratorAdvanceTo1() + { + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 100000; i++) { + set.add(i); + + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + itr.advanceTo(50); + Assert.assertEquals(new Integer(1073744998), itr.next()); + Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + /** + * Advance past a fill directly to a new literal + */ + @Test + public void 
testWordIteratorAdvanceTo2() + { + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 100000; i++) { + set.add(i); + + } + ImmutableConciseSet iSet = ImmutableConciseSet.newImmutableFromMutable(set); + + ImmutableConciseSet.WordIterator itr = iSet.newWordIterator(); + itr.advanceTo(3225); + Assert.assertEquals(new Integer(0x81FFFFFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneLitOneLit() + { + int[] words = {-1, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000001), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneLitPureOneFill() + { + int[] words = {-1, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000005), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneLitDirtyOneFill() + { + int[] words = {-1, 0x42000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(new Integer(0x42000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneFillOneLit() + { + int[] words = {0x40000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000005), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneFillPureOneFill() 
+ { + int[] words = {0x40000004, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000009), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactOneFillDirtyOneFill() + { + int[] words = {0x40000004, 0x42000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x40000004), itr.next()); + Assert.assertEquals(new Integer(0x42000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroLitZeroLit() + { + int[] words = {0x80000000, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000001), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroLitPureZeroFill() + { + int[] words = {0x80000000, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000005), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroLitDirtyZeroFill() + { + int[] words = {0x80000000, 0x02000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000000), itr.next()); + 
Assert.assertEquals(new Integer(0x02000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroFillZeroLit() + { + int[] words = {0x00000004, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000005), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroFillPureZeroFill() + { + int[] words = {0x00000004, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000009), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactZeroFillDirtyZeroFill() + { + int[] words = {0x00000004, 0x02000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x00000004), itr.next()); + Assert.assertEquals(new Integer(0x02000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleOneBitLitZeroLit() + { + int[] words = {0x80000001, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x02000001), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + 
public void testCompactDoubleOneBitLitZeroLit() + { + int[] words = {0x80000003, 0x80000000, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000003), itr.next()); + Assert.assertEquals(new Integer(0x80000000), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleOneBitLitPureZeroFill() + { + int[] words = {0x80000001, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x02000005), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactDoubleOneBitLitPureZeroFill() + { + int[] words = {0x80000003, 0x00000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000003), itr.next()); + Assert.assertEquals(new Integer(0x00000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleOneBitLitDirtyZeroFill() + { + int[] words = {0x80000001, 0x02000004, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x80000001), itr.next()); + Assert.assertEquals(new Integer(0x02000004), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void 
testCompactSingleZeroBitLitOneLit() + { + int[] words = {0xFFFFFFFE, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x42000001), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactDoubleZeroBitLitOneLit() + { + int[] words = {0xFFFFFFEE, -1}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFEE), itr.next()); + Assert.assertEquals(new Integer(-1), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleZeroBitLitPureOneFill() + { + int[] words = {0xFFFFFFFE, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0x42000005), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactDoubleZeroBitLitPureOneFill() + { + int[] words = {0xFFFFFFFC, 0x40000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFFC), itr.next()); + Assert.assertEquals(new Integer(0x40000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactSingleZeroBitLitDirtyOneFill() + { + int[] words = {0xFFFFFFFE, 0x42000004}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFFE), itr.next()); + 
Assert.assertEquals(new Integer(0x42000004), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + @Test + public void testCompactTwoLiterals() + { + int[] words = {0xFFFFFFFE, 0xFFEFFEFF}; + + ImmutableConciseSet res = ImmutableConciseSet.compact(new ImmutableConciseSet(IntBuffer.wrap(words))); + ImmutableConciseSet.WordIterator itr = res.newWordIterator(); + + Assert.assertEquals(new Integer(0xFFFFFFFE), itr.next()); + Assert.assertEquals(new Integer(0xFFEFFEFF), itr.next()); + Assert.assertEquals(itr.hasNext(), false); + } + + /** + * Set 1: zero literal, zero fill with flipped bit 33, literal + * Set 2: zero literal, zero fill with flipped bit 34, literal + *

+ * Testing merge + */ + @Test + public void testUnion1() + { + final int[] ints1 = {33, 100000}; + final int[] ints2 = {34, 100000}; + List expected = Arrays.asList(33, 34, 100000); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyUnion(expected, sets); + } + + /** + * Set 1: zero literal, zero fill with flipped bit 33, literal + * Set 2: zero literal, zero fill with flipped bit 34, literal + *

+ * Testing merge + */ + @Test + public void testUnion2() + { + final int[] ints1 = {33, 100000}; + final int[] ints2 = {34, 200000}; + List expected = Arrays.asList(33, 34, 100000, 200000); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyUnion(expected, sets); + } + + /** + * Set 1: zero fill, one fill + * Set 2: zero fill, one fill with flipped bit 62 + *

+ * Testing merge + */ + @Test + public void testUnion3() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 62; i < 10001; i++) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i = 63; i < 10002; i++) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 62; i < 10002; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: zero literal, one fill with flipped bit 62 + * Set 2: zero literal, literal, one fill, literal + *

+ * Testing merge + */ + @Test + public void testUnion4() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 63; i < 1001; i++) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i = 64; i < 1002; i++) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 63; i < 1002; i++) { + expected.add(i); + } + + + ConciseSet blah = new ConciseSet(); + for (int i : expected) { + blah.add(i); + } + verifyUnion(expected, sets); + } + + /** + * Set 1: literal + * Set 2: zero fill, zero literal, zero fill with flipped 33 bit, zero fill with flipped 1000000 bit, literal + * Set3: literal, zero fill with flipped 34th bit, literal + *

+ * Testing merge + */ + @Test + public void testUnion5() + { + final int[] ints1 = {1, 2, 3, 4, 5}; + final int[] ints2 = {100000, 2405983, 33}; + final int[] ints3 = {0, 4, 5, 34, 333333}; + final List expected = Arrays.asList(0, 1, 2, 3, 4, 5, 33, 34, 100000, 333333, 2405983); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + ConciseSet set3 = new ConciseSet(); + for (int i : ints3) { + set3.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2), + ImmutableConciseSet.newImmutableFromMutable(set3) + ); + + verifyUnion(expected, sets); + } + + /** + * Set 1: literal + * Set 2: literal + *

+ * Testing merge + */ + @Test + public void testUnion6() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 30; i++) { + if (i != 28) { + set1.add(i); + } + } + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 30; i++) { + if (i != 27) { + set2.add(i); + } + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 30; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: zero literal, literal, one fill with flipped bit + * Set 2: zero literal, one fill with flipped bit + *

+ * Testing merge + */ + @Test + public void testUnion7() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 64; i < 1005; i++) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i = 63; i < 99; i++) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 63; i < 1005; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: One fill with flipped 27th bit + * Set 2: One fill with flipped 28th bit + *

+ * Testing creation of one fill with no flipped bits + */ + @Test + public void testUnion8() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + if (i != 27) { + set1.add(i); + } + } + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + if (i != 28) { + set2.add(i); + } + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 1000; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: Literal and one fill + * Set 2: One fill with flipped 28th bit + *

+ * Testing creation of one fill with correct flipped bit + */ + @Test + public void testUnion9() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + if (!(i == 27 || i == 28)) { + set1.add(i); + } + } + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + if (i != 28) { + set2.add(i); + } + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 1000; i++) { + if (i != 28) { + expected.add(i); + } + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: Multiple literals + * Set 2: Multiple literals + *

+ * Testing merge of pure sequences of literals + */ + @Test + public void testUnion10() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 1000; i += 2) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i = 1; i < 1000; i += 2) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 1000; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: Multiple literals + * Set 2: Zero fill and literal + *

+ * Testing skipping of zero fills + */ + @Test + public void testUnion11() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 1000; i += 2) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + set2.add(10000); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 1000; i += 2) { + expected.add(i); + } + expected.add(10000); + + verifyUnion(expected, sets); + } + + /** + * Set 1: Literal with 4 bits marked + * Set 2: Zero fill with flipped bit 5 + *

+ * Testing merge of literal and zero fill with flipped bit + */ + @Test + public void testUnion12() + { + final int[] ints1 = {1, 2, 3, 4}; + final int[] ints2 = {5, 1000}; + final List expected = Arrays.asList(1, 2, 3, 4, 5, 1000); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyUnion(expected, sets); + } + + /** + * Set 1: Literal with bit 0 + * Set 2: One fill with flipped bit 0 + *

+ * Testing merge of literal and one fill with flipped bit + */ + @Test + public void testUnion13() + { + List expected = Lists.newArrayList(); + final int[] ints1 = {0}; + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i = 1; i < 100; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 100; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: Zero fill with flipped bit 0 + * Set 2: One fill with flipped bit 0 + *

+ * Testing merge of flipped bits in zero and one fills + */ + @Test + public void testUnion14() + { + List expected = Lists.newArrayList(); + final int[] ints1 = {0, 100}; + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i = 1; i < 100; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i <= 100; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: Zero fill with flipped bit 1 + * Set 2: Literal with 0th bit marked + * Set 3: One Fill from 1 to 100 with flipped bit 0 + *

+ * Testing merge of flipped bits in zero and one fills with a literal + */ + @Test + public void testUnion15() + { + List expected = Lists.newArrayList(); + final int[] ints1 = {1, 100}; + final int[] ints2 = {0}; + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + ConciseSet set3 = new ConciseSet(); + for (int i = 1; i < 100; i++) { + set3.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2), + ImmutableConciseSet.newImmutableFromMutable(set3) + ); + + for (int i = 0; i <= 100; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Testing merge of offset elements + */ + @Test + public void testUnion16() + { + final int[] ints1 = {1001, 1002, 1003}; + final int[] ints2 = {1034, 1035, 1036}; + List expected = Arrays.asList(1001, 1002, 1003, 1034, 1035, 1036); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyUnion(expected, sets); + } + + /** + * Testing merge of same elements + */ + @Test + public void testUnion17() + { + final int[] ints1 = {1, 2, 3, 4, 5}; + final int[] ints2 = {1, 2, 3, 4, 5}; + List expected = Arrays.asList(1, 2, 3, 4, 5); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyUnion(expected, sets); + } + + @Test + public void testUnion18() + { + List expected = Lists.newArrayList(); + ConciseSet 
set1 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + set2.add(1000); + set2.add(10000); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 1001; i++) { + expected.add(i); + } + expected.add(10000); + + verifyUnion(expected, sets); + } + + /** + * Set 1: one fill, all ones literal + * Set 2: zero fill, one fill, literal + */ + @Test + public void testUnion19() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 93; i++) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i = 62; i < 1000; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 1000; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + /** + * Set 1: literal, one fill, literal + * Set 2: zero fill, literal that falls within the one fill above, one fill that falls in one fill above, one fill + */ + @Test + public void testUnion20() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 5; i++) { + set1.add(i); + } + for (int i = 31; i < 1000; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + for (int i = 62; i < 68; i++) { + set2.add(i); + } + for (int i = 800; i < 1000; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 0; i < 5; i++) { + expected.add(i); + } + for (int i = 31; i < 1000; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + @Test + public void testUnion21() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 32; i < 93; i++) { + set1.add(i); + } + + ConciseSet set2 = new 
ConciseSet(); + for (int i = 0; i < 62; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + for (int i = 0; i < 93; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + @Test + public void testUnion22() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 93; i < 1000; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 32; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + for (int i = 0; i < 32; i++) { + expected.add(i); + } + for (int i = 93; i < 1000; i++) { + expected.add(i); + } + + verifyUnion(expected, sets); + } + + private void verifyUnion(List expected, List sets) + { + List actual = Lists.newArrayList(); + ImmutableConciseSet set = ImmutableConciseSet.union(sets); + IntSet.IntIterator itr = set.iterator(); + while (itr.hasNext()) { + actual.add(itr.next()); + } + Assert.assertEquals(expected, actual); + } + + /** + * Testing basic intersection of similar sets + */ + @Test + public void testIntersection1() + { + final int[] ints1 = {33, 100000}; + final int[] ints2 = {33, 100000}; + List expected = Arrays.asList(33, 100000); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyIntersection(expected, sets); + } + + /** + * Set1: literal, zero fill with flip bit, literal + * Set2: literal, zero fill with different flip bit, literal + */ + @Test + public void testIntersection2() + { + final int[] ints1 = {33, 100000}; + final int[] 
ints2 = {34, 100000}; + List expected = Arrays.asList(100000); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyIntersection(expected, sets); + } + + /** + * Testing intersection of one fills + */ + @Test + public void testIntersection3() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + set1.add(i); + set2.add(i); + expected.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyIntersection(expected, sets); + } + + /** + * Similar to previous test with one bit in the sequence set to zero + */ + @Test + public void testIntersection4() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + set1.add(i); + if (i != 500) { + set2.add(i); + expected.add(i); + } + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyIntersection(expected, sets); + } + + /** + * Testing with disjoint sets + */ + @Test + public void testIntersection5() + { + final int[] ints1 = {33, 100000}; + final int[] ints2 = {34, 200000}; + List expected = Lists.newArrayList(); + + ConciseSet set1 = new ConciseSet(); + for (int i : ints1) { + set1.add(i); + } + ConciseSet set2 = new ConciseSet(); + for (int i : ints2) { + set2.add(i); + } + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + verifyIntersection(expected, sets); + } + + /** + * 
Set 1: literal, zero fill, literal + * Set 2: one fill, literal that falls within the zero fill above, one fill + */ + @Test + public void testIntersection6() + { + List expected = Lists.newArrayList(); + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 5; i++) { + set1.add(i); + } + for (int i = 1000; i < 1005; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + for (int i = 800; i < 805; i++) { + set2.add(i); + } + for (int i = 806; i < 1005; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + for (int i = 1000; i < 1005; i++) { + expected.add(i); + } + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection7() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 3100; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + set2.add(100); + set2.add(500); + for (int i = 600; i < 700; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(100); + expected.add(500); + for (int i = 600; i < 700; i++) { + expected.add(i); + } + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection8() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 3100; i++) { + set1.add(i); + } + set1.add(4001); + + ConciseSet set2 = new ConciseSet(); + set2.add(100); + set2.add(500); + for (int i = 600; i < 700; i++) { + set2.add(i); + } + set2.add(4001); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(100); + expected.add(500); + for (int i = 600; i < 700; i++) { + expected.add(i); + } + expected.add(4001); + + verifyIntersection(expected, sets); + } 
+ + @Test + public void testIntersection9() + { + ConciseSet set1 = new ConciseSet(); + set1.add(2005); + set1.add(3005); + set1.add(3008); + + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 3007; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(2005); + expected.add(3005); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection10() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 3100; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + + set2.add(500); + set2.add(600); + set2.add(4001); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(500); + expected.add(600); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection11() + { + ConciseSet set1 = new ConciseSet(); + set1.add(2005); + for (int i = 2800; i < 3500; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 3007; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(2005); + for (int i = 2800; i < 3007; i++) { + expected.add(i); + } + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection12() + { + ConciseSet set1 = new ConciseSet(); + set1.add(2005); + for (int i = 2800; i < 3500; i++) { + set1.add(i); + } + set1.add(10005); + + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 3007; i++) { + set2.add(i); + } + set2.add(10005); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + 
ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(2005); + for (int i = 2800; i < 3007; i++) { + expected.add(i); + } + expected.add(10005); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection13() + { + ConciseSet set1 = new ConciseSet(); + set1.add(2005); + + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 100; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection14() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + set2.add(0); + set2.add(3); + set2.add(5); + set2.add(100); + set2.add(101); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(0); + expected.add(3); + expected.add(5); + expected.add(100); + expected.add(101); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection15() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 1000; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + set2.add(0); + set2.add(3); + set2.add(5); + for (int i = 100; i < 500; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(0); + expected.add(3); + expected.add(5); + for (int i = 100; i < 500; i++) { + expected.add(i); + } + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection16() + { + ConciseSet set1 = new ConciseSet(); + set1.add(2005); + + 
ConciseSet set2 = new ConciseSet(); + set2.add(0); + set2.add(3); + set2.add(5); + set2.add(100); + set2.add(101); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection17() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 0; i < 4002; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + set2.add(4001); + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(4001); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection18() + { + ConciseSet set1 = new ConciseSet(); + for (int i = 32; i < 93; i++) { + set1.add(i); + } + + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 62; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + for (int i = 32; i < 62; i++) { + expected.add(i); + } + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersection19() + { + ConciseSet set1 = new ConciseSet(); + set1.add(2005); + + ConciseSet set2 = new ConciseSet(); + for (int i = 0; i < 10000; i++) { + set2.add(i); + } + + List sets = Arrays.asList( + ImmutableConciseSet.newImmutableFromMutable(set1), + ImmutableConciseSet.newImmutableFromMutable(set2) + ); + + List expected = Lists.newArrayList(); + expected.add(2005); + + verifyIntersection(expected, sets); + } + + @Test + public void testIntersectionTerminates() throws Exception + { + verifyIntersection(Arrays.asList(), Arrays.asList(new ImmutableConciseSet(), new ImmutableConciseSet())); + } + + private void verifyIntersection(List expected, List 
sets) + { + List actual = Lists.newArrayList(); + ImmutableConciseSet set = ImmutableConciseSet.intersection(sets); + IntSet.IntIterator itr = set.iterator(); + while (itr.hasNext()) { + actual.add(itr.next()); + } + Assert.assertEquals(expected, actual); + } + + /** + * Basic complement with no length + */ + @Test + public void testComplement1() + { + final int[] ints = {1, 100}; + List expected = Lists.newArrayList(); + + ConciseSet set = new ConciseSet(); + for (int i : ints) { + set.add(i); + } + + for (int i = 0; i <= 100; i++) { + if (i != 1 && i != 100) { + expected.add(i); + } + } + + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, NO_COMPLEMENT_LENGTH); + } + + /** + * Complement of a single partial word + */ + @Test + public void testComplement2() + { + List expected = Lists.newArrayList(); + + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 15; i++) { + set.add(i); + } + + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, NO_COMPLEMENT_LENGTH); + } + + /** + * Complement of a single partial word with a length set in the same word + */ + @Test + public void testComplement3() + { + List expected = Lists.newArrayList(); + final int length = 21; + + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 15; i++) { + set.add(i); + } + for (int i = 15; i < length; i++) { + expected.add(i); + } + + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement of a single partial word with a length set in a different word + */ + @Test + public void testComplement4() + { + List expected = Lists.newArrayList(); + final int length = 41; + + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 15; i++) { + set.add(i); + } + for (int i = 15; i < length; i++) { + expected.add(i); + } + + ImmutableConciseSet testSet = 
ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement of a single partial word with a length set to create a one fill + */ + @Test + public void testComplement5() + { + List expected = Lists.newArrayList(); + final int length = 1001; + + ConciseSet set = new ConciseSet(); + for (int i = 0; i < 15; i++) { + set.add(i); + } + for (int i = 15; i < length; i++) { + expected.add(i); + } + + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement of words with a length set to create a one fill + */ + @Test + public void testComplement6() + { + List expected = Lists.newArrayList(); + final int length = 1001; + + ConciseSet set = new ConciseSet(); + for (int i = 65; i <= 100; i++) { + set.add(i); + } + for (int i = 0; i < length; i++) { + if (i < 65 || i > 100) { + expected.add(i); + } + } + + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement of 2 words with a length in the second word + */ + @Test + public void testComplement7() + { + List expected = Lists.newArrayList(); + final int length = 37; + + ConciseSet set = new ConciseSet(); + for (int i = 0; i <= 35; i++) { + set.add(i); + } + expected.add(36); + + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement of a one literal with a length set to complement the next bit in the next word + */ + @Test + public void testComplement8() + { + List expected = Lists.newArrayList(); + final int length = 32; + + ConciseSet set = new ConciseSet(); + for (int i = 0; i <= 30; i++) { + set.add(i); + } + expected.add(31); + + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + 
/** + * Complement of a null set with a length + */ + @Test + public void testComplement9() + { + final List lengths = new ArrayList(); + lengths.addAll( + Arrays.asList( + 35, + 31, + 32, + 1, + 0, + 31 * 3, + 1024, + ConciseSetUtils.MAX_ALLOWED_INTEGER + ) + ); + final Random random = new Random(701534702L); + for (int i = 0; i < 10; ++i) { + lengths.add(random.nextInt(ConciseSetUtils.MAX_ALLOWED_INTEGER + 1)); + } + final ImmutableConciseSet emptySet = new ImmutableConciseSet(); + for (final int length : lengths) { + final ImmutableConciseSet complement = ImmutableConciseSet.complement(emptySet, length); + final IntSet.IntIterator intIterator = complement.iterator(); + for (int i = 0; i < length; i++) { + final int n = intIterator.next(); + if (i != n) { + Assert.assertEquals(String.format("Failure at bit [%d] on length [%d]", i, length), i, n); + } + } + NoSuchElementException ex = null; + try { + intIterator.next(); + } + catch (NoSuchElementException e) { + ex = e; + } + Assert.assertNotNull(ex); + } + } + + /** + * Complement of a null set to create a one fill + */ + @Test + public void testComplement10() + { + List expected = Lists.newArrayList(); + final int length = 93; + + for (int i = 0; i < length; i++) { + expected.add(i); + } + + ImmutableConciseSet testSet = new ImmutableConciseSet(); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement with correct last index + */ + @Test + public void testComplement11() + { + List expected = Lists.newArrayList(); + int length = 18930; + for (int i = 0; i < 500; i++) { + expected.add(i); + } + for (int i = 18881; i < length; i++) { + expected.add(i); + } + + ConciseSet set = new ConciseSet(); + for (int i = 500; i <= 18880; i++) { + set.add(i); + } + ImmutableConciseSet testSet = ImmutableConciseSet.newImmutableFromMutable(set); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement with empty set and length in first block + */ + @Test + public void testComplement12() + { 
+ List expected = Lists.newArrayList(); + int length = 10; + for (int i = 0; i < 10; i++) { + expected.add(i); + } + + ImmutableConciseSet testSet = new ImmutableConciseSet(); + + verifyComplement(expected, testSet, length); + } + + /** + * Complement with empty list of some length + */ + @Test + public void testComplement13() + { + List expected = Lists.newArrayList(); + int length = 10; + for (int i = 0; i < length; i++) { + expected.add(i); + } + ImmutableConciseSet testSet = new ImmutableConciseSet(); + + verifyComplement(expected, testSet, length); + } + + private void verifyComplement(List expected, ImmutableConciseSet set, int endIndex) + { + List actual = Lists.newArrayList(); + + ImmutableConciseSet res; + if (endIndex == NO_COMPLEMENT_LENGTH) { + res = ImmutableConciseSet.complement(set); + } else { + res = ImmutableConciseSet.complement(set, endIndex); + } + + IntSet.IntIterator itr = res.iterator(); + while (itr.hasNext()) { + actual.add(itr.next()); + } + Assert.assertEquals(expected, actual); + } + + @Test + public void testContains() + { + final ConciseSet conciseSet = new ConciseSet(); + final Random random = new Random(543167436715430L); + final Set integerSet = new HashSet<>(); + int max = -1; + for (int i = 0; i < 100; ++i) { + final int j = random.nextInt(1 << 20); + integerSet.add(j); + conciseSet.add(j); + if (j > max) { + max = j; + } + } + final ImmutableConciseSet immutableConciseSet = ImmutableConciseSet.newImmutableFromMutable(conciseSet); + for (int i = 0; i < max + 10; ++i) { + final String s = Integer.toString(i); + Assert.assertEquals(s, integerSet.contains(i), conciseSet.contains(i)); + Assert.assertEquals(s, integerSet.contains(i), immutableConciseSet.contains(i)); + } + } +} diff --git a/pom.xml b/pom.xml index 65e56988b50..40ce8549925 100644 --- a/pom.xml +++ b/pom.xml @@ -91,6 +91,7 @@ aws-common java-util bytebuffer-collections + extendedset extensions-core/avro-extensions extensions-core/datasketches diff --git 
a/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java b/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java index c5ef97f594d..7cdcf6937e3 100644 --- a/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java +++ b/processing/src/main/java/io/druid/segment/data/ConciseBitmapSerdeFactory.java @@ -27,7 +27,7 @@ import io.druid.collections.bitmap.BitmapFactory; import io.druid.collections.bitmap.ConciseBitmapFactory; import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.collections.bitmap.WrappedImmutableConciseBitmap; -import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; +import io.druid.extendedset.intset.ImmutableConciseSet; /** */