Migrating extendedset from Metamarkets. (#3694)

* Migrating extendedset from Metamarkets.

* Notice change

* More details in NOTICE

* NOTICE formatting.

* suppress header checkstyle for extendedset.
This commit is contained in:
Akash Dwivedi 2017-01-17 10:10:27 -08:00 committed by Charles Allen
parent b0232b4e40
commit dd0c4e2ead
46 changed files with 28683 additions and 19 deletions

19
NOTICE
View File

@ -50,3 +50,22 @@ This product contains SQL query planning code adapted from Apache Calcite
* https://github.com/apache/calcite/blob/master/LICENSE (Apache License, Version 2.0) * https://github.com/apache/calcite/blob/master/LICENSE (Apache License, Version 2.0)
* HOMEPAGE: * HOMEPAGE:
* https://calcite.apache.org/ * https://calcite.apache.org/
This product contains a modified version of Metamarkets extendedset library
* LICENSE:
* https://github.com/metamx/extendedset/blob/master/LICENSE (Apache License, Version 2.0)
* HOMEPAGE:
* https://github.com/metamx/extendedset
* COMMIT TAG:
* https://github.com/metamx/extendedset/commit/c9d647d
This product contains a modified version of Alessandro Colantonio's CONCISE
(COmpressed 'N' Composable Integer SEt) library, extending the functionality of
ConciseSet to use IntBuffers.
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
* LICENSE:
* Apache License, Version 2.0
* HOMEPAGE:
* https://sourceforge.net/projects/concise/

View File

@ -55,7 +55,7 @@ import io.druid.segment.data.Indexed;
import io.druid.segment.data.RoaringBitmapSerdeFactory; import io.druid.segment.data.RoaringBitmapSerdeFactory;
import io.druid.segment.filter.BoundFilter; import io.druid.segment.filter.BoundFilter;
import io.druid.segment.serde.BitmapIndexColumnPartSupplier; import io.druid.segment.serde.BitmapIndexColumnPartSupplier;
import it.uniroma3.mat.extendedset.intset.ConciseSetUtils; import io.druid.extendedset.intset.ConciseSetUtils;
@State(Scope.Benchmark) @State(Scope.Benchmark)
@Fork(value = 1) @Fork(value = 1)

View File

@ -31,7 +31,7 @@ import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.infra.Blackhole;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
@State(Scope.Benchmark) @State(Scope.Benchmark)
public class ConciseComplementBenchmark public class ConciseComplementBenchmark

View File

@ -34,9 +34,9 @@
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>com.metamx</groupId> <groupId>io.druid</groupId>
<artifactId>extendedset</artifactId> <artifactId>extendedset</artifactId>
<version>1.3.10</version> <version>${project.parent.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.google.guava</groupId> <groupId>com.google.guava</groupId>

View File

@ -22,7 +22,7 @@ package io.druid.collections.bitmap;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.Iterator; import java.util.Iterator;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
/** /**
* As the name suggests, this class instantiates bitmaps of the types * As the name suggests, this class instantiates bitmaps of the types

View File

@ -25,9 +25,9 @@ import org.roaringbitmap.IntIterator;
import com.google.common.primitives.Ints; import com.google.common.primitives.Ints;
import it.uniroma3.mat.extendedset.intset.ConciseSet; import io.druid.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
import it.uniroma3.mat.extendedset.intset.IntSet; import io.druid.extendedset.intset.IntSet;
public class WrappedConciseBitmap implements MutableBitmap public class WrappedConciseBitmap implements MutableBitmap
{ {

View File

@ -21,7 +21,7 @@ package io.druid.collections.bitmap;
import org.roaringbitmap.IntIterator; import org.roaringbitmap.IntIterator;
import it.uniroma3.mat.extendedset.intset.IntSet; import io.druid.extendedset.intset.IntSet;
/** /**
*/ */

View File

@ -24,8 +24,8 @@ import java.nio.ByteBuffer;
import org.roaringbitmap.IntIterator; import org.roaringbitmap.IntIterator;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
import it.uniroma3.mat.extendedset.intset.IntSet; import io.druid.extendedset.intset.IntSet;
public class WrappedImmutableConciseBitmap implements ImmutableBitmap public class WrappedImmutableConciseBitmap implements ImmutableBitmap
{ {

View File

@ -38,7 +38,7 @@ import com.carrotsearch.junitbenchmarks.BenchmarkRule;
import com.carrotsearch.junitbenchmarks.Clock; import com.carrotsearch.junitbenchmarks.Clock;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
@BenchmarkOptions(clock = Clock.NANO_TIME, benchmarkRounds = 50) @BenchmarkOptions(clock = Clock.NANO_TIME, benchmarkRounds = 50)

View File

@ -29,8 +29,8 @@ import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import it.uniroma3.mat.extendedset.intset.ConciseSet; import io.druid.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
import junit.framework.Assert; import junit.framework.Assert;
public class ConciseBitmapFactoryTest public class ConciseBitmapFactoryTest

View File

@ -29,8 +29,8 @@ import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart;
import com.carrotsearch.junitbenchmarks.annotation.LabelType; import com.carrotsearch.junitbenchmarks.annotation.LabelType;
import io.druid.test.annotation.Benchmark; import io.druid.test.annotation.Benchmark;
import it.uniroma3.mat.extendedset.intset.ConciseSet; import io.druid.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
@Category({Benchmark.class}) @Category({Benchmark.class})
@BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) @BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20)

View File

@ -29,8 +29,8 @@ import com.carrotsearch.junitbenchmarks.annotation.BenchmarkHistoryChart;
import com.carrotsearch.junitbenchmarks.annotation.LabelType; import com.carrotsearch.junitbenchmarks.annotation.LabelType;
import io.druid.test.annotation.Benchmark; import io.druid.test.annotation.Benchmark;
import it.uniroma3.mat.extendedset.intset.ConciseSet; import io.druid.extendedset.intset.ConciseSet;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
@Category({Benchmark.class}) @Category({Benchmark.class})
@BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20) @BenchmarkHistoryChart(labelWith = LabelType.CUSTOM_KEY, maxRuns = 20)

View File

@ -33,4 +33,7 @@
<suppress checks="AvoidStaticImport" files="[\\/]src[\\/]test[\\/]" /> <suppress checks="AvoidStaticImport" files="[\\/]src[\\/]test[\\/]" />
<suppress checks="Header" files="[\\/]target[\\/]generated-test-sources[\\/]" /> <suppress checks="Header" files="[\\/]target[\\/]generated-test-sources[\\/]" />
<!-- extendedset is a fork of Alessandro Colantonio's CONCISE (COmpressed 'N' Composable Integer SEt) repository, so the standard header stating that the file is licensed to Metamarkets under a CLA is not true for it. -->
<suppress checks="Header" files="[\\/]extendedset[\\/]" />
</suppressions> </suppressions>

54
extendedset/pom.xml Executable file
View File

@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to Metamarkets Group Inc. (Metamarkets) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. Metamarkets licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>extendedset</artifactId>
<name>extendedset</name>
<description>
Implementation of CONCISE (COmpressed 'N' Composable Integer SEt) bit map compression algorithm by Alessandro
Colantonio with some enhanced features - http://ricerca.mat.uniroma3.it/users/colanton/docs/concise.pdf
</description>
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.9.3-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>16.0.1</version>
</dependency>
<!-- Tests -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,592 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset;
import io.druid.extendedset.intset.ArraySet;
import io.druid.extendedset.intset.IntSet;
import io.druid.extendedset.wrappers.GenericExtendedSet;
import io.druid.extendedset.wrappers.IndexedSet;
import io.druid.extendedset.wrappers.IntegerSet;
import io.druid.extendedset.wrappers.LongSet;
import io.druid.extendedset.wrappers.matrix.PairSet;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
/**
 * A {@link SortedSet} enriched with set-algebra operations (intersection,
 * union, difference, symmetric difference, complement) and related helpers.
 *
 * @param <T> the type of elements maintained by this set
 *
 * @author Alessandro Colantonio
 * @version $Id: ExtendedSet.java 140 2011-02-07 21:30:29Z cocciasik $
 * @see AbstractExtendedSet
 * @see IndexedSet
 * @see GenericExtendedSet
 * @see ArraySet
 * @see IntegerSet
 * @see LongSet
 * @see PairSet
 */
public interface ExtendedSet<T> extends SortedSet<T>, Cloneable, Comparable<ExtendedSet<T>>
{
  /**
   * Builds the intersection of this set and the given collection.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the result of the operation
   *
   * @see #retainAll(java.util.Collection)
   */
  ExtendedSet<T> intersection(Collection<? extends T> other);

  /**
   * Builds the union of this set and the given collection.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the result of the operation
   *
   * @see #addAll(java.util.Collection)
   */
  ExtendedSet<T> union(Collection<? extends T> other);

  /**
   * Builds the difference between this set and the given collection.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the result of the operation
   *
   * @see #removeAll(java.util.Collection)
   */
  ExtendedSet<T> difference(Collection<? extends T> other);

  /**
   * Builds the symmetric difference between this set and the given
   * collection.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the result of the operation
   *
   * @see #flip(Object)
   */
  ExtendedSet<T> symmetricDifference(Collection<? extends T> other);

  /**
   * Builds the complement set, i.e. the set of all the elements strictly
   * less than {@link #last()} that do not exist in the current set.
   *
   * @return the complement set
   *
   * @see ExtendedSet#complement()
   */
  ExtendedSet<T> complemented();

  /**
   * Complements the current set in place. Afterwards the set holds all the
   * elements strictly less than the former {@link #last()} that did not
   * exist in the set.
   *
   * @see ExtendedSet#complemented()
   */
  void complement();

  /**
   * Tells whether the specified {@link Collection} instance contains any
   * element that is also contained within this {@link ExtendedSet} instance.
   *
   * @param other {@link Collection} instance to intersect with
   *
   * @return {@code true} if this {@link ExtendedSet} intersects the
   * specified {@link Collection}
   */
  boolean containsAny(Collection<? extends T> other);

  /**
   * Tells whether the specified {@link Collection} instance contains at
   * least {@code minElements} elements that are also contained within this
   * {@link ExtendedSet} instance.
   *
   * @param other       {@link Collection} instance to intersect with
   * @param minElements minimum number of elements to be contained within
   *                    this {@link ExtendedSet} instance
   *
   * @return {@code true} if this {@link ExtendedSet} shares at least
   * {@code minElements} elements with the specified {@link Collection}
   *
   * @throws IllegalArgumentException if {@code minElements < 1}
   */
  boolean containsAtLeast(Collection<? extends T> other, int minElements);

  /**
   * Computes the size of the intersection set.
   * <p>
   * This is faster than calling {@link #intersection(Collection)} and then
   * {@link #size()}.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the size
   */
  int intersectionSize(Collection<? extends T> other);

  /**
   * Computes the size of the union set.
   * <p>
   * This is faster than calling {@link #union(Collection)} and then
   * {@link #size()}.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the size
   */
  int unionSize(Collection<? extends T> other);

  /**
   * Computes the size of the symmetric difference set.
   * <p>
   * This is faster than calling {@link #symmetricDifference(Collection)}
   * and then {@link #size()}.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the size
   */
  int symmetricDifferenceSize(Collection<? extends T> other);

  /**
   * Computes the size of the difference set.
   * <p>
   * This is faster than calling {@link #difference(Collection)} and then
   * {@link #size()}.
   *
   * @param other {@link Collection} instance acting as the right operand
   *
   * @return the size
   */
  int differenceSize(Collection<? extends T> other);

  /**
   * Computes the size of the complement set.
   * <p>
   * This is faster than calling {@link #complemented()} and then
   * {@link #size()}.
   *
   * @return the size
   */
  int complementSize();

  /**
   * Generates an empty set.
   *
   * @return the empty set
   */
  ExtendedSet<T> empty();

  /**
   * See the {@code clone()} method of {@link Object}.
   *
   * @return cloned object
   */
  ExtendedSet<T> clone();

  /**
   * Computes the compression factor relative to the equivalent bitmap
   * representation (1 means not compressed, namely a memory footprint
   * similar to {@link BitSet}, 2 means twice the size of {@link BitSet},
   * etc.).
   *
   * @return the compression factor
   */
  double bitmapCompressionRatio();

  /**
   * Computes the compression factor relative to the equivalent integer
   * collection (1 means not compressed, namely a memory footprint similar
   * to {@link ArrayList}, 2 means twice the size of {@link ArrayList},
   * etc.).
   *
   * @return the compression factor
   */
  double collectionCompressionRatio();

  /**
   * {@inheritDoc}
   */
  @Override
  ExtendedIterator<T> iterator();

  /**
   * Gets an iterator that walks the elements of type {@code T} in
   * descending order.
   *
   * @return descending iterator
   */
  ExtendedIterator<T> descendingIterator();

  /**
   * Makes it possible to use the Java "for-each" statement in descending
   * order.
   *
   * @return {@link Iterable} instance to iterate items in descending order
   */
  Iterable<T> descending();

  /**
   * Computes the power-set of the current set.
   * <p>
   * It is a particular implementation of the algorithm <i>Apriori</i> (see:
   * Rakesh Agrawal, Ramakrishnan Srikant, <i>Fast Algorithms for Mining
   * Association Rules in Large Databases</i>, in Proceedings of the
   * 20<sup>th</sup> International Conference on Very Large Data Bases,
   * p.487-499, 1994). The returned power-set does <i>not</i> contain the
   * empty set.
   * <p>
   * The subsets composing the power-set are returned in a list that is
   * sorted according to the lexicographical order provided by the sorted
   * set.
   *
   * @return the power-set
   *
   * @see #powerSet(int, int)
   * @see #powerSetSize()
   */
  List<? extends ExtendedSet<T>> powerSet();

  /**
   * Computes a subset of the power-set of the current set, made of those
   * subsets whose cardinality is between {@code min} and {@code max}.
   * <p>
   * It is a particular implementation of the algorithm <i>Apriori</i> (see:
   * Rakesh Agrawal, Ramakrishnan Srikant, <i>Fast Algorithms for Mining
   * Association Rules in Large Databases</i>, in Proceedings of the
   * 20<sup>th</sup> International Conference on Very Large Data Bases,
   * p.487-499, 1994). The power-set does <i>not</i> contain the empty set.
   * <p>
   * The subsets composing the power-set are returned in a list that is
   * sorted according to the lexicographical order provided by the sorted
   * set.
   *
   * @param min minimum subset size (greater than zero)
   * @param max maximum subset size
   *
   * @return the power-set
   *
   * @see #powerSet()
   * @see #powerSetSize(int, int)
   */
  List<? extends ExtendedSet<T>> powerSet(int min, int max);

  /**
   * Computes the size of the power-set of the current set.
   * <p>
   * The power-set does <i>not</i> contain the empty set.
   *
   * @return the power-set size
   *
   * @see #powerSet()
   */
  int powerSetSize();

  /**
   * Computes the size of the power-set of the current set when restricted
   * to those subsets whose cardinality is between {@code min} and
   * {@code max}.
   * <p>
   * The returned power-set does <i>not</i> contain the empty set.
   *
   * @param min minimum subset size (greater than zero)
   * @param max maximum subset size
   *
   * @return the power-set size
   *
   * @see #powerSet(int, int)
   */
  int powerSetSize(int min, int max);

  /**
   * Produces debug information about the given {@link ExtendedSet}
   * implementation.
   *
   * @return a string that describes the internal representation of the
   * instance
   */
  String debugInfo();

  /**
   * Adds to the set all the elements between {@code from} and {@code to},
   * both included. It supposes that there is an ordering of the elements of
   * type {@code T} and that the universe of all possible elements is known.
   *
   * @param from first element
   * @param to   last element
   */
  void fill(T from, T to);

  /**
   * Removes from the set all the elements between {@code from} and
   * {@code to}, both included. It supposes that there is an ordering of the
   * elements of type {@code T} and that the universe of all possible
   * elements is known.
   *
   * @param from first element
   * @param to   last element
   */
  void clear(T from, T to);

  /**
   * Adds the element if it does not exist, or removes it if it exists.
   *
   * @param e element to flip
   *
   * @see #symmetricDifference(Collection)
   */
  void flip(T e);

  /**
   * Gets the read-only version of the current set.
   *
   * @return the read-only version of the current set
   */
  ExtendedSet<T> unmodifiable();

  /**
   * Gets the {@code i}<sup>th</sup> element of the set.
   *
   * @param i position of the element in the sorted set
   *
   * @return the {@code i}<sup>th</sup> element of the set
   *
   * @throws IndexOutOfBoundsException if {@code i} is less than zero, or
   *                                   greater or equal to {@link #size()}
   */
  T get(int i);

  /**
   * Provides the position of an element within the set.
   * <p>
   * It returns -1 if the element does not exist within the set.
   *
   * @param e element of the set
   *
   * @return the element position
   */
  int indexOf(T e);

  /**
   * {@inheritDoc}
   */
  @Override
  ExtendedSet<T> tailSet(T fromElement);

  /**
   * {@inheritDoc}
   */
  @Override
  ExtendedSet<T> headSet(T toElement);

  /**
   * {@inheritDoc}
   */
  @Override
  ExtendedSet<T> subSet(T fromElement, T toElement);

  /**
   * Converts a given {@link Collection} instance into an instance of the
   * current class. <b>NOTE:</b> when the collection is already an instance
   * of the current class, the method returns the collection itself.
   *
   * @param c collection to use to generate the new instance
   *
   * @return the converted collection
   *
   * @see #convert(Object...)
   */
  ExtendedSet<T> convert(Collection<?> c);

  /**
   * Converts a given array of objects into an instance of the current
   * class.
   *
   * @param e objects to use to generate the new instance
   *
   * @return the converted collection
   *
   * @see #convert(Collection)
   */
  ExtendedSet<T> convert(Object... e);

  /**
   * Computes the Jaccard similarity coefficient between this set and the
   * given set.
   * <p>
   * The coefficient is defined as
   * {@code |A intersection B| / |A union B|}.
   *
   * @param other the other set
   *
   * @return the Jaccard similarity coefficient
   *
   * @see #jaccardDistance(ExtendedSet)
   */
  double jaccardSimilarity(ExtendedSet<T> other);

  /**
   * Computes the Jaccard distance between this set and the given set.
   * <p>
   * The distance is defined as {@code 1 - }
   * {@link #jaccardSimilarity(ExtendedSet)}.
   *
   * @param other the other set
   *
   * @return the Jaccard distance
   *
   * @see #jaccardSimilarity(ExtendedSet)
   */
  double jaccardDistance(ExtendedSet<T> other);

  /**
   * Computes the weighted version of the Jaccard similarity coefficient
   * between this set and the given set.
   * <p>
   * The coefficient is defined as
   * {@code sum of min(A_i, B_i) / sum of max(A_i, B_i)}.
   * <p>
   * <b>NOTE:</b> {@code T} must be a number, namely one of
   * {@link Integer}, {@link Double}, {@link Float}, {@link Byte},
   * {@link Long}, {@link Short}.
   *
   * @param other the other set
   *
   * @return the weighted Jaccard similarity coefficient
   *
   * @throws IllegalArgumentException if {@code T} is not a number
   * @see #weightedJaccardDistance(ExtendedSet)
   */
  double weightedJaccardSimilarity(ExtendedSet<T> other);

  /**
   * Computes the weighted version of the Jaccard distance between this set
   * and the given set.
   * <p>
   * The distance is defined as {@code 1 - }
   * {@link #weightedJaccardSimilarity(ExtendedSet)}.
   * <p>
   * <b>NOTE:</b> {@code T} must be a number, namely one of
   * {@link Integer}, {@link Double}, {@link Float}, {@link Byte},
   * {@link Long}, {@link Short}.
   *
   * @param other the other set
   *
   * @return the weighted Jaccard distance
   *
   * @throws IllegalArgumentException if {@code T} is not a number
   * @see #weightedJaccardSimilarity(ExtendedSet)
   */
  double weightedJaccardDistance(ExtendedSet<T> other);

  /**
   * Compares this object with the specified object for order. Returns a
   * negative integer, zero, or a positive integer as this object is less
   * than, equal to, or greater than the specified object. A set {@code A}
   * is less than another set {@code B} if {@code B-A} (that is, the
   * elements in {@code B} that are not contained in {@code A}) contains at
   * least one element that is greater than all the elements in {@code A-B}.
   * <p>
   * The implementor must ensure
   * {@code sgn(x.compareTo(y)) == -sgn(y.compareTo(x))} for all {@code x}
   * and {@code y}. (This implies that {@code x.compareTo(y)} must throw an
   * exception iff {@code y.compareTo(x)} throws an exception.)
   * <p>
   * The implementor must also ensure that the relation is transitive:
   * {@code (x.compareTo(y) > 0 && y.compareTo(z) > 0)} implies
   * {@code x.compareTo(z) > 0}.
   * <p>
   * Finally, the implementor must ensure that {@code x.compareTo(y) == 0}
   * implies that {@code sgn(x.compareTo(z)) == sgn(y.compareTo(z))}, for
   * all {@code z}.
   * <p>
   * It is strongly recommended, but <i>not</i> strictly required that
   * {@code (x.compareTo(y) == 0) == (x.equals(y))}. Generally speaking, any
   * class that implements the {@code Comparable} interface and violates
   * this condition should clearly indicate this fact. The recommended
   * language is "Note: this class has a natural ordering that is
   * inconsistent with equals."
   * <p>
   * In the foregoing description, the notation
   * {@code sgn(}<i>expression</i>{@code )} designates the mathematical
   * <i>signum</i> function, which is defined to return one of {@code -1},
   * {@code 0}, or {@code 1} according to whether the value of
   * <i>expression</i> is negative, zero or positive.
   *
   * @param o the object to be compared.
   *
   * @return a negative integer, zero, or a positive integer as this object
   * is less than, equal to, or greater than the specified object.
   *
   * @throws ClassCastException if the specified object's type prevents it
   *                            from being compared to this object.
   */
  @Override
  int compareTo(ExtendedSet<T> o);

  /**
   * Extended version of the {@link Iterator} interface that makes it
   * possible to "skip" some elements of the set.
   *
   * @param <X> the type of elements maintained by this set
   */
  interface ExtendedIterator<X> extends Iterator<X>
  {
    /**
     * Skips all the elements before the specified element, so that
     * {@link Iterator#next()} gives the given element or, if it does not
     * exist, the element immediately after according to the sorting
     * provided by this {@link SortedSet} instance.
     * <p>
     * If {@code element} is less than the next element, it does nothing.
     *
     * @param element first element to not skip
     */
    void skipAllBefore(X element);
  }
}

View File

@ -0,0 +1,744 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.intset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.NoSuchElementException;
/**
* This class provides a skeletal implementation of the {@link IntSet}
* interface to minimize the effort required to implement this interface.
*
* @author Alessandro Colantonio
* @version $Id: AbstractIntSet.java 156 2011-09-01 00:13:57Z cocciasik $
*/
public abstract class AbstractIntSet implements IntSet
{
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: takes a copy of this set via {@link #clone()} and
 * adds every element of {@code other} to that copy.
 */
@Override
public IntSet union(IntSet other)
{
  final IntSet result = clone();
  result.addAll(other);
  return result;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: takes a copy of this set via {@link #clone()} and
 * removes every element of {@code other} from that copy.
 */
@Override
public IntSet difference(IntSet other)
{
  final IntSet result = clone();
  result.removeAll(other);
  return result;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: takes a copy of this set via {@link #clone()} and
 * calls {@code retainAll(other)} on that copy.
 */
@Override
public IntSet intersection(IntSet other)
{
  final IntSet result = clone();
  result.retainAll(other);
  return result;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: takes a copy of this set via {@link #clone()} and
 * flips, one by one, every element produced by the iterator of {@code c}.
 */
@Override
public IntSet symmetricDifference(IntSet c)
{
  final IntSet result = clone();
  for (IntIterator elements = c.iterator(); elements.hasNext(); ) {
    result.flip(elements.next());
  }
  return result;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: takes a copy of this set via {@link #clone()} and
 * complements the copy in place.
 */
@Override
public IntSet complemented()
{
  final IntSet result = clone();
  result.complement();
  return result;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: does nothing on an empty set; otherwise flips
 * every integer from {@link #last()} down to zero, one at a time.
 */
@Override
public void complement()
{
  if (!isEmpty()) {
    // last() is read once, before any flip changes the set
    int candidate = last();
    while (candidate >= 0) {
      flip(candidate);
      candidate--;
    }
  }
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: walks the iterator of {@code c} and stops at the
 * first element that is not contained in this set. An iterator that yields
 * no element gives {@code true}.
 */
@Override
public boolean containsAll(IntSet c)
{
  for (IntIterator elements = c.iterator(); elements.hasNext(); ) {
    if (!contains(elements.next())) {
      return false;
    }
  }
  return true;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: walks the iterator of {@code c} and stops at the
 * first element that is also contained in this set. An iterator that yields
 * no element gives {@code false}.
 */
@Override
public boolean containsAny(IntSet c)
{
  // The loop is driven by the iterator alone; the first hit ends the search.
  IntIterator itr = c.iterator();
  while (itr.hasNext()) {
    if (contains(itr.next())) {
      return true;
    }
  }
  return false;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: walks the iterator of {@code c}, counting the
 * elements that are also contained in this set, and stops as soon as
 * {@code minElements} of them have been found.
 *
 * @throws IllegalArgumentException if {@code minElements < 1}
 */
@Override
public boolean containsAtLeast(IntSet c, int minElements)
{
  // Reject a non-positive threshold instead of silently answering
  // (0 used to give true, negative values used to give false).
  if (minElements < 1) {
    throw new IllegalArgumentException("minElements: " + minElements + " < 1");
  }
  // Count down on a local so the parameter keeps its original value.
  int stillMissing = minElements;
  IntIterator itr = c.iterator();
  while (stillMissing > 0 && itr.hasNext()) {
    if (contains(itr.next())) {
      stillMissing--;
    }
  }
  return stillMissing == 0;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: counts how many elements produced by the iterator
 * of {@code c} are contained in this set.
 */
@Override
public int intersectionSize(IntSet c)
{
  int shared = 0;
  for (IntIterator elements = c.iterator(); elements.hasNext(); ) {
    if (contains(elements.next())) {
      shared++;
    }
  }
  return shared;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: {@code size() + other.size() - intersectionSize(other)};
 * a {@code null} argument yields {@code size()}.
 */
@Override
public int unionSize(IntSet other)
{
  final int ownSize = size();
  if (other == null) {
    return ownSize;
  }
  return ownSize + other.size() - intersectionSize(other);
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation:
 * {@code size() + other.size() - 2 * intersectionSize(other)}; a
 * {@code null} argument yields {@code size()}.
 */
@Override
public int symmetricDifferenceSize(IntSet other)
{
  final int ownSize = size();
  if (other == null) {
    return ownSize;
  }
  return ownSize + other.size() - 2 * intersectionSize(other);
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: {@code size() - intersectionSize(other)}; a
 * {@code null} argument yields {@code size()}.
 */
@Override
public int differenceSize(IntSet other)
{
  final int ownSize = size();
  if (other == null) {
    return ownSize;
  }
  return ownSize - intersectionSize(other);
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: zero for an empty set, otherwise
 * {@code last() - size() + 1}.
 */
@Override
public int complementSize()
{
  return isEmpty() ? 0 : last() - size() + 1;
}
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: this skeletal class supplies no default, so each
 * concrete subclass has to provide its own implementation.
 */
@Override
public abstract IntSet empty();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default {@code union}, {@code difference},
 * {@code intersection}, {@code symmetricDifference} and
 * {@code complemented} of this class all start from the copy returned here.
 */
@Override
public abstract IntSet clone();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: no default is supplied by this skeletal class.
 */
@Override
public abstract double bitmapCompressionRatio();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: no default is supplied by this skeletal class.
 */
@Override
public abstract double collectionCompressionRatio();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default {@code clear()}, {@code first()},
 * {@code retainAll}, {@code toArray(int[])} and {@code toString()} of this
 * class walk the set through this iterator; {@code clear()} and
 * {@code retainAll} also call {@code remove()} on it.
 */
@Override
public abstract IntIterator iterator();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default {@code compareTo} of this class compares
 * two sets by walking their descending iterators in parallel.
 */
@Override
public abstract IntIterator descendingIterator();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: no default is supplied by this skeletal class.
 */
@Override
public abstract String debugInfo();
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: walks the set with {@link #iterator()} and
 * removes each visited element through the iterator.
 */
@Override
public void clear()
{
  for (IntIterator elements = iterator(); elements.hasNext(); ) {
    elements.next();
    elements.remove();
  }
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: calls {@code remove} on every integer of the
 * closed range {@code [from, to]}, in ascending order.
 *
 * @throws IndexOutOfBoundsException if {@code from > to}
 */
@Override
public void clear(int from, int to)
{
  if (from > to) {
    throw new IndexOutOfBoundsException("from: " + from + " > to: " + to);
  }
  // The end of the range is tested BEFORE incrementing: with a plain
  // "e <= to" condition, to == Integer.MAX_VALUE would make e overflow to
  // a negative value and the loop would never terminate.
  for (int e = from; ; e++) {
    remove(e);
    if (e == to) {
      break;
    }
  }
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: calls {@code add} on every integer of the closed
 * range {@code [from, to]}, in ascending order.
 *
 * @throws IndexOutOfBoundsException if {@code from > to}
 */
@Override
public void fill(int from, int to)
{
  if (from > to) {
    throw new IndexOutOfBoundsException("from: " + from + " > to: " + to);
  }
  // The end of the range is tested BEFORE incrementing: with a plain
  // "e <= to" condition, to == Integer.MAX_VALUE would make e overflow to
  // a negative value and the loop would never terminate.
  for (int e = from; ; e++) {
    add(e);
    if (e == to) {
      break;
    }
  }
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: tries to add {@code e}; when {@code add} reports
 * {@code false}, the element is removed instead.
 */
@Override
public void flip(int e)
{
  final boolean inserted = add(e);
  if (inserted) {
    return;
  }
  remove(e);
}
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: this skeletal class supplies no default, so each
 * concrete subclass has to provide its own implementation.
 */
@Override
public abstract int get(int i);
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: no default is supplied by this skeletal class.
 */
@Override
public abstract int indexOf(int e);
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: no default is supplied by this skeletal class.
 */
@Override
public abstract IntSet convert(int... a);
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: no default is supplied by this skeletal class.
 */
@Override
public abstract IntSet convert(Collection<Integer> c);
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: returns the first value produced by
 * {@link #iterator()}.
 *
 * @throws NoSuchElementException if the set is empty
 */
@Override
public int first()
{
  if (!isEmpty()) {
    return iterator().next();
  }
  throw new NoSuchElementException();
}
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default {@code complement()} and
 * {@code complementSize()} of this class rely on this value.
 */
@Override
public abstract int last();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default size computations of this class
 * ({@code unionSize}, {@code differenceSize}, ...) and {@code toArray} rely
 * on this value.
 */
@Override
public abstract int size();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract: no default is supplied by this skeletal class.
 */
@Override
public abstract boolean isEmpty();
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default {@code containsAll}, {@code containsAny},
 * {@code containsAtLeast} and {@code intersectionSize} of this class call
 * it once per examined element.
 */
@Override
public abstract boolean contains(int i);
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default {@code addAll}, {@code fill} and
 * {@code flip} of this class are built on it.
 */
@Override
public abstract boolean add(int i);
/**
 * {@inheritDoc}
 * <p>
 * Declared abstract. The default {@code removeAll}, {@code clear(int, int)}
 * and {@code flip} of this class are built on it.
 */
@Override
public abstract boolean remove(int i);
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: adds, one by one, every element produced by the
 * iterator of {@code c}. A {@code null} or empty argument leaves the set
 * untouched and yields {@code false}.
 */
@Override
public boolean addAll(IntSet c)
{
  if (c == null || c.isEmpty()) {
    return false;
  }
  boolean changed = false;
  for (IntIterator elements = c.iterator(); elements.hasNext(); ) {
    if (add(elements.next())) {
      changed = true;
    }
  }
  return changed;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: removes, one by one, every element produced by
 * the iterator of {@code c}. A {@code null} or empty argument leaves the
 * set untouched and yields {@code false}.
 */
@Override
public boolean removeAll(IntSet c)
{
  if (c == null || c.isEmpty()) {
    return false;
  }
  boolean changed = false;
  for (IntIterator elements = c.iterator(); elements.hasNext(); ) {
    if (remove(elements.next())) {
      changed = true;
    }
  }
  return changed;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: walks {@link #iterator()} and removes, through the iterator,
 * every element that <code>c</code> does not contain.
 * <p>
 * Retaining the elements of an <i>empty</i> set leaves nothing, so in that case this set
 * is cleared (and <code>true</code> is returned if it held anything). A <code>null</code>
 * argument is tolerated and treated as a no-op, as in {@link #addAll(IntSet)} and
 * {@link #removeAll(IntSet)}.
 *
 * @param c set of the elements to keep
 *
 * @return <code>true</code> if this set changed as a result of the call
 */
@Override
public boolean retainAll(IntSet c)
{
  if (c == null) {
    return false;
  }
  if (c.isEmpty()) {
    // nothing may survive an intersection with the empty set
    if (isEmpty()) {
      return false;
    }
    clear();
    return true;
  }
  IntIterator itr = iterator();
  boolean res = false;
  while (itr.hasNext()) {
    int e = itr.next();
    if (!c.contains(e)) {
      res = true;
      itr.remove();
    }
  }
  return res;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: delegates to {@link #toArray(int[])} with a freshly allocated
 * array of exactly {@link #size()} slots.
 *
 * @return the elements in iteration order, or <code>null</code> (not an empty array) when
 * the set is empty
 */
@Override
public int[] toArray()
{
  return isEmpty() ? null : toArray(new int[size()]);
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: copies the elements, in {@link #iterator()} order, to the head
 * of the array. If the given array is too short a new one of exactly {@link #size()}
 * slots is allocated and returned instead; if it is longer, every slot after the last
 * copied element is overwritten with <code>0</code>.
 *
 * @param a destination array
 *
 * @return <code>a</code>, or a newly allocated array when <code>a</code> is too short
 */
@Override
public int[] toArray(int[] a)
{
  int[] target = a;
  if (target.length < size()) {
    target = new int[size()];
  }
  int pos = 0;
  for (IntIterator itr = iterator(); itr.hasNext(); pos++) {
    target[pos] = itr.next();
  }
  // zero out whatever room is left after the copied elements
  while (pos < target.length) {
    target[pos++] = 0;
  }
  return target;
}
/**
 * {@inheritDoc}
 * <p>
 * Renders the elements in {@link #iterator()} order as <code>[e1, e2, ...]</code>; an
 * empty set is rendered as <code>[]</code>.
 */
@Override
public String toString()
{
  IntIterator itr = iterator();
  if (!itr.hasNext()) {
    return "[]";
  }
  StringBuilder out = new StringBuilder();
  out.append('[').append(itr.next());
  while (itr.hasNext()) {
    out.append(", ").append(itr.next());
  }
  return out.append(']').toString();
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: compares the two sets element by element starting from the
 * <i>highest</i> one (both {@link #descendingIterator()}s are walked in parallel). The
 * first differing pair decides; when one set runs out first, the set that still has
 * elements is the greater one.
 *
 * @return -1, 0 or 1
 */
@Override
public int compareTo(IntSet o)
{
  IntIterator mine = this.descendingIterator();
  IntIterator theirs = o.descendingIterator();
  while (mine.hasNext() && theirs.hasNext()) {
    int left = mine.next();
    int right = theirs.next();
    if (left != right) {
      return left < right ? -1 : 1;
    }
  }
  if (mine.hasNext()) {
    return 1;
  }
  return theirs.hasNext() ? -1 : 0;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: equivalent to {@link #powerSet(int, int)} with no restriction on
 * the subset cardinality (from 1 up to {@link Integer#MAX_VALUE}).
 */
@Override
public List<? extends IntSet> powerSet()
{
  final int smallestSubset = 1;
  final int largestSubset = Integer.MAX_VALUE;
  return powerSet(smallestSubset, largestSubset);
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation. Three shortcuts are tried first (fewer than <code>min</code>
 * elements, exactly <code>min</code> elements, exactly <code>min + 1</code> elements);
 * otherwise the subsets are generated level by level, where level <code>k</code> holds the
 * subsets of cardinality <code>k</code>, each obtained by adding one element to a subset
 * of the previous level.
 *
 * @param min minimum subset cardinality, at least 1
 * @param max maximum subset cardinality, at least <code>min</code>
 *
 * @return the generated subsets; the empty set is never included
 *
 * @throws IllegalArgumentException if <code>min &lt; 1</code> or <code>max &lt; min</code>
 */
@Override
public List<? extends IntSet> powerSet(int min, int max)
{
if (min < 1 || max < min) {
throw new IllegalArgumentException();
}
// special cases
List<IntSet> res = new ArrayList<IntSet>();
// too few elements: no subset can reach the minimum cardinality
if (size() < min) {
return res;
}
// exactly "min" elements: the only qualifying subset is a copy of the set itself
if (size() == min) {
res.add(clone());
return res;
}
// exactly "min + 1" elements: dropping one element at a time (highest first) gives all
// the subsets of cardinality "min"; the whole set qualifies too when "max" allows it
if (size() == min + 1) {
IntIterator itr = descendingIterator();
while (itr.hasNext()) {
IntSet set = clone();
set.remove(itr.next());
res.add(set);
}
if (max > min) {
res.add(clone());
}
return res;
}
// the first level contains only one prefix made up of all 1-subsets
// ("level" is a list of groups; each new group is built from one common base subset)
List<List<IntSet>> level = new ArrayList<List<IntSet>>();
level.add(new ArrayList<IntSet>());
IntIterator itr = iterator();
while (itr.hasNext()) {
IntSet single = empty();
single.add(itr.next());
level.get(0).add(single);
}
if (min == 1) {
res.addAll(level.get(0));
}
// all combinations
// "lvl" is the cardinality of the subsets produced by the current pass
int lvl = 2;
while (!level.isEmpty() && lvl <= max) {
List<List<IntSet>> newLevel = new ArrayList<List<IntSet>>();
for (List<IntSet> prefix : level) {
for (int i = 0; i < prefix.size() - 1; i++) {
// extend the i-th subset of the group with the last element of each later subset
List<IntSet> newPrefix = new ArrayList<IntSet>();
for (int j = i + 1; j < prefix.size(); j++) {
IntSet x = prefix.get(i).clone();
x.add(prefix.get(j).last());
newPrefix.add(x);
if (lvl >= min) {
res.add(x);
}
}
// a group with a single subset cannot be combined any further, so it is dropped
if (newPrefix.size() > 1) {
newLevel.add(newPrefix);
}
}
}
level = newLevel;
lvl++;
}
return res;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: <code>2<sup>size()</sup> - 1</code>, computed with integer
 * arithmetic. The result is exact up to 31 elements; for larger sets, whose power-set
 * size does not fit an <code>int</code>, {@link Integer#MAX_VALUE} is returned.
 *
 * @return the number of non-empty subsets, or 0 for an empty set
 */
@Override
public int powerSetSize()
{
  if (isEmpty()) {
    return 0;
  }
  final int n = size();
  // 2^31 - 1 is exactly Integer.MAX_VALUE; anything beyond that is saturated
  return n >= 31 ? Integer.MAX_VALUE : (1 << n) - 1;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: sums the binomial coefficients <code>(size choose k)</code> for
 * <code>k</code> ranging from <code>min</code> to <code>min(max, size)</code>. The
 * coefficients are computed in place, one row of Pascal's triangle after the other. All
 * the additions saturate at {@link Integer#MAX_VALUE}, so a result that does not fit an
 * <code>int</code> is reported as {@link Integer#MAX_VALUE} instead of wrapping around.
 *
 * @param min minimum subset cardinality, at least 1
 * @param max maximum subset cardinality, at least <code>min</code>
 *
 * @return the number of subsets whose cardinality lies between <code>min</code> and
 * <code>max</code>
 *
 * @throws IllegalArgumentException if <code>min &lt; 1</code> or <code>max &lt; min</code>
 */
@Override
public int powerSetSize(int min, int max)
{
  if (min < 1 || max < min) {
    throw new IllegalArgumentException();
  }
  final int size = size();
  // special cases
  if (size < min) {
    return 0;
  }
  if (size == min) {
    return 1;
  }
  // from here on size > min >= 1; no subset can be larger than the set itself
  max = Math.min(size, max);

  // b[0..i] holds row "i" of Pascal's triangle after the i-th pass of the outer loop;
  // updating from right to left lets each row overwrite the previous one in place
  int[] b = new int[size + 1];
  for (int i = 0; i <= size; i++) {
    b[i] = 1;
  }
  for (int i = 1; i <= size; i++) {
    for (int j = i - 1; j > 0; j--) {
      b[j] = (int) Math.min(Integer.MAX_VALUE, (long) b[j] + b[j - 1]);
    }
  }

  // sum binomial coefficients
  int res = 0;
  for (int i = min; i <= max; i++) {
    res = (int) Math.min(Integer.MAX_VALUE, (long) res + b[i]);
  }
  return res;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: the union size is derived as
 * <code>size() + other.size() - intersectionSize(other)</code>. Two empty sets are
 * considered identical (similarity 1).
 */
@Override
public double jaccardSimilarity(IntSet other)
{
  if (isEmpty() && other.isEmpty()) {
    return 1D;
  }
  final int common = intersectionSize(other);
  final int unionCount = size() + other.size() - common;
  return (double) common / unionCount;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: the complement to 1 of {@link #jaccardSimilarity(IntSet)}.
 */
@Override
public double jaccardDistance(IntSet other)
{
  final double similarity = jaccardSimilarity(other);
  return 1D - similarity;
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: every element weighs as much as its own value. The result is
 * the sum of the elements of the intersection divided by the sum of the elements of the
 * intersection plus those of the symmetric difference. Two empty sets yield 1; if both
 * sums are zero (e.g. the only element involved is 0) the division yields
 * <code>NaN</code>.
 */
@Override
public double weightedJaccardSimilarity(IntSet other)
{
  if (isEmpty() && other.isEmpty()) {
    return 1D;
  }

  double sharedWeight = 0D;
  for (IntIterator itr = intersection(other).iterator(); itr.hasNext(); ) {
    sharedWeight += itr.next();
  }

  double exclusiveWeight = 0D;
  for (IntIterator itr = symmetricDifference(other).iterator(); itr.hasNext(); ) {
    exclusiveWeight += itr.next();
  }

  return sharedWeight / (sharedWeight + exclusiveWeight);
}
/**
 * {@inheritDoc}
 * <p>
 * Default implementation: the complement to 1 of
 * {@link #weightedJaccardSimilarity(IntSet)}.
 */
@Override
public double weightedJaccardDistance(IntSet other)
{
  final double similarity = weightedJaccardSimilarity(other);
  return 1D - similarity;
}
/**
 * Two integer sets are equal when they have the same size and their iterators produce
 * the same values in the same order. Any {@link IntSet} implementation can be compared,
 * not only instances of the same class.
 *
 * @param obj object to compare with
 *
 * @return <code>true</code> if <code>obj</code> is an {@link IntSet} with the same elements
 */
@Override
public boolean equals(Object obj)
{
  if (obj == this) {
    return true;
  }
  if (!(obj instanceof IntSet)) {
    return false;
  }
  final IntSet that = (IntSet) obj;
  if (size() != that.size()) {
    return false;
  }

  // same size: walk both sets in parallel until the first mismatch
  IntIterator mine = iterator();
  IntIterator theirs = that.iterator();
  while (mine.hasNext()) {
    if (mine.next() != theirs.next()) {
      return false;
    }
  }
  return true;
}
/**
 * Computes a hash code from all the elements of the set, combined in
 * {@link #iterator()} order as <code>h = 31 * h + element</code> starting from 1. An empty
 * set hashes to 0.
 * <p>
 * Sets that are equal according to {@link #equals(Object)} produce the same values in the
 * same order, hence the same hash code.
 *
 * @return the hash code
 */
@Override
public int hashCode()
{
  if (isEmpty()) {
    return 0;
  }
  int h = 1;
  // every element must contribute; "(h << 5) - h" is 31 * h
  for (IntIterator itr = iterator(); itr.hasNext(); ) {
    h = (h << 5) - h + itr.next();
  }
  return h;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,563 @@
package io.druid.extendedset.intset;
import io.druid.extendedset.utilities.BitCount;
import java.util.NoSuchElementException;
/**
 * Static helpers to decode the 32-bit words used by the CONCISE bitmap representation,
 * plus two {@link WordExpander} implementations that enumerate the integers encoded by a
 * single word.
 * <p/>
 * Word layout, as implemented by the methods of this class:
 * <ul>
 * <li> bit 31 set: <i>literal</i> word, whose 31 least significant bits are a plain bitmap;
 * <li> bits 31-30 equal to <tt>00</tt>: sequence of 0's;
 * <li> bits 31-30 equal to <tt>01</tt>: sequence of 1's;
 * <li> in a sequence word, bits 29-25 store the position (plus one) of the single flipped
 * bit of the first 31-bit block, <tt>00000</tt> meaning "no flipped bit", while bits 24-0
 * store the number of 31-bit blocks that follow the first one.
 * </ul>
 */
public class ConciseSetUtils
{
  /**
   * The highest representable integer.
   * <p/>
   * Its value is computed as follows. The number of bits required to
   * represent the longest sequence of 0's or 1's is
   * <tt>ceil(log<sub>2</sub>(({@link Integer#MAX_VALUE} - 31) / 31)) = 27</tt>.
   * Indeed, at least one literal exists, and the other bits may all be 0's or
   * 1's, that is <tt>{@link Integer#MAX_VALUE} - 31</tt>. If we use:
   * <ul>
   * <li> 2 bits for the sequence type;
   * <li> 5 bits to indicate which bit is set;
   * </ul>
   * then <tt>32 - 5 - 2 = 25</tt> is the number of available bits to
   * represent the maximum sequence of 0's and 1's. Thus, the maximal bit that
   * can be set is represented by a number of 0's equals to
   * <tt>31 * (1 &lt;&lt; 25)</tt>, followed by a literal with 30 0's and the
   * MSB (31<sup>st</sup> bit) equal to 1
   */
  public static final int MAX_ALLOWED_INTEGER = 31 * (1 << 25) + 30; // 1040187422

  /**
   * The lowest representable integer.
   */
  public static final int MIN_ALLOWED_SET_BIT = 0;

  /**
   * Maximum number of representable bits within a literal
   */
  public static final int MAX_LITERAL_LENGTH = 31;

  /**
   * Literal that represents all bits set to 1 (and MSB = 1)
   */
  public static final int ALL_ONES_LITERAL = 0xFFFFFFFF;

  /**
   * Literal that represents all bits set to 0 (and MSB = 1)
   */
  public static final int ALL_ZEROS_LITERAL = 0x80000000;

  /**
   * All bits set to 1 and MSB = 0
   */
  public static final int ALL_ONES_WITHOUT_MSB = 0x7FFFFFFF;

  /**
   * Sequence bit
   */
  public static final int SEQUENCE_BIT = 0x40000000;

  /**
   * Calculates the modulus division by 31 in a faster way than using <code>n % 31</code>
   * <p/>
   * This method of finding modulus division by an integer that is one less
   * than a power of 2 takes at most <tt>O(lg(32))</tt> time. The number of operations
   * is at most <tt>12 + 9 * ceil(lg(32))</tt>.
   * <p/>
   * See <a
   * href="http://graphics.stanford.edu/~seander/bithacks.html">http://graphics.stanford.edu/~seander/bithacks.html</a>
   *
   * @param n number to divide
   *
   * @return <code>n % 31</code>
   */
  public static int maxLiteralLengthModulus(int n)
  {
    // Since 32 == 1 (mod 31), the base-32 digits of "n" can be added together without
    // changing the remainder. The folding is repeated until a single digit is left, and
    // a final 31 is mapped to 0.
    int m = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F);
    m = (m >>> 15) + (m & 0x00007FFF);
    if (m <= 31) {
      return m == 31 ? 0 : m;
    }
    m = (m >>> 5) + (m & 0x0000001F);
    if (m <= 31) {
      return m == 31 ? 0 : m;
    }
    m = (m >>> 5) + (m & 0x0000001F);
    if (m <= 31) {
      return m == 31 ? 0 : m;
    }
    m = (m >>> 5) + (m & 0x0000001F);
    if (m <= 31) {
      return m == 31 ? 0 : m;
    }
    m = (m >>> 5) + (m & 0x0000001F);
    if (m <= 31) {
      return m == 31 ? 0 : m;
    }
    m = (m >>> 5) + (m & 0x0000001F);
    return m == 31 ? 0 : m;
  }

  /**
   * Calculates the multiplication by 31 in a faster way than using <code>n * 31</code>
   *
   * @param n number to multiply
   *
   * @return <code>n * 31</code>
   */
  public static int maxLiteralLengthMultiplication(int n)
  {
    return (n << 5) - n;
  }

  /**
   * Calculates the division by 31
   *
   * @param n number to divide
   *
   * @return <code>n / 31</code>
   */
  public static int maxLiteralLengthDivision(int n)
  {
    return n / 31;
  }

  /**
   * Checks whether a word is a literal one
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a literal word
   */
  public static boolean isLiteral(int word)
  {
    // "word" must be 1*
    // NOTE: this is faster than "return (word & 0x80000000) == 0x80000000"
    return (word & 0x80000000) != 0;
  }

  /**
   * Checks whether a word contains a sequence of 1's
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 1's
   */
  public static boolean isOneSequence(int word)
  {
    // "word" must be 01*
    return (word & 0xC0000000) == SEQUENCE_BIT;
  }

  /**
   * Checks whether a word contains a sequence of 0's
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 0's
   */
  public static boolean isZeroSequence(int word)
  {
    // "word" must be 00*
    return (word & 0xC0000000) == 0;
  }

  /**
   * Checks whether a word contains a sequence of 0's with no set bit, or 1's
   * with no unset bit.
   * <p/>
   * <b>NOTE:</b> for words that never carry a flipped bit (presumably the case when the
   * owning set simulates WAH, see the <code>simulateWAH</code> parameter of
   * {@link #getLiteral(int, boolean)}), it is equivalent to
   * <code>!</code>{@link #isLiteral(int)}
   *
   * @param word word to check
   *
   * @return <code>true</code> if the given word is a sequence of 0's or 1's
   * but with no (un)set bit
   */
  public static boolean isSequenceWithNoBits(int word)
  {
    // "word" must be 0?00000*
    return (word & 0xBE000000) == 0x00000000;
  }

  /**
   * Gets the number of blocks of 1's or 0's stored in a sequence word
   *
   * @param word word to check
   *
   * @return the number of blocks that follow the first block of 31 bits
   */
  public static int getSequenceCount(int word)
  {
    // get the 25 LSB bits
    return word & 0x01FFFFFF;
  }

  /**
   * Gets the total number of 31-bit blocks represented by a sequence word, namely the
   * first block plus those counted by {@link #getSequenceCount(int)}.
   *
   * @param word word to check
   *
   * @return <code>getSequenceCount(word) + 1</code>
   */
  public static int getSequenceNumWords(int word)
  {
    return getSequenceCount(word) + 1;
  }

  /**
   * Clears the (un)set bit in a sequence
   *
   * @param word word to check
   *
   * @return the sequence corresponding to the given sequence and with no
   * (un)set bits
   */
  public static int getSequenceWithNoBits(int word)
  {
    // clear 29 to 25 LSB bits
    return (word & 0xC1FFFFFF);
  }

  /**
   * Gets the literal word that represents the first 31 bits of the given the
   * word (i.e. the first block of a sequence word, or the bits of a literal word).
   * <p/>
   * If the word is a literal, it returns the unmodified word. In case of a
   * sequence, it returns a literal that represents the first 31 bits of the
   * given sequence word.
   *
   * @param word        word to check
   * @param simulateWAH if <code>true</code>, the flipped-bit field of a sequence word is
   *                    ignored and the result is either {@link #ALL_ZEROS_LITERAL} or
   *                    {@link #ALL_ONES_LITERAL}
   *
   * @return the literal contained within the given word, <i>with the most
   * significant bit set to 1</i>.
   */
  public static int getLiteral(int word, boolean simulateWAH)
  {
    if (isLiteral(word)) {
      return word;
    }

    if (simulateWAH) {
      return isZeroSequence(word) ? ALL_ZEROS_LITERAL : ALL_ONES_LITERAL;
    }

    // get bits from 30 to 26 and use them to set the corresponding bit
    // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent
    // NOTE: ">>> 1" is required since 00000 represents no bits and 00001 the LSB bit set
    int literal = (1 << (word >>> 25)) >>> 1;
    return isZeroSequence(word)
           ? (ALL_ZEROS_LITERAL | literal)
           : (ALL_ONES_LITERAL & ~literal);
  }

  /**
   * Gets the literal that represents the first 31-bit block of a sequence of 0's: all
   * zeros, except for the flipped bit (if any), which is set.
   *
   * @param word sequence word; the sequence type is not checked
   *
   * @return the literal for the first block of the sequence
   */
  public static int getLiteralFromZeroSeqFlipBit(int word)
  {
    int flipBit = getFlippedBit(word);
    if (flipBit > -1) {
      return ALL_ZEROS_LITERAL | flipBitAsBinaryString(flipBit);
    }
    return ALL_ZEROS_LITERAL;
  }

  /**
   * Gets the literal that represents the first 31-bit block of a sequence of 1's: all
   * ones, except for the flipped bit (if any), which is cleared.
   *
   * @param word sequence word; the sequence type is not checked
   *
   * @return the literal for the first block of the sequence
   */
  public static int getLiteralFromOneSeqFlipBit(int word)
  {
    int flipBit = getFlippedBit(word);
    if (flipBit > -1) {
      return ALL_ONES_LITERAL ^ flipBitAsBinaryString(flipBit);
    }
    return ALL_ONES_LITERAL;
  }

  /**
   * Gets the position of the flipped bit within a sequence word. If the
   * sequence has no set/unset bit, returns -1.
   * <p/>
   * Note that the parameter <i>must</i> be a sequence word, otherwise the
   * result is meaningless.
   *
   * @param word sequence word to check
   *
   * @return the position of the flipped bit, from 0 to 30. If the sequence has no
   * set/unset bit, returns -1.
   */
  public static int getFlippedBit(int word)
  {
    // get bits from 30 to 26
    // NOTE: "-1" is required since 00000 represents no bits and 00001 the LSB bit set
    return ((word >>> 25) & 0x0000001F) - 1;
  }

  /**
   * Gets the mask that has only the bit at the given position set, that is
   * <code>2<sup>flipBit</sup></code>.
   * <p/>
   * The meaningful input range is the one produced by {@link #getFlippedBit(int)} for an
   * existing flipped bit, namely 0 to 30. Out-of-range inputs keep the results they always
   * had: 0 for negative positions and {@link Integer#MAX_VALUE} for positions greater
   * than 30.
   *
   * @param flipBit position of the bit
   *
   * @return the single-bit mask
   */
  public static int flipBitAsBinaryString(int flipBit)
  {
    // integer-only replacement for "(int) Math.pow(2, flipBit)"
    if (flipBit < 0) {
      return 0;
    }
    if (flipBit > 30) {
      return Integer.MAX_VALUE;
    }
    return 1 << flipBit;
  }

  /**
   * Gets the number of set bits within the literal word
   *
   * @param word literal word
   *
   * @return the number of set bits within the literal word
   */
  public static int getLiteralBitCount(int word)
  {
    return BitCount.count(getLiteralBits(word));
  }

  /**
   * Gets the bits contained within the literal word
   *
   * @param word literal word
   *
   * @return the literal word with the most significant bit cleared
   */
  public static int getLiteralBits(int word)
  {
    return ALL_ONES_WITHOUT_MSB & word;
  }

  /**
   * Checks whether the word is exactly {@link #ALL_ONES_LITERAL}
   *
   * @param word word to check
   *
   * @return <code>true</code> if all the 32 bits of the word are set
   */
  public static boolean isAllOnesLiteral(int word)
  {
    return word == ALL_ONES_LITERAL;
  }

  /**
   * Checks whether none of the 31 least significant bits of the word is set. The most
   * significant bit is ignored, hence both {@link #ALL_ZEROS_LITERAL} and <code>0</code>
   * are accepted.
   *
   * @param word word to check
   *
   * @return <code>true</code> if the 31 least significant bits are all 0's
   */
  public static boolean isAllZerosLiteral(int word)
  {
    return (word | ALL_ZEROS_LITERAL) == ALL_ZEROS_LITERAL;
  }

  /**
   * Checks whether the word is a literal with exactly one bit equal to 0
   *
   * @param word word to check
   *
   * @return <code>true</code> if the word is a literal with exactly one unset bit
   */
  public static boolean isLiteralWithSingleZeroBit(int word)
  {
    return isLiteral(word) && (Integer.bitCount(~word) == 1);
  }

  /**
   * Checks whether the word is a literal with exactly one of its 31 content bits equal to 1
   *
   * @param word word to check
   *
   * @return <code>true</code> if the word is a literal with exactly one set content bit
   */
  public static boolean isLiteralWithSingleOneBit(int word)
  {
    // two bits in total: the literal marker (MSB) and the only set content bit
    return isLiteral(word) && (Integer.bitCount(word) == 2);
  }

  /**
   * Clears all the content bits of the word that are above the given position. The most
   * significant bit (the literal marker) is left as it is.
   *
   * @param lastWord   word to clean
   * @param lastSetBit position of the highest bit to keep
   *
   * @return the word where only bits 0 to <code>lastSetBit</code> and bit 31 may be set
   */
  public static int clearBitsAfterInLastWord(int lastWord, int lastSetBit)
  {
    return lastWord & (ALL_ZEROS_LITERAL | (0xFFFFFFFF >>> (31 - lastSetBit)));
  }

  /**
   * Gets the literal whose <code>bit</code> least significant bits are set
   *
   * @param bit number of least significant bits to set
   *
   * @return the literal with bits from 0 to <code>bit - 1</code> set
   */
  public static int onesUntil(int bit)
  {
    return 0x80000000 | ((1 << bit) - 1);
  }

  /**
   * @return a new, empty {@link LiteralAndZeroFillExpander}
   */
  public static LiteralAndZeroFillExpander newLiteralAndZeroFillExpander()
  {
    return new LiteralAndZeroFillExpander();
  }

  /**
   * @return a new {@link OneFillExpander} that is not bound to any word yet
   */
  public static OneFillExpander newOneFillExpander()
  {
    return new OneFillExpander();
  }

  /**
   * Bidirectional cursor over the integers encoded by a single word. An instance is bound
   * to a word with {@link #reset(int, int, boolean)} and can be bound again to another
   * word afterwards.
   */
  public interface WordExpander
  {
    /**
     * @return <code>true</code> if {@link #next()} can return another integer
     */
    public boolean hasNext();

    /**
     * @return <code>true</code> if {@link #previous()} can return another integer
     */
    public boolean hasPrevious();

    /**
     * @return the next integer, in ascending order
     */
    public int next();

    /**
     * @return the previous integer, in descending order
     */
    public int previous();

    /**
     * Moves the cursor backward so that the integers greater than <code>i</code> are not
     * returned by the following calls to {@link #previous()}
     *
     * @param i highest integer of interest
     */
    public void skipAllAfter(int i);

    /**
     * Moves the cursor forward so that the integers less than <code>i</code> are not
     * returned by the following calls to {@link #next()}
     *
     * @param i lowest integer of interest
     */
    public void skipAllBefore(int i);

    /**
     * Binds the expander to a word
     *
     * @param offset        integer represented by the first bit of the word
     * @param word          word to expand
     * @param fromBeginning <code>true</code> to place the cursor before the first integer,
     *                      <code>false</code> to place it after the last one
     */
    public void reset(int offset, int word, boolean fromBeginning);

    /**
     * @return an independent copy of the expander, cursor position included
     */
    public WordExpander clone();
  }

  /**
   * Iterator over the bits of literal and zero-fill words
   * <p/>
   * The integers of the word are decoded all at once by {@link #reset(int, int, boolean)}
   * and kept in an array, at most {@link #MAX_LITERAL_LENGTH} of them.
   */
  public static class LiteralAndZeroFillExpander implements WordExpander
  {
    // decoded integers, in ascending order; only the first "len" slots are meaningful
    final int[] buffer = new int[MAX_LITERAL_LENGTH];
    int len = 0;
    // index of the slot that next() is going to return
    int current = 0;

    @Override
    public boolean hasNext()
    {
      return current < len;
    }

    @Override
    public boolean hasPrevious()
    {
      return current > 0;
    }

    @Override
    public int next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      return buffer[current++];
    }

    @Override
    public int previous()
    {
      if (!hasPrevious()) {
        throw new NoSuchElementException();
      }
      return buffer[--current];
    }

    @Override
    public void skipAllAfter(int i)
    {
      while (hasPrevious() && buffer[current - 1] > i) {
        current--;
      }
    }

    @Override
    public void skipAllBefore(int i)
    {
      while (hasNext() && buffer[current] < i) {
        current++;
      }
    }

    /**
     * {@inheritDoc}
     *
     * @throws RuntimeException if the word is a sequence of 1's
     */
    @Override
    public void reset(int offset, int word, boolean fromBeginning)
    {
      if (isLiteral(word)) {
        // one integer for each set bit among the 31 content bits
        len = 0;
        for (int i = 0; i < MAX_LITERAL_LENGTH; i++) {
          if ((word & (1 << i)) != 0) {
            buffer[len++] = offset + i;
          }
        }
        current = fromBeginning ? 0 : len;
        return;
      }

      if (!isZeroSequence(word)) {
        throw new RuntimeException("sequence of ones!");
      }

      if (isSequenceWithNoBits(word)) {
        // plain sequence of 0's: it does not encode any integer
        len = 0;
        current = 0;
      } else {
        // the flipped bit is the only integer encoded by a sequence of 0's
        len = 1;
        buffer[0] = offset + getFlippedBit(word);
        current = fromBeginning ? 0 : 1;
      }
    }

    @Override
    public WordExpander clone()
    {
      LiteralAndZeroFillExpander retVal = new LiteralAndZeroFillExpander();
      System.arraycopy(buffer, 0, retVal.buffer, 0, buffer.length);
      retVal.len = len;
      retVal.current = current;
      return retVal;
    }
  }

  /**
   * Iterator over the bits of one-fill words
   * <p/>
   * Nothing is materialized: the expander only keeps the bounds of the range covered by
   * the word and the integer that corresponds to the flipped bit, which is skipped.
   */
  public static class OneFillExpander implements WordExpander
  {
    // lowest and highest integers that can be returned (both included)
    int firstInt = 1;
    int lastInt = -1;
    // cursor: next() returns a greater integer, previous() a smaller one
    int current = 0;
    // integer of the flipped bit, never returned; "offset - 1" when there is none
    int exception = -1;

    @Override
    public boolean hasNext()
    {
      return current < lastInt;
    }

    @Override
    public boolean hasPrevious()
    {
      return current > firstInt;
    }

    @Override
    public int next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      current++;
      if (current == exception) {
        current++;
      }
      return current;
    }

    @Override
    public int previous()
    {
      if (!hasPrevious()) {
        throw new NoSuchElementException();
      }
      current--;
      if (current == exception) {
        current--;
      }
      return current;
    }

    @Override
    public void skipAllAfter(int i)
    {
      if (i >= current) {
        return;
      }
      // previous() decrements the cursor before returning it
      current = i + 1;
    }

    @Override
    public void skipAllBefore(int i)
    {
      if (i <= current) {
        return;
      }
      // next() increments the cursor before returning it
      current = i - 1;
    }

    /**
     * {@inheritDoc}
     *
     * @throws RuntimeException if the word is not a sequence of 1's
     */
    @Override
    public void reset(int offset, int word, boolean fromBeginning)
    {
      if (!isOneSequence(word)) {
        throw new RuntimeException("NOT a sequence of ones!");
      }
      firstInt = offset;
      lastInt = offset + maxLiteralLengthMultiplication(getSequenceNumWords(word)) - 1;
      exception = offset + getFlippedBit(word);
      // keep the flipped bit out of the bounds, so that hasNext() and hasPrevious()
      // do not announce an integer that would be skipped
      if (exception == firstInt) {
        firstInt++;
      }
      if (exception == lastInt) {
        lastInt--;
      }
      current = fromBeginning ? (firstInt - 1) : (lastInt + 1);
    }

    @Override
    public WordExpander clone()
    {
      OneFillExpander retVal = new OneFillExpander();
      retVal.firstInt = firstInt;
      retVal.lastInt = lastInt;
      retVal.current = current;
      retVal.exception = exception;
      return retVal;
    }
  }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,662 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.intset;
import io.druid.extendedset.ExtendedSet;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* Very similar to {@link ExtendedSet} but for the primitive <code>int</code>
* type.
*
* @author Alessandro Colantonio
* @version $Id: IntSet.java 135 2011-01-04 15:54:48Z cocciasik $
* @see ArraySet
* @see ConciseSet
* @see FastSet
* @see HashIntSet
*/
public interface IntSet extends Cloneable, Comparable<IntSet>
{
/**
* Generates the intersection set
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the result of the operation
*
* @see #retainAll(IntSet)
*/
public IntSet intersection(IntSet other);
/**
* Generates the union set
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the result of the operation
*
* @see #addAll(IntSet)
*/
public IntSet union(IntSet other);
/**
* Generates the difference set
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the result of the operation
*
* @see #removeAll(IntSet)
*/
public IntSet difference(IntSet other);
/**
* Generates the symmetric difference set
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the result of the operation
*
* @see #flip(int)
*/
public IntSet symmetricDifference(IntSet other);
/**
* Generates the complement set. The returned set is represented by all the
* elements strictly less than {@link #last()} that do not exist in the
* current set.
*
* @return the complement set
*
* @see IntSet#complement()
*/
public IntSet complemented();
/**
* Complements the current set. The modified set is represented by all the
* elements strictly less than {@link #last()} that do not exist in the
* current set.
*
* @see IntSet#complemented()
*/
public void complement();
/**
* Returns <code>true</code> if the specified {@link IntSet}
* instance contains any elements that are also contained within this
* {@link IntSet} instance
*
* @param other {@link IntSet} to intersect with
*
* @return a boolean indicating whether this {@link IntSet}
* intersects the specified {@link IntSet}.
*/
public boolean containsAny(IntSet other);
/**
* Returns <code>true</code> if the specified {@link IntSet}
* instance contains at least <code>minElements</code> elements that are
* also contained within this {@link IntSet} instance
*
* @param other {@link IntSet} instance to intersect with
* @param minElements minimum number of elements to be contained within this
* {@link IntSet} instance
*
* @return a boolean indicating whether this {@link IntSet}
* intersects the specified {@link IntSet}.
*
* @throws IllegalArgumentException if <code>minElements &lt; 1</code>
*/
public boolean containsAtLeast(IntSet other, int minElements);
/**
* Computes the intersection set size.
* <p>
* This is faster than calling {@link #intersection(IntSet)} and
* then {@link #size()}
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the size
*/
public int intersectionSize(IntSet other);
/**
* Computes the union set size.
* <p>
* This is faster than calling {@link #union(IntSet)} and then
* {@link #size()}
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the size
*/
public int unionSize(IntSet other);
/**
* Computes the symmetric difference set size.
* <p>
* This is faster than calling {@link #symmetricDifference(IntSet)}
* and then {@link #size()}
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the size
*/
public int symmetricDifferenceSize(IntSet other);
/**
* Computes the difference set size.
* <p>
* This is faster than calling {@link #difference(IntSet)} and then
* {@link #size()}
*
* @param other {@link IntSet} instance that represents the right
* operand
*
* @return the size
*/
public int differenceSize(IntSet other);
/**
* Computes the complement set size.
* <p>
* This is faster than calling {@link #complemented()} and then
* {@link #size()}
*
* @return the size
*/
public int complementSize();
/**
* Generates an empty set
*
* @return the empty set
*/
public IntSet empty();
/**
* See the <code>clone()</code> of {@link Object}
*
* @return cloned object
*/
public IntSet clone();
/**
* Computes the compression factor of the equivalent bitmap representation
* (1 means not compressed, namely a memory footprint similar to
* {@link BitSet}, 2 means twice the size of {@link BitSet}, etc.)
*
* @return the compression factor
*/
public double bitmapCompressionRatio();
/**
* Computes the compression factor of the equivalent integer collection (1
* means not compressed, namely a memory footprint similar to
* {@link ArrayList}, 2 means twice the size of {@link ArrayList}, etc.)
*
* @return the compression factor
*/
public double collectionCompressionRatio();
/**
* @return a {@link IntIterator} instance to iterate over the set
*/
public IntIterator iterator();
/**
* @return a {@link IntIterator} instance to iterate over the set in
* descending order
*/
public IntIterator descendingIterator();
/**
* Prints debug info about the given {@link IntSet} implementation
*
* @return a string that describes the internal representation of the
* instance
*/
public String debugInfo();
/**
* Adds to the set all the elements between <code>from</code> and
* <code>to</code>, both included.
*
* @param from first element
* @param to last element
*/
public void fill(int from, int to);
/**
* Removes from the set all the elements between <code>from</code> and
* <code>to</code>, both included.
*
* @param from first element
* @param to last element
*/
public void clear(int from, int to);
/**
* Adds the element if it is not already present, or removes it if it is
*
* @param e element to flip
*
* @see #symmetricDifference(IntSet)
*/
public void flip(int e);
/**
* Gets the <code>i</code><sup>th</sup> element of the set
*
* @param i position of the element in the sorted set
*
* @return the <code>i</code><sup>th</sup> element of the set
*
* @throws IndexOutOfBoundsException if <code>i</code> is less than zero, or greater or equal to
* {@link #size()}
*/
public int get(int i);
/**
* Provides position of element within the set.
* <p>
* It returns -1 if the element does not exist within the set.
*
* @param e element of the set
*
* @return the element position
*/
public int indexOf(int e);
/**
* Converts a given array into an instance of the current class.
*
* @param a array to use to generate the new instance
*
* @return the converted collection
*/
public IntSet convert(int... a);
/**
* Converts a given collection into an instance of the current class.
*
* @param c collection to use to generate the new instance
*
* @return the converted collection
*/
public IntSet convert(Collection<Integer> c);
/**
* Returns the first (lowest) element currently in this set.
*
* @return the first (lowest) element currently in this set
*
* @throws NoSuchElementException if this set is empty
*/
public int first();
/**
* Returns the last (highest) element currently in this set.
*
* @return the last (highest) element currently in this set
*
* @throws NoSuchElementException if this set is empty
*/
public int last();
/**
* @return the number of elements in this set (its cardinality)
*/
public int size();
/**
* @return <tt>true</tt> if this set contains no elements
*/
public boolean isEmpty();
/**
* Returns <tt>true</tt> if this set contains the specified element.
*
* @param i element whose presence in this set is to be tested
*
* @return <tt>true</tt> if this set contains the specified element
*/
public boolean contains(int i);
/**
* Adds the specified element to this set if it is not already present. It
* ensures that sets never contain duplicate elements.
*
* @param i element to be added to this set
*
* @return <tt>true</tt> if this set did not already contain the specified
* element
*
* @throws IllegalArgumentException if some property of the specified element prevents it from
* being added to this set
*/
public boolean add(int i);
/**
* Removes the specified element from this set if it is present.
*
* @param i object to be removed from this set, if present
*
* @return <tt>true</tt> if this set contained the specified element
*
* @throws UnsupportedOperationException if the <tt>remove</tt> operation is not supported by this set
*/
public boolean remove(int i);
/**
* Returns <tt>true</tt> if this set contains all of the elements of the
* specified collection.
*
* @param c collection to be checked for containment in this set
*
* @return <tt>true</tt> if this set contains all of the elements of the
* specified collection
*
* @throws NullPointerException if the specified collection contains one or more null
* elements and this set does not permit null elements
* (optional), or if the specified collection is null
* @see #contains(int)
*/
public boolean containsAll(IntSet c);
/**
* Adds all of the elements in the specified collection to this set if
* they're not already present.
*
* @param c collection containing elements to be added to this set
*
* @return <tt>true</tt> if this set changed as a result of the call
*
* @throws NullPointerException if the specified collection contains one or more null
* elements and this set does not permit null elements, or if
* the specified collection is null
* @throws IllegalArgumentException if some property of an element of the specified collection
* prevents it from being added to this set
* @see #add(int)
*/
public boolean addAll(IntSet c);
/**
* Retains only the elements in this set that are contained in the specified
* collection. In other words, removes from this set all of its elements
* that are not contained in the specified collection.
*
* @param c collection containing elements to be retained in this set
*
* @return <tt>true</tt> if this set changed as a result of the call
*
* @throws NullPointerException if this set contains a null element and the specified
* collection does not permit null elements (optional), or if
* the specified collection is null
* @see #remove(int)
*/
public boolean retainAll(IntSet c);
/**
* Removes from this set all of its elements that are contained in the
* specified collection.
*
* @param c collection containing elements to be removed from this set
*
* @return <tt>true</tt> if this set changed as a result of the call
*
* @throws NullPointerException if this set contains a null element and the specified
* collection does not permit null elements (optional), or if
* the specified collection is null
* @see #remove(int)
* @see #contains(int)
*/
public boolean removeAll(IntSet c);
/**
* Removes all of the elements from this set. The set will be empty after
* this call returns.
*
* @throws UnsupportedOperationException if the <tt>clear</tt> method is not supported by this set
*/
public void clear();
/**
* @return an array containing all the elements in this set, in the same
* order.
*/
public int[] toArray();
/**
* Returns an array containing all of the elements in this set.
* <p>
* If this set fits in the specified array with room to spare (i.e., the
* array has more elements than this set), the element in the array
* immediately following the end of the set are left unchanged.
*
* @param a the array into which the elements of this set are to be
* stored.
*
* @return the array containing all the elements in this set
*
* @throws NullPointerException if the specified array is null
* @throws IllegalArgumentException if this set does not fit in the specified array
*/
public int[] toArray(int[] a);
/**
* Computes the power-set of the current set.
* <p>
* It is a particular implementation of the algorithm <i>Apriori</i> (see:
* Rakesh Agrawal, Ramakrishnan Srikant, <i>Fast Algorithms for Mining
* Association Rules in Large Databases</i>, in Proceedings of the
* 20<sup>th</sup> International Conference on Very Large Data Bases,
* p.487-499, 1994). The returned power-set does <i>not</i> contain the
* empty set.
* <p>
* The subsets composing the powerset are returned in a list that is sorted
* according to the lexicographical order provided by the integer set.
*
* @return the power-set
*
* @see #powerSet(int, int)
* @see #powerSetSize()
*/
public List<? extends IntSet> powerSet();
/**
* Computes a subset of the power-set of the current set, composed by those
* subsets that have cardinality between <code>min</code> and
* <code>max</code>.
* <p>
* It is a particular implementation of the algorithm <i>Apriori</i> (see:
* Rakesh Agrawal, Ramakrishnan Srikant, <i>Fast Algorithms for Mining
* Association Rules in Large Databases</i>, in Proceedings of the
* 20<sup>th</sup> International Conference on Very Large Data Bases,
* p.487-499, 1994). The power-set does <i>not</i> contain the empty set.
* <p>
* The subsets composing the powerset are returned in a list that is sorted
* according to the lexicographical order provided by the integer set.
*
* @param min minimum subset size (greater than zero)
* @param max maximum subset size
*
* @return the power-set
*
* @see #powerSet()
* @see #powerSetSize(int, int)
*/
public List<? extends IntSet> powerSet(int min, int max);
/**
* Computes the power-set size of the current set.
* <p>
* The power-set does <i>not</i> contain the empty set.
*
* @return the power-set size
*
* @see #powerSet()
*/
public int powerSetSize();
/**
* Computes the size of the portion of the power-set of the current set that
* is composed of those subsets whose cardinality is between
* <code>min</code> and <code>max</code>.
* <p>
* The power-set does <i>not</i> contain the empty set, hence the empty set
* is not counted.
*
* @param min minimum subset size (greater than zero)
* @param max maximum subset size
*
* @return the size of the requested portion of the power-set
*
* @see #powerSet(int, int)
*/
public int powerSetSize(int min, int max);
/**
* Computes the Jaccard similarity coefficient between this set and the
* given set.
* <p>
* The coefficient is defined as
* <code>|A intersection B| / |A union B|</code>, where <code>A</code> is
* this set and <code>B</code> is the given set.
*
* @param other the other set
*
* @return the Jaccard similarity coefficient
*
* @see #jaccardDistance(IntSet)
*/
public double jaccardSimilarity(IntSet other);
/**
* Computes the Jaccard distance between this set and the given set.
* <p>
* The distance is defined as
* <code>1 - </code> {@link #jaccardSimilarity(IntSet)}.
*
* @param other the other set
*
* @return the Jaccard distance
*
* @see #jaccardSimilarity(IntSet)
*/
public double jaccardDistance(IntSet other);
/**
* Computes the weighted version of the Jaccard similarity coefficient
* between this set and the given set.
* <p>
* The coefficient is defined as
* <code>sum of min(A_i, B_i) / sum of max(A_i, B_i)</code>, where
* <code>A</code> is this set and <code>B</code> is the given set.
*
* @param other the other set
*
* @return the weighted Jaccard similarity coefficient
*
* @see #weightedJaccardDistance(IntSet)
*/
public double weightedJaccardSimilarity(IntSet other);
/**
* Computes the weighted version of the Jaccard distance between this set
* and the given set.
* <p>
* The distance is defined as <code>1 - </code>
* {@link #weightedJaccardSimilarity(IntSet)}.
*
* @param other the other set
*
* @return the weighted Jaccard distance
*
* @see #weightedJaccardSimilarity(IntSet)
*/
public double weightedJaccardDistance(IntSet other);
/**
* An {@link Iterator}-like interface over primitive <code>int</code>
* elements that additionally allows to "skip" some elements of the set
* (see {@link #skipAllBefore(int)}) and to be cloned.
*/
public interface IntIterator
{
/**
* @return {@code true} if the iterator has more elements.
*/
boolean hasNext();
/**
* @return the next element in the iteration.
*
* @throws NoSuchElementException if the iteration has no more elements.
*/
int next();
/**
* Removes from the underlying collection the last element returned by
* the iterator (optional operation). This method can be called only
* once per call to {@code next}. The behavior of an iterator is
* unspecified if the underlying collection is modified while the
* iteration is in progress in any way other than by calling this
* method.
*
* @throws UnsupportedOperationException if the {@code remove} operation is not supported by
* this iterator.
* @throws IllegalStateException if the {@code next} method has not yet been called,
* or the {@code remove} method has already been called
* after the last call to the {@code next} method.
*/
void remove();
/**
* Skips all the elements before the specified element, so that
* {@link #next()} gives the given element or, if it does not exist, the
* element immediately after according to the sorting provided by this
* set.
* <p>
* If <code>element</code> is less than the next element, it does
* nothing.
*
* @param element first element to not skip
*/
public void skipAllBefore(int element);
/**
* Clones the iterator.
*
* @return a clone of the IntIterator
*/
public IntIterator clone();
}
}

View File

@ -0,0 +1,299 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.utilities;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
/**
* A {@link Map} backed by an array, where keys are the indices of the array,
* and values are the elements of the array.
* <p>
* Modifications to the map (i.e., through {@link #put(Integer, Object)} and
* {@link java.util.Map.Entry#setValue(Object)}) are reflected to the original array.
* However, the map has a fixed length, that is the length of the array.
*
* @param <T> the type of elements represented by columns
*
* @author Alessandro Colantonio
* @version $Id$
*/
public class ArrayMap<T> extends AbstractMap<Integer, T> implements java.io.Serializable
{
/**
* generated serial ID
*/
private static final long serialVersionUID = -578029467093308343L;
/**
* array backed by this map
*/
private final T[] array;
/**
* first index of the map
*/
private final int indexShift;
/**
* {@link Set} instance to iterate over #array
*/
private transient Set<Entry<Integer, T>> entrySet;
/**
* Initializes the map
*
* @param array array to manipulate
* @param indexShift first index of the map
*/
ArrayMap(T[] array, int indexShift)
{
this.array = array;
this.indexShift = indexShift;
entrySet = null;
}
/**
* Initializes the map
*
* @param array array to manipulate
*/
ArrayMap(T[] array)
{
this(array, 0);
}
/**
* Test
*
* @param args
*/
public static void main(String[] args)
{
ArrayMap<String> am = new ArrayMap<String>(new String[]{"Three", "Four", "Five"}, 3);
System.out.println(am);
am.put(5, "FIVE");
System.out.println(am);
System.out.println(am.get(5));
System.out.println(am.containsKey(2));
System.out.println(am.containsKey(3));
System.out.println(am.containsValue("THREE"));
System.out.println(am.keySet());
System.out.println(am.values());
}
/**
* {@inheritDoc}
*/
@Override
public Set<Entry<Integer, T>> entrySet()
{
if (entrySet == null) {
// create an entry for each element
final List<SimpleEntry> entries = new ArrayList<SimpleEntry>(array.length);
for (int i = 0; i < array.length; i++) {
entries.add(new SimpleEntry(i));
}
// create the Set instance
entrySet = new AbstractSet<Entry<Integer, T>>()
{
@Override
public Iterator<Entry<Integer, T>> iterator()
{
return new Iterator<Entry<Integer, T>>()
{
int curr = 0;
@Override
public boolean hasNext()
{
return curr < entries.size();
}
@Override
public Entry<Integer, T> next()
{
if (!hasNext()) {
throw new NoSuchElementException();
}
return entries.get(curr++);
}
@Override
public void remove()
{
throw new IllegalArgumentException();
}
};
}
@Override
public int size()
{
return entries.size();
}
};
}
return entrySet;
}
/**
* {@inheritDoc}
*/
@Override
public int size()
{
return array.length;
}
/**
* {@inheritDoc}
*/
@Override
public boolean containsKey(Object key)
{
int index = (Integer) key - indexShift;
return (index >= 0) && (index < array.length);
}
/**
* {@inheritDoc}
*/
@Override
public T get(Object key)
{
return array[(Integer) key - indexShift];
}
/**
* {@inheritDoc}
*/
@Override
public T put(Integer key, T value)
{
int actualIndex = key - indexShift;
T old = array[actualIndex];
array[actualIndex] = value;
return old;
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode()
{
return Arrays.hashCode(array);
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj)
{
if (this == obj) {
return true;
}
if (!super.equals(obj)) {
return false;
}
if (!(obj instanceof ArrayMap<?>)) {
return false;
}
return Arrays.equals(array, ((ArrayMap<?>) obj).array);
}
/**
* Reconstruct the instance from a stream
*/
private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException
{
s.defaultReadObject();
entrySet = null;
}
/**
* Entry of the map
*/
private class SimpleEntry implements Entry<Integer, T>
{
/**
* index of {@link ArrayMap#array}
*/
final int actualIndex;
/**
* Creates an entry
*
* @param index index of {@link ArrayMap#array}
*/
private SimpleEntry(int index)
{
this.actualIndex = index;
}
/**
* {@inheritDoc}
*/
@Override
public Integer getKey()
{
return actualIndex + indexShift;
}
/**
* {@inheritDoc}
*/
@Override
public T getValue()
{
return array[actualIndex];
}
/**
* {@inheritDoc}
*/
@Override
public T setValue(T value)
{
T old = array[actualIndex];
array[actualIndex] = value;
return old;
}
/**
* {@inheritDoc}
*/
@Override
public String toString()
{
return (actualIndex + indexShift) + "=" + array[actualIndex];
}
}
}

View File

@ -0,0 +1,350 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.utilities;
import java.util.Random;
/**
 * Population count (a.k.a. Hamming weight) of a bitmap represented by an
 * array of <code>int</code>.
 * <p>
 * Derived from <a
 * href="http://dalkescientific.com/writings/diary/popcnt.c">http
 * ://dalkescientific.com/writings/diary/popcnt.c</a>
 *
 * @author Alessandro Colantonio
 * @version $Id: BitCount.java 157 2011-11-14 14:25:15Z cocciasik $
 */
public class BitCount
{
  /**
   * Population count of a single word
   *
   * @param word word to count
   *
   * @return number of bits set to 1 within the word
   */
  public static int count(int word)
  {
    // bit counts of each 2-bit, then 4-bit, then 8-bit group
    final int pairs = word - ((word >>> 1) & 0x55555555);
    final int nibbles = (pairs & 0x33333333) + ((pairs >>> 2) & 0x33333333);
    final int bytes = (nibbles + (nibbles >>> 4)) & 0x0F0F0F0F;
    // the multiplication sums the four byte counters into the top byte
    return (bytes * 0x01010101) >>> 24;
  }

  /**
   * Population count of a whole array
   *
   * @param buffer array of <code>int</code>
   *
   * @return number of bits set to 1 within the array
   */
  public static int count(int[] buffer)
  {
    return count(buffer, buffer.length);
  }

  /**
   * Population count of the first <code>n</code> words of an array
   * <p>
   * Words are processed 24 at a time, then 3 at a time, then 1 at a time
   *
   * @param buffer array of <code>int</code>
   * @param n      number of elements of <code>buffer</code> to count
   *
   * @return number of bits set to 1 within the first <code>n</code> words
   */
  public static int count(int[] buffer, int n)
  {
    final int blockEnd = n - n % 24;
    final int tripleEnd = n - n % 3;
    int total = 0;
    int pos = 0;
    while (pos < blockEnd) {
      total += countBlock(buffer, pos, 1);
      pos += 24;
    }
    while (pos < tripleEnd) {
      total += countTriple(buffer[pos], buffer[pos + 1], buffer[pos + 2]);
      pos += 3;
    }
    while (pos < n) {
      total += count(buffer[pos]);
      pos++;
    }
    return total;
  }

  /**
   * Population count, skipping words at even positions
   *
   * @param buffer array of <code>int</code>
   *
   * @return number of bits set to 1 within the words at odd positions
   */
  public static int count_2(int[] buffer)
  {
    return count_2(buffer, buffer.length);
  }

  /**
   * Population count of the first <code>n</code> words of an array, skipping
   * words at even positions
   * <p>
   * Counted words are processed 24 at a time, then 3 at a time, then 1 at a
   * time
   *
   * @param buffer array of <code>int</code>
   * @param n      number of elements of <code>buffer</code> to consider
   *
   * @return number of bits set to 1 within the words at odd positions
   */
  public static int count_2(int[] buffer, int n)
  {
    final int blockEnd = n - n % 48;
    final int tripleEnd = n - n % 6;
    int total = 0;
    int pos = 0;
    while (pos < blockEnd) {
      total += countBlock(buffer, pos + 1, 2);
      pos += 48;
    }
    while (pos < tripleEnd) {
      total += countTriple(buffer[pos + 1], buffer[pos + 3], buffer[pos + 5]);
      pos += 6;
    }
    for (int odd = pos + 1; odd < n; odd += 2) {
      total += count(buffer[odd]);
    }
    return total;
  }

  /**
   * Counts the bits of 24 words, taken every <code>stride</code> positions
   * starting from <code>first</code>. Words are merged three at a time, and
   * the per-byte counters of the 8 triples are folded only once at the end.
   */
  private static int countBlock(int[] buffer, int first, int stride)
  {
    int perByte = 0;
    int idx = first;
    for (int triple = 0; triple < 8; triple++) {
      int a = buffer[idx];
      int b = buffer[idx + stride];
      final int w = buffer[idx + 2 * stride];
      // the third word is split between the two others (even/odd bits)
      a = a - ((a >>> 1) & 0x55555555) + (w & 0x55555555);
      b = b - ((b >>> 1) & 0x55555555) + ((w >>> 1) & 0x55555555);
      a = (a & 0x33333333) + ((a >>> 2) & 0x33333333);
      a += (b & 0x33333333) + ((b >>> 2) & 0x33333333);
      perByte += (a & 0x0F0F0F0F) + ((a >>> 4) & 0x0F0F0F0F);
      idx += 3 * stride;
    }
    perByte = (perByte & 0x00FF00FF) + ((perByte >>> 8) & 0x00FF00FF);
    perByte += perByte >>> 16;
    return perByte & 0x0000FFFF;
  }

  /**
   * Counts the bits of three words at once
   */
  private static int countTriple(int first, int second, int third)
  {
    int a = first - ((first >>> 1) & 0x55555555) + (third & 0x55555555);
    int b = second - ((second >>> 1) & 0x55555555) + ((third >>> 1) & 0x55555555);
    a = (a & 0x33333333) + ((a >>> 2) & 0x33333333);
    b = (b & 0x33333333) + ((b >>> 2) & 0x33333333);
    a += b;
    a = (a & 0x0F0F0F0F) + ((a >>> 4) & 0x0F0F0F0F);
    a += a >>> 8;
    a += a >>> 16;
    return a & 0x000000FF;
  }

  /**
   * Creates an array of random length (below <code>bound</code>) and fills
   * with random values the positions <code>start</code>,
   * <code>start + step</code>, ...
   */
  private static int[] randomWords(Random rnd, int bound, int start, int step)
  {
    final int[] words = new int[rnd.nextInt(bound)];
    for (int j = start; j < words.length; j += step) {
      words[j] = rnd.nextInt(Integer.MAX_VALUE);
    }
    return words;
  }

  /**
   * Word-by-word count of the positions <code>start</code>,
   * <code>start + step</code>, ...
   */
  private static int naiveCount(int[] words, int start, int step)
  {
    int size = 0;
    for (int j = start; j < words.length; j += step) {
      size += count(words[j]);
    }
    return size;
  }

  /**
   * Compares the array-based count with the word-by-word count on random
   * arrays, reporting the first mismatch on standard output.
   *
   * @return <code>false</code> if a mismatch has been found
   */
  private static boolean checkCorrectness(Random rnd, int trials, int bound, int start, int step)
  {
    for (int i = 0; i < trials; i++) {
      final int[] x = randomWords(rnd, bound, start, step);
      final int size1 = naiveCount(x, start, step);
      final int size2 = (step == 1) ? count(x) : count_2(x);
      if (size1 != size2) {
        System.out.println("i = " + i);
        System.out.println("ERRORE!");
        System.out.println(size1 + ", " + size2);
        for (int j = start; j < x.length; j += step) {
          System.out.format("x[%d] = %d --> %d\n", j, x[j], count(x[j]));
        }
        return false;
      }
    }
    return true;
  }

  /**
   * Measures the milliseconds spent to generate and count random arrays,
   * either word-by-word or through the array-based methods.
   */
  private static long elapsed(int seed, int trials, int bound, int start, int step, boolean wordByWord)
  {
    final Random rnd = new Random(seed);
    final long begin = System.currentTimeMillis();
    for (int i = 0; i < trials; i++) {
      final int[] x = randomWords(rnd, bound, start, step);
      if (wordByWord) {
        naiveCount(x, start, step);
      } else if (step == 1) {
        count(x);
      } else {
        count_2(x);
      }
    }
    return System.currentTimeMillis() - begin;
  }

  /**
   * Test
   *
   * @param args ignored
   */
  public static void main(String[] args)
  {
    final int trials = 10000;
    final int maxLength = 10000;
    final int seed = new Random().nextInt();

    System.out.print("Test correctness... ");
    if (!checkCorrectness(new Random(seed), trials, maxLength, 0, 1)) {
      return;
    }
    System.out.println("done!");

    System.out.print("Test correctness II... ");
    if (!checkCorrectness(new Random(seed), trials, maxLength << 1, 1, 2)) {
      return;
    }
    System.out.println("done!");

    System.out.print("Test time count(): ");
    System.out.println(elapsed(seed, trials, maxLength, 0, 1, true));

    System.out.print("Test time BitCount.count(): ");
    System.out.println(elapsed(seed, trials, maxLength, 0, 1, false));

    System.out.print("Test II time count(): ");
    System.out.println(elapsed(seed, trials, maxLength << 1, 1, 2, true));

    System.out.print("Test II time BitCount.count(): ");
    System.out.println(elapsed(seed, trials, maxLength << 1, 1, 2, false));
  }
}

View File

@ -0,0 +1,317 @@
package io.druid.extendedset.utilities;
import io.druid.extendedset.ExtendedSet;
import io.druid.extendedset.intset.ConciseSet;
import io.druid.extendedset.wrappers.IntegerSet;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
/**
 * This class implements a {@link Map} from a key of type <code>K</code> to a
 * collection that contains instances of <code>I</code>.
 * <p>
 * New collections are created on demand from a prototype "empty" collection
 * provided at construction time.
 *
 * @param <K> key type
 * @param <I> item type
 * @param <C> {@link Collection} subclass used to collect items
 *
 * @author Alessandro Colantonio
 * @version $Id: CollectionMap.java 152 2011-03-30 11:18:18Z cocciasik $
 */
public class CollectionMap<K, I, C extends Collection<I>> extends LinkedHashMap<K, C>
{
  private static final long serialVersionUID = -2613391212228461025L;

  /**
   * empty collection, used as prototype for the collection of each new key
   */
  private final C emptySet;

  /**
   * Initializes the map by providing an instance of the empty collection
   *
   * @param emptySet the empty collection
   */
  public CollectionMap(C emptySet)
  {
    this.emptySet = emptySet;
  }

  /**
   * Generates a new {@link CollectionMap} instance. It is an alternative to
   * the constructor {@link #CollectionMap(Collection)} that reduces the code
   * to write.
   *
   * @param <KX>     key type
   * @param <IX>     item type
   * @param <CX>     {@link Collection} subclass used to collect items
   * @param <EX>     empty subset type
   * @param emptySet the empty collection
   *
   * @return the new instance of {@link CollectionMap}
   */
  public static <KX, IX, CX extends Collection<IX>, EX extends CX>
  CollectionMap<KX, IX, CX> newCollectionMap(EX emptySet)
  {
    return new CollectionMap<KX, IX, CX>(emptySet);
  }

  /**
   * Test procedure
   * <p>
   * Expected output:
   * <pre>
   * {}
   * {A=[1]}
   * {A=[1, 2]}
   * {A=[1, 2], B=[3]}
   * {A=[1, 2], B=[3, 4, 5, 6]}
   * true
   * true
   * false
   * {A=[1], B=[3, 4, 5, 6]}
   * {A=[1], B=[3, 4, 5, 6]}
   * {A=[1], B=[6]}
   * </pre>
   *
   * @param args ignored
   */
  public static void main(String[] args)
  {
    CollectionMap<String, Integer, IntegerSet> map = newCollectionMap(new IntegerSet(new ConciseSet()));
    System.out.println(map);

    map.putItem("A", 1);
    System.out.println(map);

    map.putItem("A", 2);
    System.out.println(map);

    map.putItem("B", 3);
    System.out.println(map);

    map.putAllItems("B", Arrays.asList(4, 5, 6));
    System.out.println(map);

    System.out.println(map.containsItem(1));
    System.out.println(map.containsItem(6));
    System.out.println(map.containsItem(7));

    map.removeItem("A", 2);
    System.out.println(map);

    map.removeItem("A", 3);
    System.out.println(map);

    map.removeAllItems("B", Arrays.asList(1, 2, 3, 4, 5));
    System.out.println(map);
  }

  /**
   * Generates a copy of this map where each collection of items is copied
   * as well, so that changes to the collections of the copy do not affect
   * the collections of this instance.
   *
   * @return the copy
   */
  @SuppressWarnings("unchecked")
  @Override
  public CollectionMap<K, I, C> clone()
  {
    // result
    CollectionMap<K, I, C> cloned = new CollectionMap<K, I, C>(emptySet);

    // clone all the entries
    cloned.putAll(this);

    // clone all the values
    if (emptySet instanceof Cloneable) {
      // Cloneable does not expose clone(), hence the reflective call
      for (Entry<K, C> e : cloned.entrySet()) {
        try {
          e.setValue((C) e.getValue().getClass().getMethod("clone").invoke(e.getValue()));
        }
        catch (Exception ex) {
          throw new RuntimeException(ex);
        }
      }
    } else {
      for (Entry<K, C> e : cloned.entrySet()) {
        C copy = cloneEmptySet();
        copy.addAll(e.getValue());
        e.setValue(copy);
      }
    }
    return cloned;
  }

  /**
   * Generates an empty {@link CollectionMap} instance with the same
   * collection type for values
   *
   * @return the empty {@link CollectionMap} instance
   */
  public CollectionMap<K, I, C> empty()
  {
    return new CollectionMap<K, I, C>(emptySet);
  }

  /**
   * Populates the current instance with the data from another map. In
   * particular, it creates the list of keys associated to each value.
   *
   * @param map the input map
   */
  public void mapValueToKeys(Map<I, K> map)
  {
    for (Entry<I, K> e : map.entrySet()) {
      putItem(e.getValue(), e.getKey());
    }
  }

  /**
   * Generates a clone of the empty set: through its <code>clone()</code>
   * method when the empty set is {@link Cloneable}, through its no-argument
   * constructor otherwise
   *
   * @return a clone of the empty set
   */
  @SuppressWarnings("unchecked")
  private C cloneEmptySet()
  {
    try {
      if (emptySet instanceof Cloneable) {
        return (C) emptySet.getClass().getMethod("clone").invoke(emptySet);
      }
      return (C) emptySet.getClass().newInstance();
    }
    catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Checks if there are some collections that contain the given item
   *
   * @param item item to check
   *
   * @return <code>true</code> if the item exists within some collections
   */
  public boolean containsItem(I item)
  {
    for (Entry<K, C> e : entrySet()) {
      if (e.getValue().contains(item)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Adds an item to the collection corresponding to the given key. The
   * collection is created if the key is not mapped yet.
   *
   * @param key  the key for the identification of the collection
   * @param item item to add
   *
   * @return the updated collection of items for the given key
   */
  public C putItem(K key, I item)
  {
    C items = get(key);
    if (items == null) {
      put(key, items = cloneEmptySet());
    }
    items.add(item);
    return items;
  }

  /**
   * Adds a collection of items to the collection corresponding to the given
   * key. The collection is created if the key is not mapped yet.
   *
   * @param key the key for the identification of the collection
   * @param c   items to add
   *
   * @return the updated collection of items for the given key
   *
   * @throws NullPointerException if <code>c</code> is <code>null</code> and
   *                              the underlying collection rejects it
   */
  public C putAllItems(K key, Collection<? extends I> c)
  {
    C items = get(key);
    // the missing collection (not the missing argument) triggers the creation
    if (items == null) {
      put(key, items = cloneEmptySet());
    }
    items.addAll(c);
    return items;
  }

  /**
   * Removes the item from the collection corresponding to the given key.
   * The key is removed as well when its collection becomes empty.
   *
   * @param key  the key for the identification of the collection
   * @param item item to remove
   *
   * @return the updated collection of items for the given key, or
   * <code>null</code> if the key is not mapped
   */
  public C removeItem(K key, I item)
  {
    C items = get(key);
    if (items == null) {
      return null;
    }
    items.remove(item);
    if (items.isEmpty()) {
      remove(key);
    }
    return items;
  }

  /**
   * Removes a collection of items from the collection corresponding to the
   * given key. The key is removed as well when its collection becomes empty.
   *
   * @param key the key for the identification of the collection
   * @param c   items to remove
   *
   * @return the updated collection of items for the given key, or
   * <code>null</code> if the key is not mapped
   */
  public C removeAllItems(K key, Collection<? extends I> c)
  {
    C items = get(key);
    if (items == null) {
      return null;
    }
    items.removeAll(c);
    if (items.isEmpty()) {
      remove(key);
    }
    return items;
  }

  /**
   * Makes all collections read-only, by replacing each of them with an
   * unmodifiable view. The kind of view is chosen according to the type of
   * the empty collection, from the most specific to the most generic.
   */
  @SuppressWarnings("unchecked")
  public void makeAllCollectionsUnmodifiable()
  {
    if (emptySet instanceof ExtendedSet) {
      for (Entry<K, C> e : entrySet()) {
        e.setValue((C) ((ExtendedSet) e.getValue()).unmodifiable());
      }
    } else if (emptySet instanceof List) {
      for (Entry<K, C> e : entrySet()) {
        e.setValue((C) (Collections.unmodifiableList((List<I>) e.getValue())));
      }
    } else if (emptySet instanceof SortedSet) {
      // must be checked before Set, since every SortedSet is also a Set
      for (Entry<K, C> e : entrySet()) {
        e.setValue((C) (Collections.unmodifiableSortedSet((SortedSet<I>) e.getValue())));
      }
    } else if (emptySet instanceof Set) {
      for (Entry<K, C> e : entrySet()) {
        e.setValue((C) (Collections.unmodifiableSet((Set<I>) e.getValue())));
      }
    } else {
      for (Entry<K, C> e : entrySet()) {
        e.setValue((C) (Collections.unmodifiableCollection(e.getValue())));
      }
    }
  }
}

View File

@ -0,0 +1,103 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.utilities;
/**
 * Hash functions for integers and integer arrays.
 *
 * @author Alessandro Colantonio
 * @version $Id: IntHashCode.java 127 2010-12-21 20:22:12Z cocciasik $
 */
public class IntHashCode
{
  /**
   * Computes a hashcode for an integer
   * <p>
   * Inspired by Thomas Wang's function, described at <a
   * href="http://www.concentric.net/~ttwang/tech/inthash.htm"
   * >http://www.concentric.net/~ttwang/tech/inthash.htm</a>
   *
   * @param key the given integer
   *
   * @return the hashcode
   */
  public static int hashCode(int key)
  {
    int mixed = ~key + (key << 15);
    mixed ^= mixed >>> 12;
    mixed += mixed << 2;
    mixed ^= mixed >>> 4;
    mixed *= 2057;
    return mixed ^ (mixed >>> 16);
  }

  /**
   * Computes the hashcode of a whole array of integers, with seed
   * <code>0</code>
   *
   * @param keys the given integer array
   *
   * @return the hashcode
   */
  public static int hashCode(int[] keys)
  {
    return hashCode(keys, keys.length, 0);
  }

  /**
   * Computes the hashcode of the first elements of an array of integers
   * <p>
   * It is based on MurmurHash3 Algorithm, described at <a
   * href="http://sites.google.com/site/murmurhash/"
   * >http://sites.google.com/site/murmurhash</a>
   *
   * @param keys the given integer array
   * @param len  number of elements to include, that is
   *             {@code len <= keys.length}
   * @param seed initial seed
   *
   * @return the hashcode
   */
  public static int hashCode(int[] keys, int len, int seed)
  {
    int hash = 0x971e137b ^ seed;
    int mulBefore = 0x95543787;
    int mulAfter = 0x2ad7eb25;

    int pos = 0;
    while (pos < len) {
      // mix the current element
      final int scrambled = Integer.rotateLeft(keys[pos] * mulBefore, 11) * mulAfter;
      hash = (hash ^ scrambled) * 3 + 0x52dce729;

      // the multipliers change at each step
      mulBefore = mulBefore * 5 + 0x7b7d159c;
      mulAfter = mulAfter * 5 + 0x6bce6396;
      pos++;
    }

    // final avalanche
    hash ^= len;
    hash ^= hash >>> 16;
    hash *= 0x85ebca6b;
    hash ^= hash >>> 13;
    hash *= 0xc2b2ae35;
    return hash ^ (hash >>> 16);
  }
}

View File

@ -0,0 +1,115 @@
package io.druid.extendedset.utilities;
import java.nio.IntBuffer;
import java.util.ArrayList;
/**
 * A growable list of <code>int</code> values, stored in fixed-size chunks
 * ("base lists") of {@link #allocateSize} elements each. Chunks are allocated
 * only when one of their positions is written; positions that have never been
 * written read as <code>0</code>.
 */
public class IntList
{
  /**
   * chunks of the list; a <code>null</code> chunk has never been written
   */
  private final ArrayList<int[]> baseLists = new ArrayList<int[]>();

  /**
   * number of elements of each chunk
   */
  private final int allocateSize;

  /**
   * greatest index written so far, or <code>-1</code> if the list is empty
   */
  private int maxIndex;

  /**
   * Creates an empty list with chunks of 1000 elements
   */
  public IntList()
  {
    this(1000);
  }

  /**
   * Creates an empty list
   *
   * @param allocateSize number of elements of each chunk
   */
  public IntList(final int allocateSize)
  {
    this.allocateSize = allocateSize;

    maxIndex = -1;
  }

  /**
   * @return the number of elements, that is the greatest written index plus one
   */
  public int length()
  {
    return maxIndex + 1;
  }

  /**
   * @return <code>true</code> if no element has been written yet
   */
  public boolean isEmpty()
  {
    return (length() == 0);
  }

  /**
   * Appends a value at the end of the list
   *
   * @param value the value to append
   */
  public void add(int value)
  {
    set(length(), value);
  }

  /**
   * Writes a value at the given index, growing the list if needed
   *
   * @param index the position to write
   * @param value the value to write
   */
  public void set(int index, int value)
  {
    int subListIndex = index / allocateSize;

    if (subListIndex >= baseLists.size()) {
      // placeholders only: chunks are allocated when actually written
      for (int i = baseLists.size(); i <= subListIndex; ++i) {
        baseLists.add(null);
      }
    }

    int[] baseList = baseLists.get(subListIndex);

    if (baseList == null) {
      baseList = new int[allocateSize];
      baseLists.set(subListIndex, baseList);
    }

    baseList[index % allocateSize] = value;

    if (index > maxIndex) {
      maxIndex = index;
    }
  }

  /**
   * Reads the value at the given index
   *
   * @param index the position to read
   *
   * @return the value, that is <code>0</code> for positions never written
   *
   * @throws ArrayIndexOutOfBoundsException if the index is negative or
   *                                        beyond the end of the list
   */
  public int get(int index)
  {
    if (index < 0 || index > maxIndex) {
      throw new ArrayIndexOutOfBoundsException(index);
    }

    int subListIndex = index / allocateSize;
    int[] baseList = baseLists.get(subListIndex);

    if (baseList == null) {
      return 0;
    }

    return baseList[index % allocateSize];
  }

  /**
   * @return the number of chunks, including those not allocated
   */
  public int baseListCount()
  {
    return baseLists.size();
  }

  /**
   * Gives a read-only view over a chunk. The view over the last chunk is
   * limited to the elements that belong to the list.
   *
   * @param index the chunk index
   *
   * @return the read-only view, or <code>null</code> if the chunk has never
   * been written
   */
  public IntBuffer getBaseList(int index)
  {
    final int[] array = baseLists.get(index);
    if (array == null) {
      return null;
    }

    final IntBuffer retVal = IntBuffer.wrap(array);

    if (index + 1 == baseListCount()) {
      // elements in the last chunk: length() minus those of previous chunks
      retVal.limit(length() - (index * allocateSize));
    }

    return retVal.asReadOnlyBuffer();
  }

  /**
   * Copies the list into a new array
   *
   * @return an array of {@link #length()} elements, with <code>0</code> at
   * positions never written
   */
  public int[] toArray()
  {
    final int length = length();
    int[] retVal = new int[length];
    int currIndex = 0;
    for (int[] arr : baseLists) {
      final int count = Math.min(length - currIndex, allocateSize);
      // chunks never written stay null: the result already contains zeros
      if (arr != null) {
        System.arraycopy(arr, 0, retVal, currIndex, count);
      }
      currIndex += count;
    }
    return retVal;
  }
}

View File

@ -0,0 +1,689 @@
package io.druid.extendedset.utilities;
import io.druid.extendedset.intset.IntSet;
import java.util.Collection;
import java.util.Formatter;
import java.util.List;
/**
* A wrapper class for classes that implement the {@link IntSet} interface to count method calls
*
* @author Alessandro Colantonio
* @version $Id: IntSetStatistics.java 153 2011-05-30 16:39:57Z cocciasik $
*/
public class IntSetStatistics implements IntSet
{
/*
* Monitored characteristics. All counters are static, hence shared by every
* IntSetStatistics instance.
*/
/**
* number of union operations
*
* @uml.property name="unionCount"
*/
private static long unionCount = 0;
/**
* number of intersection operations
*
* @uml.property name="intersectionCount"
*/
private static long intersectionCount = 0;
/**
* number of difference operations
*
* @uml.property name="differenceCount"
*/
private static long differenceCount = 0;
/**
* number of symmetric difference operations
*
* @uml.property name="symmetricDifferenceCount"
*/
private static long symmetricDifferenceCount = 0;
/**
* number of complement operations
*
* @uml.property name="complementCount"
*/
private static long complementCount = 0;
/**
* cardinality of union operations
*
* @uml.property name="unionSizeCount"
*/
private static long unionSizeCount = 0;
/**
* cardinality of intersection operations
*
* @uml.property name="intersectionSizeCount"
*/
private static long intersectionSizeCount = 0;
/**
* cardinality of difference operations
*
* @uml.property name="differenceSizeCount"
*/
private static long differenceSizeCount = 0;
/**
* cardinality of symmetric difference operations
*
* @uml.property name="symmetricDifferenceSizeCount"
*/
private static long symmetricDifferenceSizeCount = 0;
/**
* cardinality of complement operations
*
* @uml.property name="complementSizeCount"
*/
private static long complementSizeCount = 0;
/**
* number of equality check operations
*
* @uml.property name="equalsCount"
*/
private static long equalsCount = 0;
/**
* number of hash code computations
*
* @uml.property name="hashCodeCount"
*/
private static long hashCodeCount = 0;
/**
* number of <code>containsAll</code> calls
*
* @uml.property name="containsAllCount"
*/
private static long containsAllCount = 0;
/**
* number of <code>containsAny</code> calls
*
* @uml.property name="containsAnyCount"
*/
private static long containsAnyCount = 0;
/**
* number of <code>containsAtLeast</code> calls
*
* @uml.property name="containsAtLeastCount"
*/
private static long containsAtLeastCount = 0;
/**
* instance to monitor
*
* @uml.property name="container"
* @uml.associationEnd
*/
private final IntSet container;
/*
* Statistics getters
*/
/**
* Wraps an {@link IntSet} instance with an {@link IntSetStatistics}
* instance
*
* @param container {@link IntSet} to wrap
*/
public IntSetStatistics(IntSet container)
{
this.container = extractContainer(container);
}
/**
 * Gives the counter of union operations, i.e., {@link #addAll(IntSet)} and
 * {@link #union(IntSet)}.
 *
 * @return number of union operations
 *
 * @uml.property name="unionCount"
 */
public static long getUnionCount()
{
  return IntSetStatistics.unionCount;
}
/**
 * Gives the counter of intersection operations, i.e.,
 * {@link #retainAll(IntSet)} and {@link #intersection(IntSet)}.
 *
 * @return number of intersection operations
 *
 * @uml.property name="intersectionCount"
 */
public static long getIntersectionCount()
{
  return IntSetStatistics.intersectionCount;
}
/**
 * Gives the counter of difference operations, i.e.,
 * {@link #removeAll(IntSet)} and {@link #difference(IntSet)}.
 *
 * @return number of difference operations
 *
 * @uml.property name="differenceCount"
 */
public static long getDifferenceCount()
{
  return IntSetStatistics.differenceCount;
}
/**
 * Gives the counter of symmetric difference operations, i.e.,
 * {@link #symmetricDifference(IntSet)}.
 *
 * @return number of symmetric difference operations
 *
 * @uml.property name="symmetricDifferenceCount"
 */
public static long getSymmetricDifferenceCount()
{
  return IntSetStatistics.symmetricDifferenceCount;
}
/**
 * Gives the counter of complement operations, i.e., {@link #complement()}
 * and {@link #complemented()}.
 *
 * @return number of complement operations
 *
 * @uml.property name="complementCount"
 */
public static long getComplementCount()
{
  return IntSetStatistics.complementCount;
}
/**
 * Gives the cardinality counter of union operations, i.e.,
 * {@link #addAll(IntSet)} and {@link #union(IntSet)}.
 *
 * @return cardinality of union operations
 *
 * @uml.property name="unionSizeCount"
 */
public static long getUnionSizeCount()
{
  return IntSetStatistics.unionSizeCount;
}
/**
 * Reports how many intersection-cardinality computations
 * ({@link #intersectionSize(IntSet)}) have been counted since the last reset.
 *
 * @return number of {@code intersectionSize} calls
 *
 * @uml.property name="intersectionSizeCount"
 */
public static long getIntersectionSizeCount()
{
  return intersectionSizeCount;
}
/**
 * Reports how many difference-cardinality computations
 * ({@link #differenceSize(IntSet)}) have been counted since the last reset.
 *
 * @return number of {@code differenceSize} calls
 *
 * @uml.property name="differenceSizeCount"
 */
public static long getDifferenceSizeCount()
{
  return differenceSizeCount;
}
/**
 * Reports how many symmetric-difference-cardinality computations
 * ({@link #symmetricDifferenceSize(IntSet)}) have been counted since the
 * last reset.
 *
 * @return number of {@code symmetricDifferenceSize} calls
 *
 * @uml.property name="symmetricDifferenceSizeCount"
 */
public static long getSymmetricDifferenceSizeCount()
{
  return symmetricDifferenceSizeCount;
}
/**
 * Reports how many complement-cardinality computations
 * ({@link #complementSize()}) have been counted since the last reset.
 *
 * @return number of {@code complementSize} calls
 *
 * @uml.property name="complementSizeCount"
 */
public static long getComplementSizeCount()
{
  return complementSizeCount;
}
/**
 * Reports how many equality checks ({@link #equals(Object)}) have been
 * counted since the last reset.
 *
 * @return number of equality check operations
 *
 * @uml.property name="equalsCount"
 */
public static long getEqualsCount()
{
  return equalsCount;
}
/**
 * Reports how many hash code computations ({@link #hashCode()}) have been
 * counted since the last reset.
 *
 * @return number of hash code computations
 *
 * @uml.property name="hashCodeCount"
 */
public static long getHashCodeCount()
{
  return hashCodeCount;
}
/**
 * Reports how many {@link #containsAll(IntSet)} calls have been counted
 * since the last reset.
 *
 * @return number of {@code containsAll} calls
 *
 * @uml.property name="containsAllCount"
 */
public static long getContainsAllCount()
{
  return containsAllCount;
}
/**
 * Reports how many {@link #containsAny(IntSet)} calls have been counted
 * since the last reset.
 *
 * @return number of {@code containsAny} calls
 *
 * @uml.property name="containsAnyCount"
 */
public static long getContainsAnyCount()
{
  return containsAnyCount;
}
/**
 * Reports how many {@link #containsAtLeast(IntSet, int)} calls have been
 * counted since the last reset.
 *
 * @return number of {@code containsAtLeast} calls
 *
 * @uml.property name="containsAtLeastCount"
 */
public static long getContainsAtLeastCount()
{
  return containsAtLeastCount;
}
/*
* Other statistical methods
*/
/**
 * Adds up the five cardinality-computation counters (intersection, union,
 * difference, symmetric difference and complement size).
 *
 * @return total number of {@code *Size} calls counted since the last reset
 */
public static long getSizeCheckCount()
{
  long total = getIntersectionSizeCount();
  total += getUnionSizeCount();
  total += getDifferenceSizeCount();
  total += getSymmetricDifferenceSizeCount();
  total += getComplementSizeCount();
  return total;
}
/**
 * Sets every static operation counter of this class back to zero.
 */
public static void resetCounters()
{
  // set-producing operations
  unionCount = 0;
  intersectionCount = 0;
  differenceCount = 0;
  symmetricDifferenceCount = 0;
  complementCount = 0;
  // cardinality-only operations
  unionSizeCount = 0;
  intersectionSizeCount = 0;
  differenceSizeCount = 0;
  symmetricDifferenceSizeCount = 0;
  complementSizeCount = 0;
  // comparisons and containment checks
  equalsCount = 0;
  hashCodeCount = 0;
  containsAllCount = 0;
  containsAnyCount = 0;
  containsAtLeastCount = 0;
}
/**
 * Renders all counters as text, one {@code name: value} pair per line, in
 * a fixed order.
 *
 * @return the summary information string
 */
public static String summary()
{
  // Format strings and the values they print, kept in matching order.
  final String[] formats = {
      "unionCount: %d\n",
      "intersectionCount: %d\n",
      "differenceCount: %d\n",
      "symmetricDifferenceCount: %d\n",
      "complementCount: %d\n",
      "unionSizeCount: %d\n",
      "intersectionSizeCount: %d\n",
      "differenceSizeCount: %d\n",
      "symmetricDifferenceSizeCount: %d\n",
      "complementSizeCount: %d\n",
      "equalsCount: %d\n",
      "hashCodeCount: %d\n",
      "containsAllCount: %d\n",
      "containsAnyCount: %d\n",
      "containsAtLeastCount: %d\n"
  };
  final long[] values = {
      unionCount,
      intersectionCount,
      differenceCount,
      symmetricDifferenceCount,
      complementCount,
      unionSizeCount,
      intersectionSizeCount,
      differenceSizeCount,
      symmetricDifferenceSizeCount,
      complementSizeCount,
      equalsCount,
      hashCodeCount,
      containsAllCount,
      containsAnyCount,
      containsAtLeastCount
  };
  final StringBuilder report = new StringBuilder();
  final Formatter out = new Formatter(report);
  for (int i = 0; i < formats.length; i++) {
    out.format(formats[i], Long.valueOf(values[i]));
  }
  return report.toString();
}
/**
 * Strips any {@link IntSetStatistics} wrappers from the given set.
 *
 * @param c set that may or may not be a statistics wrapper (may be
 *          {@code null}, in which case {@code null} is returned)
 *
 * @return the innermost wrapped {@link IntSet}, or {@code c} itself when it
 *         is not an {@link IntSetStatistics}
 */
public static IntSet extractContainer(IntSet c)
{
  IntSet current = c;
  // Peel wrappers until something that is not a statistics wrapper remains.
  while (current instanceof IntSetStatistics) {
    current = ((IntSetStatistics) current).container;
  }
  return current;
}
/*
* MONITORED METHODS
*/
/**
 * {@inheritDoc}
 * <p>
 * Counted as one union operation; the argument is unwrapped before the
 * call is forwarded to the monitored set.
 */
@Override
public boolean addAll(IntSet c)
{
  unionCount++;
  final IntSet operand = extractContainer(c);
  return container.addAll(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one union operation; the result is wrapped so that it is
 * monitored as well.
 */
@Override
public IntSet union(IntSet other)
{
  unionCount++;
  final IntSet operand = extractContainer(other);
  final IntSet result = container.union(operand);
  return new IntSetStatistics(result);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one intersection operation; the argument is unwrapped before
 * the call is forwarded to the monitored set.
 */
@Override
public boolean retainAll(IntSet c)
{
  intersectionCount++;
  final IntSet operand = extractContainer(c);
  return container.retainAll(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one intersection operation; the result is wrapped so that it
 * is monitored as well.
 */
@Override
public IntSet intersection(IntSet other)
{
  intersectionCount++;
  final IntSet operand = extractContainer(other);
  final IntSet result = container.intersection(operand);
  return new IntSetStatistics(result);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one difference operation; the argument is unwrapped before
 * the call is forwarded to the monitored set.
 */
@Override
public boolean removeAll(IntSet c)
{
  differenceCount++;
  final IntSet operand = extractContainer(c);
  return container.removeAll(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one difference operation; the result is wrapped so that it is
 * monitored as well.
 */
@Override
public IntSet difference(IntSet other)
{
  differenceCount++;
  final IntSet operand = extractContainer(other);
  final IntSet result = container.difference(operand);
  return new IntSetStatistics(result);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one symmetric difference operation. The argument is unwrapped
 * before delegation, and the result is wrapped in a new
 * {@link IntSetStatistics} -- exactly like {@link #union(IntSet)},
 * {@link #intersection(IntSet)} and {@link #difference(IntSet)} -- so that
 * operations performed on the returned set keep being counted.
 */
@Override
public IntSet symmetricDifference(IntSet other)
{
  symmetricDifferenceCount++;
  return new IntSetStatistics(container.symmetricDifference(extractContainer(other)));
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one complement operation before being forwarded to the
 * monitored set.
 */
@Override
public void complement()
{
  complementCount += 1;
  container.complement();
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one complement operation; the result is wrapped so that it is
 * monitored as well.
 */
@Override
public IntSet complemented()
{
  complementCount++;
  final IntSet result = container.complemented();
  return new IntSetStatistics(result);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one union-size computation; the argument is unwrapped before
 * delegation.
 */
@Override
public int unionSize(IntSet other)
{
  unionSizeCount++;
  final IntSet operand = extractContainer(other);
  return container.unionSize(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one intersection-size computation; the argument is unwrapped
 * before delegation.
 */
@Override
public int intersectionSize(IntSet other)
{
  intersectionSizeCount++;
  final IntSet operand = extractContainer(other);
  return container.intersectionSize(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one difference-size computation; the argument is unwrapped
 * before delegation.
 */
@Override
public int differenceSize(IntSet other)
{
  differenceSizeCount++;
  final IntSet operand = extractContainer(other);
  return container.differenceSize(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one symmetric-difference-size computation; the argument is
 * unwrapped before delegation.
 */
@Override
public int symmetricDifferenceSize(IntSet other)
{
  symmetricDifferenceSizeCount++;
  final IntSet operand = extractContainer(other);
  return container.symmetricDifferenceSize(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one complement-size computation.
 */
@Override
public int complementSize()
{
  complementSizeCount += 1;
  return container.complementSize();
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one {@code containsAll} call; the argument is unwrapped
 * before delegation.
 */
@Override
public boolean containsAll(IntSet c)
{
  containsAllCount++;
  final IntSet operand = extractContainer(c);
  return container.containsAll(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one {@code containsAny} call; the argument is unwrapped
 * before delegation.
 */
@Override
public boolean containsAny(IntSet other)
{
  containsAnyCount++;
  final IntSet operand = extractContainer(other);
  return container.containsAny(operand);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one {@code containsAtLeast} call; the set argument is
 * unwrapped before delegation.
 */
@Override
public boolean containsAtLeast(IntSet other, int minElements)
{
  containsAtLeastCount++;
  final IntSet operand = extractContainer(other);
  return container.containsAtLeast(operand, minElements);
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one hash code computation; the value is the hash code of the
 * monitored set.
 */
@Override
public int hashCode()
{
  hashCodeCount += 1;
  return container.hashCode();
}
/**
 * {@inheritDoc}
 * <p>
 * Counted as one equality check (even for {@code null}). A statistics
 * wrapper argument is unwrapped first, so two wrappers compare by the sets
 * they monitor.
 */
@Override
public boolean equals(Object obj)
{
  equalsCount++;
  if (obj == null) {
    return false;
  }
  if (obj instanceof IntSetStatistics) {
    return container.equals(extractContainer((IntSetStatistics) obj));
  }
  return container.equals(obj);
}
/*
* SIMPLE REDIRECTION
*/
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public double bitmapCompressionRatio()
{
  return container.bitmapCompressionRatio();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public double collectionCompressionRatio()
{
  return container.collectionCompressionRatio();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public void clear(int from, int to)
{
  container.clear(from, to);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public void fill(int from, int to)
{
  container.fill(from, to);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public void clear()
{
  container.clear();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public boolean add(int i)
{
  return container.add(i);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public boolean remove(int i)
{
  return container.remove(i);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public void flip(int e)
{
  container.flip(e);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int get(int i)
{
  return container.get(i);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int indexOf(int e)
{
  return container.indexOf(e);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public boolean contains(int i)
{
  return container.contains(i);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int first()
{
  return container.first();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int last()
{
  return container.last();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public boolean isEmpty()
{
  return container.isEmpty();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int size()
{
  return container.size();
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the monitored set's own iterator; no counter is touched.
 */
@Override
public IntIterator iterator()
{
  return container.iterator();
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the monitored set's own descending iterator; no counter is
 * touched.
 */
@Override
public IntIterator descendingIterator()
{
  return container.descendingIterator();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int[] toArray()
{
  return container.toArray();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int[] toArray(int[] a)
{
  return container.toArray(a);
}
/**
 * {@inheritDoc}
 * <p>
 * Not counted. The argument is unwrapped with
 * {@link #extractContainer(IntSet)} before delegation, consistently with
 * {@link #equals(Object)} and the monitored operations, so the wrapped set
 * is always compared against another plain set rather than against a
 * statistics wrapper.
 */
@Override
public int compareTo(IntSet o)
{
  return container.compareTo(extractContainer(o));
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the string form of the monitored set; no counter is touched.
 */
@Override
public String toString()
{
  return container.toString();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; the returned sets are not wrapped and no
 * counter is touched.
 */
@Override
public List<? extends IntSet> powerSet()
{
  return container.powerSet();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; the returned sets are not wrapped and no
 * counter is touched.
 */
@Override
public List<? extends IntSet> powerSet(int min, int max)
{
  return container.powerSet(min, max);
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int powerSetSize()
{
  return container.powerSetSize();
}
/**
 * {@inheritDoc}
 * <p>
 * Forwarded to the monitored set; no counter is touched.
 */
@Override
public int powerSetSize(int min, int max)
{
  return container.powerSetSize(min, max);
}
/**
 * {@inheritDoc}
 * <p>
 * Not counted. The argument is unwrapped with
 * {@link #extractContainer(IntSet)} before delegation, consistently with
 * the monitored operations, so the wrapped set never receives a statistics
 * wrapper as its operand.
 */
@Override
public double jaccardSimilarity(IntSet other)
{
  return container.jaccardSimilarity(extractContainer(other));
}
/**
 * {@inheritDoc}
 * <p>
 * Not counted. The argument is unwrapped with
 * {@link #extractContainer(IntSet)} before delegation, consistently with
 * the monitored operations, so the wrapped set never receives a statistics
 * wrapper as its operand.
 */
@Override
public double jaccardDistance(IntSet other)
{
  return container.jaccardDistance(extractContainer(other));
}
/**
 * {@inheritDoc}
 * <p>
 * Not counted. The argument is unwrapped with
 * {@link #extractContainer(IntSet)} before delegation, consistently with
 * the monitored operations, so the wrapped set never receives a statistics
 * wrapper as its operand.
 */
@Override
public double weightedJaccardSimilarity(IntSet other)
{
  return container.weightedJaccardSimilarity(extractContainer(other));
}
/**
 * {@inheritDoc}
 * <p>
 * Not counted. The argument is unwrapped with
 * {@link #extractContainer(IntSet)} before delegation, consistently with
 * the monitored operations, so the wrapped set never receives a statistics
 * wrapper as its operand.
 */
@Override
public double weightedJaccardDistance(IntSet other)
{
  return container.weightedJaccardDistance(extractContainer(other));
}
/*
* OTHERS
*/
/**
 * {@inheritDoc}
 * <p>
 * The empty set produced by the monitored set is wrapped so that it is
 * monitored too.
 */
@Override
public IntSet empty()
{
  final IntSet blank = container.empty();
  return new IntSetStatistics(blank);
}
/**
 * {@inheritDoc}
 * <p>
 * Clones the monitored set and wraps the copy in a new statistics wrapper.
 */
@Override
public IntSet clone()
{
  final IntSet copy = container.clone();
  return new IntSetStatistics(copy);
}
/**
 * {@inheritDoc}
 * <p>
 * Conversion is performed by the monitored set; the result is wrapped so
 * that it is monitored too.
 */
@Override
public IntSet convert(int... a)
{
  final IntSet converted = container.convert(a);
  return new IntSetStatistics(converted);
}
/**
 * {@inheritDoc}
 * <p>
 * Conversion is performed by the monitored set; the result is wrapped so
 * that it is monitored too.
 */
@Override
public IntSet convert(Collection<Integer> c)
{
  final IntSet converted = container.convert(c);
  return new IntSetStatistics(converted);
}
/**
 * {@inheritDoc}
 * <p>
 * Prefixes the monitored set's debug output with a one-line header.
 */
@Override
public String debugInfo()
{
  final String inner = container.debugInfo();
  return "Analyzed IntSet:\n" + inner;
}
}

View File

@ -0,0 +1,869 @@
package io.druid.extendedset.utilities.random;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
/**
* <h3>MersenneTwister and MersenneTwisterFast</h3>
* <p><b>Version 13</b>, based on version MT19937(99/10/29)
* of the Mersenne Twister algorithm found at
* <a href="http://www.math.keio.ac.jp/matumoto/emt.html">
* The Mersenne Twister Home Page</a>, with the initialization
* improved using the new 2002/1/26 initialization algorithm
* By Sean Luke, October 2004.
* <p>
* <p><b>MersenneTwister</b> is a drop-in subclass replacement
* for java.util.Random. It is properly synchronized and
* can be used in a multithreaded environment. On modern VMs such
* as HotSpot, it is approximately 1/3 slower than java.util.Random.
* <p>
* <p><b>MersenneTwisterFast</b> is not a subclass of java.util.Random. It has
* the same public methods as Random does, however, and it is
* algorithmically identical to MersenneTwister. MersenneTwisterFast
* has hard-code inlined all of its methods directly, and made all of them
* final (well, the ones of consequence anyway). Further, these
* methods are <i>not</i> synchronized, so the same MersenneTwisterFast
* instance cannot be shared by multiple threads. But all this helps
* MersenneTwisterFast achieve well over twice the speed of MersenneTwister.
* java.util.Random is about 1/3 slower than MersenneTwisterFast.
* <p>
* <h3>About the Mersenne Twister</h3>
* <p>This is a Java version of the C-program for MT19937: Integer version.
* The MT19937 algorithm was created by Makoto Matsumoto and Takuji Nishimura,
* who ask: "When you use this, send an email to: matumoto@math.keio.ac.jp
* with an appropriate reference to your work". Indicate that this
* is a translation of their algorithm into Java.
* <p>
* <p><b>Reference. </b>
* Makoto Matsumoto and Takuji Nishimura,
* "Mersenne Twister: A 623-Dimensionally Equidistributed Uniform
* Pseudo-Random Number Generator",
* <i>ACM Transactions on Modeling and Computer Simulation,</i>
* Vol. 8, No. 1, January 1998, pp 3--30.
* <p>
* <h3>About this Version</h3>
* <p>
* <p><b>Changes Since V12:</b> clone() method added.
* <p>
* <p><b>Changes Since V11:</b> stateEquals(...) method added. MersenneTwisterFast
* is equal to other MersenneTwisterFasts with identical state; likewise
* MersenneTwister is equal to other MersenneTwister with identical state.
* This isn't equals(...) because that requires a contract of immutability
* to compare by value.
* <p>
* <p><b>Changes Since V10:</b> A documentation error suggested that
* setSeed(int[]) required an int[] array 624 long. In fact, the array
* can be any non-zero length. The new version also checks for this fact.
* <p>
* <p><b>Changes Since V9:</b> readState(stream) and writeState(stream)
* provided.
* <p>
* <p><b>Changes Since V8:</b> setSeed(int) was only using the first 28 bits
* of the seed; it should have been 32 bits. For small-number seeds the
* behavior is identical.
* <p>
* <p><b>Changes Since V7:</b> A documentation error in MersenneTwisterFast
* (but not MersenneTwister) stated that nextDouble selects uniformly from
* the full-open interval [0,1]. It does not. nextDouble's contract is
* identical across MersenneTwisterFast, MersenneTwister, and java.util.Random,
* namely, selection in the half-open interval [0,1). That is, 1.0 should
* not be returned. A similar contract exists in nextFloat.
* <p>
* <p><b>Changes Since V6:</b> License has changed from LGPL to BSD.
* New timing information to compare against
* java.util.Random. Recent versions of HotSpot have helped Random increase
* in speed to the point where it is faster than MersenneTwister but slower
* than MersenneTwisterFast (which should be the case, as it's a less complex
* algorithm but is synchronized).
* <p>
* <p><b>Changes Since V5:</b> New empty constructor made to work the same
* as java.util.Random -- namely, it seeds based on the current time in
* milliseconds.
* <p>
* <p><b>Changes Since V4:</b> New initialization algorithms. See
* <a href="http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html">
* http://www.math.keio.ac.jp/matumoto/MT2002/emt19937ar.html</a>.
* <p>
* <p>The MersenneTwister code is based on standard MT19937 C/C++
* code by Takuji Nishimura,
* with suggestions from Topher Cooper and Marc Rieffel, July 1997.
* The code was originally translated into Java by Michael Lecuyer,
* January 1999, and the original code is Copyright (c) 1999 by Michael Lecuyer.
* <p>
* <h3>Java notes</h3>
* <p>
* <p>This implementation implements the bug fixes made
* in Java 1.2's version of Random, which means it can be used with
* earlier versions of Java. See
* <a href="http://www.javasoft.com/products/jdk/1.2/docs/api/java/util/Random.html">
* the JDK 1.2 java.util.Random documentation</a> for further documentation
* on the random-number generation contracts made. Additionally, there's
* an undocumented bug in the JDK java.util.Random.nextBytes() method,
* which this code fixes.
* <p>
* <p> Just like java.util.Random, this
* generator accepts a long seed but doesn't use all of it. java.util.Random
* uses 48 bits. The Mersenne Twister instead uses 32 bits (int size).
* So it's best if your seed does not exceed the int range.
* <p>
* <p>MersenneTwister can be used reliably
* on JDK version 1.1.5 or above. Earlier Java versions have serious bugs in
* java.util.Random; only MersenneTwisterFast (and not MersenneTwister nor
* java.util.Random) should be used with them.
* <p>
* <h3>License</h3>
* <p>
* Copyright (c) 2003 by Sean Luke. <br>
* Portions copyright (c) 1993 by Michael Lecuyer. <br>
* All rights reserved. <br>
* <p>
* <p>Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* <ul>
* <li> Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* <li> Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* <li> Neither the name of the copyright owners, their employers, nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
* </ul>
* <p>THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @version 13
*/
@SuppressWarnings("serial")
public class MersenneTwister extends java.util.Random implements Cloneable
{
// Period parameters
private static final int N = 624; // number of 32-bit words in the state vector mt
private static final int M = 397; // offset of the second word combined during regeneration in next(int)
private static final int MATRIX_A = 0x9908b0df; // constant vector a
private static final int UPPER_MASK = 0x80000000; // most significant w-r bits
private static final int LOWER_MASK = 0x7fffffff; // least significant r bits
// Tempering parameters
private static final int TEMPERING_MASK_B = 0x9d2c5680;
private static final int TEMPERING_MASK_C = 0xefc60000;
private int mt[]; // the array for the state vector (allocated with length N by setSeed(long))
// Index of the next word of mt to hand out; next(int) regenerates the whole
// array when mti >= N. setSeed(long) leaves it at N.
private int mti;
// Two-entry lookup table filled by setSeed(long): {0x0, MATRIX_A}; indexed by
// the low bit of y in next(int).
private int mag01[];
// a good initial seed (of int size, though stored in a long)
//private static final long GOOD_SEED = 4357;
/* implemented here because there's a bug in Random's implementation
of the Gaussian code (divide by zero, and log(0), ugh!), yet its
gaussian variables are private so we can't access them here. :-( */
private double __nextNextGaussian; // cached second Gaussian value, valid only while the flag below is true
private boolean __haveNextNextGaussian; // true when __nextNextGaussian holds an unused value
/* We're overriding all internal data, to my knowledge, so this should be okay */
/**
 * Creates a generator seeded with the current time in milliseconds
 * ({@link System#currentTimeMillis()}), so two instances created within the
 * same millisecond produce the same sequence.
 */
public MersenneTwister()
{
this(System.currentTimeMillis());
}
/**
 * Creates a generator from the given seed. Though the seed is passed in as
 * a long, only its low 32 bits initialize the Mersenne Twister state (see
 * {@link #setSeed(long)}), so it is best to pass a value in the int range.
 *
 * @param seed the initial seed
 */
public MersenneTwister(final long seed)
{
super(seed); /* just in case */
// Explicit call so the Mersenne Twister state is set up from this seed.
setSeed(seed);
}
/**
 * Creates a generator seeded from an array of integers (see
 * {@link #setSeed(int[])}). The array must have a non-zero length. Every
 * element contributes to the state; when the array is shorter than 624
 * entries its elements are reused in a wrap-around fashion.
 *
 * @param array the seed material; must not be empty
 *
 * @throws IllegalArgumentException if {@code array} has length zero
 */
public MersenneTwister(final int[] array)
{
super(System.currentTimeMillis()); /* pick something at random just in case */
setSeed(array);
}
/**
 * Manual smoke test and micro-benchmark. Prints to standard output:
 * the first 1000 outputs for the reference array seed (to be compared by
 * hand with the published mt19937ar.out), the time taken to draw
 * 100000000 ints, and then 1000 samples from each of the typed
 * {@code next*} methods, each run restarting from the same fixed seed.
 *
 * @param args ignored
 */
public static void main(String args[])
{
int j;
MersenneTwister r;
// CORRECTNESS TEST
// COMPARE WITH http://www.math.keio.ac.jp/matumoto/CODES/MT2002/mt19937ar.out
r = new MersenneTwister(new int[]{0x123, 0x234, 0x345, 0x456});
System.out.println("Output of MersenneTwister with new (2002/1/26) seeding mechanism");
for (j = 0; j < 1000; j++) {
// first, convert the int from signed to "unsigned"
long l = r.nextInt();
if (l < 0) {
l += 4294967296L; // 2^32: maps a negative int onto its unsigned 32-bit value
}
String s = String.valueOf(l);
while (s.length() < 10) {
s = " " + s; // left-pad to a fixed width of 10 characters
}
System.out.print(s + " ");
if (j % 5 == 4) {
System.out.println();
}
}
// SPEED TEST
final long SEED = 4357;
int xx;
long ms;
System.out.println("\nTime to test grabbing 100000000 ints");
r = new MersenneTwister(SEED);
ms = System.currentTimeMillis();
xx = 0;
for (j = 0; j < 100000000; j++) {
xx += r.nextInt();
}
// xx is printed only so that the loop above has an observable result.
System.out.println("Mersenne Twister: " + (System.currentTimeMillis() - ms) + " Ignore this: " + xx);
System.out.println("To compare this with java.util.Random, run this same test on MersenneTwisterFast.");
System.out.println("The comparison with Random is removed from MersenneTwister because it is a proper");
System.out.println("subclass of Random and this unfairly makes some of Random's methods un-inlinable,");
System.out.println("so it would make Random look worse than it is.");
// TEST TO COMPARE TYPE CONVERSION BETWEEN
// MersenneTwisterFast.java AND MersenneTwister.java
System.out.println("\nGrab the first 1000 booleans");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextBoolean() + " ");
if (j % 8 == 7) {
System.out.println();
}
}
// Here and in the sections below: terminate the last row of output.
if (!(j % 8 == 7)) {
System.out.println();
}
System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(double)");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextBoolean(j / 999.0) + " ");
if (j % 8 == 7) {
System.out.println();
}
}
if (!(j % 8 == 7)) {
System.out.println();
}
System.out.println("\nGrab 1000 booleans of increasing probability using nextBoolean(float)");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextBoolean(j / 999.0f) + " ");
if (j % 8 == 7) {
System.out.println();
}
}
if (!(j % 8 == 7)) {
System.out.println();
}
byte[] bytes = new byte[1000];
System.out.println("\nGrab the first 1000 bytes using nextBytes");
r = new MersenneTwister(SEED);
r.nextBytes(bytes);
for (j = 0; j < 1000; j++) {
System.out.print(bytes[j] + " ");
if (j % 16 == 15) {
System.out.println();
}
}
if (!(j % 16 == 15)) {
System.out.println();
}
byte b;
System.out.println("\nGrab the first 1000 bytes -- must be same as nextBytes");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print((b = r.nextByte()) + " ");
// Flag any position where nextByte() disagrees with the nextBytes() run above.
if (b != bytes[j]) {
System.out.print("BAD ");
}
if (j % 16 == 15) {
System.out.println();
}
}
if (!(j % 16 == 15)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 shorts");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextShort() + " ");
if (j % 8 == 7) {
System.out.println();
}
}
if (!(j % 8 == 7)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 ints");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextInt() + " ");
if (j % 4 == 3) {
System.out.println();
}
}
if (!(j % 4 == 3)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 ints of different sizes");
r = new MersenneTwister(SEED);
int max = 1;
for (j = 0; j < 1000; j++) {
System.out.print(r.nextInt(max) + " ");
// Double the bound each round; restart at 1 once the int overflows.
max *= 2;
if (max <= 0) {
max = 1;
}
if (j % 4 == 3) {
System.out.println();
}
}
if (!(j % 4 == 3)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 longs");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextLong() + " ");
if (j % 3 == 2) {
System.out.println();
}
}
if (!(j % 3 == 2)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 longs of different sizes");
r = new MersenneTwister(SEED);
long max2 = 1;
for (j = 0; j < 1000; j++) {
System.out.print(r.nextLong(max2) + " ");
// Double the bound each round; restart at 1 once the long overflows.
max2 *= 2;
if (max2 <= 0) {
max2 = 1;
}
if (j % 4 == 3) {
System.out.println();
}
}
if (!(j % 4 == 3)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 floats");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextFloat() + " ");
if (j % 4 == 3) {
System.out.println();
}
}
if (!(j % 4 == 3)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 doubles");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextDouble() + " ");
if (j % 3 == 2) {
System.out.println();
}
}
if (!(j % 3 == 2)) {
System.out.println();
}
System.out.println("\nGrab the first 1000 gaussian doubles");
r = new MersenneTwister(SEED);
for (j = 0; j < 1000; j++) {
System.out.print(r.nextGaussian() + " ");
if (j % 3 == 2) {
System.out.println();
}
}
if (!(j % 3 == 2)) {
System.out.println();
}
}
/**
 * Creates a copy of this generator. The state vector and the
 * {@code mag01} table are duplicated, so the copy does not share those
 * arrays with this instance.
 *
 * @return the cloned generator
 *
 * @throws CloneNotSupportedException if {@code super.clone()} fails
 */
@Override
public Object clone() throws CloneNotSupportedException
{
  final MersenneTwister copy = (MersenneTwister) super.clone();
  // super.clone() is shallow: give the copy its own arrays.
  copy.mt = this.mt.clone();
  copy.mag01 = this.mag01.clone();
  return copy;
}
/**
 * Compares the Mersenne Twister state of this generator with another one:
 * the position {@code mti}, the {@code mag01} table and the state vector
 * {@code mt}. The cached Gaussian value is not part of the comparison.
 *
 * @param o object to compare with
 *
 * @return {@code true} if {@code o} is this instance, or a
 *         {@link MersenneTwister} whose compared state matches;
 *         {@code false} otherwise (including for {@code null})
 */
public boolean stateEquals(Object o)
{
  if (this == o) {
    return true;
  }
  // instanceof is false for null, so this also rejects a null argument
  if (!(o instanceof MersenneTwister)) {
    return false;
  }
  final MersenneTwister that = (MersenneTwister) o;
  if (this.mti != that.mti) {
    return false;
  }
  for (int i = 0; i < this.mag01.length; i++) {
    if (this.mag01[i] != that.mag01[i]) {
      return false;
    }
  }
  for (int i = 0; i < this.mt.length; i++) {
    if (this.mt[i] != that.mt[i]) {
      return false;
    }
  }
  return true;
}
/**
 * Restores the generator state from a stream, reading in this order: every
 * word of {@code mt}, every word of {@code mag01}, {@code mti}, the cached
 * Gaussian value and its validity flag. This mirrors
 * {@link #writeState(DataOutputStream)}.
 *
 * @param stream source of the state
 *
 * @throws IOException if reading from the stream fails
 */
public void readState(DataInputStream stream) throws IOException
{
  for (int i = 0, n = mt.length; i < n; i++) {
    mt[i] = stream.readInt();
  }
  for (int i = 0, n = mag01.length; i < n; i++) {
    mag01[i] = stream.readInt();
  }
  mti = stream.readInt();
  __nextNextGaussian = stream.readDouble();
  __haveNextNextGaussian = stream.readBoolean();
}
/**
 * Saves the generator state to a stream, writing in this order: every word
 * of {@code mt}, every word of {@code mag01}, {@code mti}, the cached
 * Gaussian value and its validity flag. This mirrors
 * {@link #readState(DataInputStream)}.
 *
 * @param stream destination of the state
 *
 * @throws IOException if writing to the stream fails
 */
public void writeState(DataOutputStream stream) throws IOException
{
  for (int i = 0, n = mt.length; i < n; i++) {
    stream.writeInt(mt[i]);
  }
  for (int i = 0, n = mag01.length; i < n; i++) {
    stream.writeInt(mag01[i]);
  }
  stream.writeInt(mti);
  stream.writeDouble(__nextNextGaussian);
  stream.writeBoolean(__haveNextNextGaussian);
}
/**
 * Initializes the pseudo random number generator from a seed. Only the low
 * 32 bits of {@code seed} go into the Mersenne Twister state, so avoid
 * passing a long that does not fit in an int. The state arrays are
 * reallocated, any cached Gaussian value is discarded, and {@code mti} is
 * left at N so that the first call to {@link #next(int)} regenerates the
 * state vector.
 *
 * @param seed the seed; only its low 32 bits are used for the state
 */
@Override
synchronized public void setSeed(final long seed)
{
// it's always good style to call super
super.setSeed(seed);
// Due to a bug in java.util.Random clear up to 1.2, we're
// doing our own Gaussian variable.
__haveNextNextGaussian = false;
mt = new int[N];
mag01 = new int[2];
mag01[0] = 0x0;
mag01[1] = MATRIX_A;
// The int literal 0xffffffff widens to -1L, so the mask keeps all bits;
// it is the (int) cast that truncates the seed to its low 32 bits.
mt[0] = (int) (seed & 0xffffffff);
for (mti = 1; mti < N; mti++) {
mt[mti] =
(1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >>> 30)) + mti);
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
/* In the previous versions, MSBs of the seed affect */
/* only MSBs of the array mt[]. */
/* 2002/01/09 modified by Makoto Matsumoto */
// Carried over from the C reference code; on a Java int this mask changes nothing.
mt[mti] &= 0xffffffff;
/* for >32 bit machines */
}
}
/**
 * Sets the seed of the MersenneTwister using an array of integers.
 * The array must have a non-zero length. The first mixing loop runs
 * max(624, array.length) times, so every element of the array is folded
 * into the state; when the array is shorter than 624 entries its elements
 * are reused in a wrap-around fashion.
 *
 * @param array the seed material; must not be empty
 *
 * @throws IllegalArgumentException if {@code array} has length zero
 */
synchronized public void setSeed(final int[] array)
{
if (array.length == 0) {
throw new IllegalArgumentException("Array length must be greater than zero");
}
int i, j, k;
// Start from the state produced by a fixed scalar seed, then mix the array in.
setSeed(19650218);
i = 1;
j = 0;
k = (N > array.length ? N : array.length);
// First pass: i walks the state words 1..N-1 cyclically, j walks the seed array cyclically.
for (; k != 0; k--) {
mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1664525)) + array[j] + j; /* non linear */
mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */
i++;
j++;
if (i >= N) {
mt[0] = mt[N - 1];
i = 1;
}
if (j >= array.length) {
j = 0;
}
}
// Second pass: N-1 further mixing steps over the state words, without the seed array.
for (k = N - 1; k != 0; k--) {
mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >>> 30)) * 1566083941)) - i; /* non linear */
mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */
i++;
if (i >= N) {
mt[0] = mt[N - 1];
i = 1;
}
}
mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */
}
/* If you've got a truly old version of Java, you can omit these
two next methods. */
/**
 * Returns an integer with <i>bits</i> bits filled with a random number.
 * One word of the state vector is consumed per call; when all N words have
 * been used, the whole vector is regenerated first. The word is tempered
 * and its most significant <i>bits</i> bits are returned, shifted down
 * with an unsigned shift.
 *
 * @param bits number of random bits requested
 *
 * @return the tempered state word shifted right (unsigned) by
 *         {@code 32 - bits}
 */
@Override
synchronized protected int next(final int bits)
{
int y;
if (mti >= N) // generate N words at one time
{
int kk;
@SuppressWarnings("hiding")
final int[] mt = this.mt; // locals are slightly faster
@SuppressWarnings("hiding")
final int[] mag01 = this.mag01; // locals are slightly faster
// Words 0 .. N-M-1: combine with the word M positions ahead.
for (kk = 0; kk < N - M; kk++) {
y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
mt[kk] = mt[kk + M] ^ (y >>> 1) ^ mag01[y & 0x1];
}
// Words N-M .. N-2: kk + (M - N) is an index into the words regenerated above.
for (; kk < N - 1; kk++) {
y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ mag01[y & 0x1];
}
// Last word wraps around to mt[0].
y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ mag01[y & 0x1];
mti = 0;
}
y = mt[mti++];
y ^= y >>> 11; // TEMPERING_SHIFT_U(y)
y ^= (y << 7) & TEMPERING_MASK_B; // TEMPERING_SHIFT_S(y)
y ^= (y << 15) & TEMPERING_MASK_C; // TEMPERING_SHIFT_T(y)
y ^= (y >>> 18); // TEMPERING_SHIFT_L(y)
return y >>> (32 - bits); // hope that's right!
}
/**
 * Serialization hook that performs default serialization; it exists only
 * so that writing happens while holding this object's monitor.
 *
 * @param out stream to write the default serialized form to
 *
 * @throws IOException if the stream reports an error
 */
private synchronized void writeObject(final ObjectOutputStream out) throws IOException
{
  out.defaultWriteObject();
}
/**
 * Deserialization hook that performs default deserialization; it exists
 * only so that reading happens while holding this object's monitor.
 *
 * @param in stream to read the default serialized form from
 *
 * @throws IOException            if the stream reports an error
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 */
private synchronized void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException
{
  in.defaultReadObject();
}
/**
 * Draws a single random bit and reports whether it is set.
 * (This method is missing from jdk 1.0.x and below.)
 */
@Override
public boolean nextBoolean()
{
  final int bit = next(1);
  return bit != 0;
}
/**
 * Generates a coin flip that returns true with probability
 * <tt>probability</tt>, which must be between 0.0 and 1.0 inclusive. The
 * decision is based on a single {@link #nextFloat()} draw, so it is
 * coarser than {@link #nextBoolean(double)}. The exact values 0.0 and 1.0
 * are answered without consuming any random number. To select this
 * overload explicitly you may need to cast the argument to float.
 *
 * @param probability chance of returning true, in [0.0, 1.0]
 *
 * @return the outcome of the biased coin flip
 *
 * @throws IllegalArgumentException if {@code probability} is below 0.0 or
 *                                  above 1.0
 */
public boolean nextBoolean(final float probability)
{
  final boolean outOfRange = probability < 0.0f || probability > 1.0f;
  if (outOfRange) {
    throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive.");
  }
  // The end points are decided directly, because nextFloat() is half-open.
  if (probability == 0.0f) {
    return false;
  }
  if (probability == 1.0f) {
    return true;
  }
  return nextFloat() < probability;
}
/**
 * Generates a coin flip that returns true with probability
 * <tt>probability</tt>, which must be between 0.0 and 1.0 inclusive. The
 * decision is based on a {@link #nextDouble()} draw; the exact values 0.0
 * and 1.0 are answered without consuming any random number.
 *
 * @param probability chance of returning true, in [0.0, 1.0]
 *
 * @return the outcome of the biased coin flip
 *
 * @throws IllegalArgumentException if {@code probability} is below 0.0 or
 *                                  above 1.0
 */
public boolean nextBoolean(final double probability)
{
  final boolean outOfRange = probability < 0.0 || probability > 1.0;
  if (outOfRange) {
    throw new IllegalArgumentException("probability must be between 0.0 and 1.0 inclusive.");
  }
  // The end points are decided directly, because nextDouble() is half-open.
  if (probability == 0.0) {
    return false;
  }
  if (probability == 1.0) {
    return true;
  }
  return nextDouble() < probability;
}
/**
 * Returns a value in the range 0 (inclusive) to {@code n} (exclusive).
 * Powers of two are served from a single 31-bit draw scaled by {@code n};
 * other bounds draw repeatedly until the acceptance test
 * {@code bits - val + (n - 1) >= 0} holds.
 * (This method is missing from JDK 1.1 and below.)
 *
 * @throws IllegalArgumentException if {@code n} is not positive
 */
@Override
public int nextInt(final int n)
{
  if (n <= 0) {
    throw new IllegalArgumentException("n must be > 0");
  }
  final boolean powerOfTwo = (n & -n) == n;
  if (powerOfTwo) {
    final long scaled = n * (long) next(31);
    return (int) (scaled >> 31);
  }
  while (true) {
    final int draw = next(31);
    final int candidate = draw % n;
    // a negative (overflowed) sum means the draw is rejected
    if (draw - candidate + (n - 1) >= 0) {
      return candidate;
    }
  }
}
/**
 * Provided for completeness' sake. Returns a long in the range 0 to n-1,
 * built from non-negative 63-bit draws ({@code nextLong() >>> 1}) that are
 * repeated until the acceptance test {@code bits - val + (n - 1) >= 0}
 * holds.
 *
 * @param n exclusive upper bound; must be positive
 *
 * @return a value between 0 (inclusive) and {@code n} (exclusive)
 *
 * @throws IllegalArgumentException if {@code n} is not positive
 */
public long nextLong(final long n)
{
  if (n <= 0) {
    throw new IllegalArgumentException("n must be > 0");
  }
  while (true) {
    final long draw = nextLong() >>> 1;
    final long candidate = draw % n;
    // a negative (overflowed) sum means the draw is rejected
    if (draw - candidate + (n - 1) >= 0) {
      return candidate;
    }
  }
}
/**
 * Builds a double from 53 generated bits (26 high bits followed by 27 low
 * bits) divided by 2^53. The original author describes this as a bug fix
 * for JDK 1.1 and below.
 *
 * @return a double in [0.0, 1.0)
 */
@Override
public double nextDouble()
{
  // The high part must be drawn before the low part to keep the output stream unchanged.
  final long high = ((long) next(26)) << 27;
  final int low = next(27);
  return (high + low) / (double) (1L << 53);
}
/**
 * Builds a float from 24 generated bits divided by 2^24. The original
 * author describes this as a bug fix for JDK 1.1 and below.
 *
 * @return a float in [0.0, 1.0)
 */
@Override
public float nextFloat()
{
  final float scale = (float) (1 << 24);
  return next(24) / scale;
}
/**
 * Fills the given array with generated bytes, drawing 8 fresh bits for
 * every position. The original author notes that the JDK implementation
 * instead reuses all four bytes of one int as independent values.
 *
 * @param bytes array to overwrite in place
 */
@Override
public void nextBytes(final byte[] bytes)
{
  final int length = bytes.length;
  int position = 0;
  while (position < length) {
    bytes[position] = (byte) next(8);
    position++;
  }
}
/**
 * Provided for completeness' sake; not part of {@code java.util.Random}.
 *
 * @return a char built from 16 generated bits
 */
public char nextChar()
{
  // A char is a 16-bit value, so exactly 16 bits are drawn.
  final int sixteenBits = next(16);
  return (char) sixteenBits;
}
/**
 * Provided for completeness' sake; not part of {@code java.util.Random}.
 *
 * @return a short built from 16 generated bits
 */
public short nextShort()
{
  final int sixteenBits = next(16);
  return (short) sixteenBits;
}
/**
 * Provided for completeness' sake; not part of {@code java.util.Random}.
 *
 * @return a byte built from 8 generated bits
 */
public byte nextByte()
{
  final int eightBits = next(8);
  return (byte) eightBits;
}
// }
/**
 * Returns a Gaussian-distributed double. Values are produced in pairs: the
 * second value of each pair is cached and handed out by the following call.
 * The original author describes this as a fix for JDK code that could ask
 * for the log of 0 and divide it by 0; see Java bug
 * <a href="http://developer.java.sun.com/developer/bugParade/bugs/4254501.html">
 * http://developer.java.sun.com/developer/bugParade/bugs/4254501.html</a>
 *
 * @return the next Gaussian-distributed value
 */
@Override
public synchronized double nextGaussian()
{
  // Serve the value cached by the previous call, if there is one.
  if (__haveNextNextGaussian) {
    __haveNextNextGaussian = false;
    return __nextNextGaussian;
  }
  double x;
  double y;
  double radiusSquared;
  do {
    x = 2 * nextDouble() - 1; // between -1.0 and 1.0
    y = 2 * nextDouble() - 1; // between -1.0 and 1.0
    radiusSquared = x * x + y * y;
    // radiusSquared == 0 is rejected so log(0) / 0 can never be evaluated below
  } while (radiusSquared >= 1 || radiusSquared == 0);
  final double scale = Math.sqrt(-2 * Math.log(radiusSquared) / radiusSquared);
  __nextNextGaussian = y * scale;
  __haveNextNextGaussian = true;
  return x * scale;
}
}

View File

@ -0,0 +1,885 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.wrappers;
import io.druid.extendedset.AbstractExtendedSet;
import io.druid.extendedset.ExtendedSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.SortedSet;
/**
* {@link ExtendedSet}-based class internally managed by an instance of any
* class implementing {@link Collection}
*
* @param <T> the type of elements maintained by this set
*
* @author Alessandro Colantonio
* @version $Id$
*/
public class GenericExtendedSet<T extends Comparable<T>> extends AbstractExtendedSet<T>
{
/**
 * {@link Collection} implementation class used to hold the elements; a new
 * instance of it is created for every set produced by {@link #empty()}.
 */
private final Class<? extends Collection> setClass;
/**
 * Elements of the set. Not final because several operations replace the
 * backing collection wholesale instead of mutating it in place.
 */
private /*final*/ Collection<T> elements;
/**
* Empty-set constructor
*
* @param setClass {@link Collection}-derived class
*/
@SuppressWarnings("unchecked")
public GenericExtendedSet(Class<? extends Collection> setClass)
{
this.setClass = setClass;
try {
elements = setClass.newInstance();
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
 * {@inheritDoc}
 * <p>
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public double bitmapCompressionRatio()
{
throw new UnsupportedOperationException();
}
/**
 * {@inheritDoc}
 * <p>
 * This implementation reports 0 for an empty set and 1 otherwise.
 */
@Override
public double collectionCompressionRatio()
{
  if (isEmpty()) {
    return 0D;
  }
  return 1D;
}
/**
 * {@inheritDoc}
 * <p>
 * The new set is built on a fresh instance of the same backing collection
 * class as this one.
 */
@Override
public GenericExtendedSet<T> empty()
{
return new GenericExtendedSet<T>(setClass);
}
/**
 * {@inheritDoc}
 * <p>
 * Elements are visited in ascending natural order. A {@link SortedSet} or
 * {@link List} backing collection is iterated directly (it is taken to be
 * sorted already); any other collection is copied and sorted first. The
 * returned iterator does not support {@link Iterator#remove()}.
 */
@Override
public ExtendedIterator<T> iterator()
{
  // prepare the sorted view
  final Collection<T> sorted;
  if (elements instanceof SortedSet<?> || elements instanceof List<?>) {
    //NOTE: SortedSet.comparator() is null
    sorted = elements;
  } else {
    final List<T> copy = new ArrayList<T>(elements);
    Collections.sort(copy);
    sorted = copy;
  }
  // iterate over the sorted view with one element of look-ahead
  return new ExtendedIterator<T>()
  {
    final Iterator<T> itr = sorted.iterator();
    // look-ahead element; null marks exhaustion
    T current = itr.hasNext() ? itr.next() : null;

    @Override
    public void skipAllBefore(T element)
    {
      // Stop once exhausted: comparing against the null sentinel would throw.
      while (current != null && element.compareTo(current) > 0) {
        next();
      }
    }

    @Override
    public boolean hasNext()
    {
      return current != null;
    }

    @Override
    public T next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      final T prev = current;
      current = itr.hasNext() ? itr.next() : null;
      return prev;
    }

    @Override
    public void remove()
    {
      throw new UnsupportedOperationException();
    }
  };
}
/**
 * {@inheritDoc}
 * <p>
 * Elements are visited in descending natural order, from a copy of the
 * backing collection sorted in reverse. The returned iterator does not
 * support {@link Iterator#remove()}.
 */
@Override
public ExtendedIterator<T> descendingIterator()
{
  // TODO: reuse the backing collection when it is already a SortedSet or List
  // instead of always copying and re-sorting it.
  final List<T> sorted = new ArrayList<T>(elements);
  Collections.sort(sorted, Collections.reverseOrder());
  // iterate over the reverse-sorted copy with one element of look-ahead
  return new ExtendedIterator<T>()
  {
    final Iterator<T> itr = sorted.iterator();
    // look-ahead element; null marks exhaustion
    T current = itr.hasNext() ? itr.next() : null;

    @Override
    public void skipAllBefore(T element)
    {
      // In descending order the elements "before" the target are the greater
      // ones, so skip while the target is smaller than the look-ahead. Stop
      // once exhausted: comparing against the null sentinel would throw.
      while (current != null && element.compareTo(current) < 0) {
        next();
      }
    }

    @Override
    public boolean hasNext()
    {
      return current != null;
    }

    @Override
    public T next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      final T prev = current;
      current = itr.hasNext() ? itr.next() : null;
      return prev;
    }

    @Override
    public void remove()
    {
      throw new UnsupportedOperationException();
    }
  };
}
/**
 * {@inheritDoc}
 * <p>
 * When the backing collection is {@link Cloneable} its own {@code clone}
 * method is invoked reflectively; otherwise the elements are copied into a
 * fresh backing collection.
 *
 * @throws RuntimeException wrapping any failure of the reflective call
 */
@SuppressWarnings("unchecked")
@Override
public GenericExtendedSet<T> clone()
{
  // NOTE: do not use super.clone() since it is 10 times slower!
  final GenericExtendedSet<T> copy = empty();
  if (!(elements instanceof Cloneable)) {
    copy.elements.addAll(elements);
    return copy;
  }
  try {
    copy.elements = (Collection<T>) elements.getClass().getMethod("clone").invoke(elements);
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
  return copy;
}
/**
 * {@inheritDoc}
 * <p>
 * Reports the simple name of the backing collection class followed by the
 * string form of the elements.
 */
@Override
public String debugInfo()
{
  final String className = setClass.getSimpleName();
  final String content = elements.toString();
  return className + ": " + content;
}
/*
* Collection methods
*/
/**
 * {@inheritDoc}
 * <p>
 * A {@link List} backing collection is kept sorted and duplicate-free: the
 * element is placed at the position found by binary search. Any other
 * backing collection handles the insertion itself.
 */
@Override
public boolean add(T e)
{
  if (!(elements instanceof List<?>)) {
    return elements.add(e);
  }
  final List<T> sortedList = (List<T>) elements;
  final int searchResult = Collections.binarySearch(sortedList, e);
  if (searchResult >= 0) {
    // already present
    return false;
  }
  // binarySearch encodes a miss as -(insertionPoint) - 1
  final int insertionPoint = -(searchResult + 1);
  sortedList.add(insertionPoint, e);
  return true;
}
/**
 * {@inheritDoc}
 * <p>
 * A {@link List} backing collection is searched by binary search; an object
 * of an incompatible type is reported as not removed.
 */
@SuppressWarnings("unchecked")
@Override
public boolean remove(Object o)
{
  if (!(elements instanceof List<?>)) {
    return elements.remove(o);
  }
  try {
    final List<T> sortedList = (List<T>) elements;
    final int position = Collections.binarySearch(sortedList, (T) o);
    if (position >= 0) {
      sortedList.remove(position);
      return true;
    }
    return false;
  }
  catch (ClassCastException e) {
    // object cannot be compared with the stored elements, hence not contained
    return false;
  }
}
/**
 * {@inheritDoc}
 * <p>
 * A {@link List} backing collection is searched by binary search; an object
 * of an incompatible type is reported as not contained.
 */
@SuppressWarnings("unchecked")
@Override
public boolean contains(Object o)
{
  if (!(elements instanceof List<?>)) {
    return elements.contains(o);
  }
  try {
    final int position = Collections.binarySearch((List<T>) elements, (T) o);
    return position >= 0;
  }
  catch (ClassCastException e) {
    // object cannot be compared with the stored elements, hence not contained
    return false;
  }
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} or empty argument, and this set itself, are always
 * contained, including when this set is empty. When both sets are backed by
 * lists the check is a single merge pass over the two sorted sequences.
 */
@SuppressWarnings("unchecked")
@Override
public boolean containsAll(Collection<?> c)
{
  // Every set contains the empty collection and itself.
  if (c == null || c.isEmpty() || this == c) {
    return true;
  }
  if (isEmpty()) {
    return false;
  }
  if (elements instanceof List<?>
      && c instanceof GenericExtendedSet<?>
      && ((GenericExtendedSet<?>) c).elements instanceof List<?>) {
    final Iterator<T> thisItr = elements.iterator();
    final Iterator<T> otherItr = ((GenericExtendedSet<T>) c).elements.iterator();
    while (thisItr.hasNext() && otherItr.hasNext()) {
      T thisValue = thisItr.next();
      final T otherValue = otherItr.next();
      int r;
      // advance this side until it reaches or passes the wanted value
      while ((r = otherValue.compareTo(thisValue)) > 0) {
        if (!thisItr.hasNext()) {
          return false;
        }
        thisValue = thisItr.next();
      }
      if (r < 0) {
        // passed the wanted value without meeting it
        return false;
      }
    }
    // contained only if every element of the other set was matched
    return !otherItr.hasNext();
  }
  return elements.containsAll(c);
}
/**
 * {@inheritDoc}
 * <p>
 * For a {@link List} backing collection the result of {@link #union} replaces
 * the current elements; a change is reported when the new list differs from
 * the old one.
 */
@Override
public boolean addAll(Collection<? extends T> c)
{
  if (!(elements instanceof List<?>)) {
    return elements.addAll(c);
  }
  //TODO: inline the union code here
  final Collection<T> merged = union(c).elements;
  final boolean changed = !merged.equals(elements);
  elements = merged;
  return changed;
}
/**
 * {@inheritDoc}
 * <p>
 * For a {@link List} backing collection the result of {@link #intersection}
 * replaces the current elements; a {@link ClassCastException} raised on the
 * way is reported as "no change".
 */
@SuppressWarnings("unchecked")
@Override
public boolean retainAll(Collection<?> c)
{
  if (!(elements instanceof List<?>)) {
    return elements.retainAll(c);
  }
  try {
    //TODO: inline the intersection code here
    final Collection<T> kept = intersection((Collection<T>) c).elements;
    final boolean changed = !kept.equals(elements);
    elements = kept;
    return changed;
  }
  catch (ClassCastException e) {
    return false;
  }
}
/**
 * {@inheritDoc}
 * <p>
 * For a {@link List} backing collection the result of {@link #difference}
 * replaces the current elements; a {@link ClassCastException} raised on the
 * way is reported as "no change".
 */
@SuppressWarnings("unchecked")
@Override
public boolean removeAll(Collection<?> c)
{
  if (!(elements instanceof List<?>)) {
    return elements.removeAll(c);
  }
  try {
    //TODO: inline the difference code here
    final Collection<T> remaining = difference((Collection<T>) c).elements;
    final boolean changed = !remaining.equals(elements);
    elements = remaining;
    return changed;
  }
  catch (ClassCastException e) {
    return false;
  }
}
/**
 * {@inheritDoc}
 * <p>
 * Two instances are equal when the other object is also a
 * {@link GenericExtendedSet} and its backing collection equals this one's.
 */
@Override
public boolean equals(Object o)
{
  if (!(o instanceof GenericExtendedSet<?>)) {
    return false;
  }
  final GenericExtendedSet<?> that = (GenericExtendedSet<?>) o;
  return that.elements.equals(elements);
}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the backing collection.
 */
@Override
public int size() {return elements.size();}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the backing collection.
 */
@Override
public boolean isEmpty() {return elements.isEmpty();}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the backing collection.
 */
@Override
public void clear() {elements.clear();}
/**
 * {@inheritDoc}
 * <p>
 * Uses the hash code of the backing collection.
 */
@Override
public int hashCode() {return elements.hashCode();}
/*
* SortedSet methods
*/
/**
 * {@inheritDoc}
 * <p>
 * Always returns {@code null}: elements are ordered by their natural
 * ordering ({@code T} is {@link Comparable}).
 */
@Override
public Comparator<? super T> comparator()
{
return null;
}
/**
 * {@inheritDoc}
 * <p>
 * Sorted-set and list backings answer directly; other backings fall back
 * to the inherited implementation.
 *
 * @throws NoSuchElementException if the set is backed by an empty list
 */
@Override
public T first()
{
  if (elements instanceof SortedSet<?>) {
    return ((SortedSet<T>) elements).first();
  }
  if (elements instanceof List<?>) {
    // get(0) on an empty list would raise IndexOutOfBoundsException, which is
    // not what the SortedSet contract promises for an empty set.
    if (elements.isEmpty()) {
      throw new NoSuchElementException();
    }
    return ((List<T>) elements).get(0);
  }
  return super.first();
}
/**
 * {@inheritDoc}
 * <p>
 * Sorted-set and list backings answer directly; other backings fall back
 * to the inherited implementation.
 *
 * @throws NoSuchElementException if the set is backed by an empty list
 */
@Override
public T last()
{
  if (elements instanceof SortedSet<?>) {
    return ((SortedSet<T>) elements).last();
  }
  if (elements instanceof List<?>) {
    // get(-1) on an empty list would raise IndexOutOfBoundsException, which is
    // not what the SortedSet contract promises for an empty set.
    if (elements.isEmpty()) {
      throw new NoSuchElementException();
    }
    return ((List<T>) elements).get(elements.size() - 1);
  }
  return super.last();
}
/**
 * {@inheritDoc}
 * <p>
 * For a {@link SortedSet} backing collection the result wraps that
 * collection's own head-set view; otherwise the inherited implementation is
 * used.
 */
@Override
public ExtendedSet<T> headSet(T toElement)
{
  if (!(elements instanceof SortedSet<?>)) {
    return super.headSet(toElement);
  }
  final GenericExtendedSet<T> view = empty();
  view.elements = ((SortedSet<T>) elements).headSet(toElement);
  return view;
}
/**
 * {@inheritDoc}
 * <p>
 * For a {@link SortedSet} backing collection the result wraps that
 * collection's own tail-set view; otherwise the inherited implementation is
 * used.
 */
@Override
public ExtendedSet<T> tailSet(T fromElement)
{
  if (elements instanceof SortedSet<?>) {
    final GenericExtendedSet<T> view = empty();
    view.elements = ((SortedSet<T>) elements).tailSet(fromElement);
    return view;
  }
  // The fallback must be the inherited tail set; it used to return the head set.
  return super.tailSet(fromElement);
}
/**
 * {@inheritDoc}
 * <p>
 * For a {@link SortedSet} backing collection the result wraps that
 * collection's own sub-set view; otherwise the inherited implementation is
 * used.
 */
@Override
public ExtendedSet<T> subSet(T fromElement, T toElement)
{
  if (elements instanceof SortedSet<?>) {
    final GenericExtendedSet<T> view = empty();
    view.elements = ((SortedSet<T>) elements).subSet(fromElement, toElement);
    return view;
  }
  // The fallback must honour both bounds; it used to return the head set and
  // ignore fromElement.
  return super.subSet(fromElement, toElement);
}
/*
* ExtendedSet methods
*/
/**
 * {@inheritDoc}
 * <p>
 * When both sets are backed by lists the common elements are counted in a
 * single merge pass over the two sorted sequences; otherwise the inherited
 * implementation is used.
 */
@Override
public int intersectionSize(Collection<? extends T> other)
{
// trivial cases: nothing can be shared with an empty or missing operand
if (isEmpty() || other == null || other.isEmpty()) {
return 0;
}
if (this == other) {
return size();
}
if (elements instanceof List<?>
&& other instanceof GenericExtendedSet<?>
&& ((GenericExtendedSet<?>) other).elements instanceof List<?>) {
int res = 0;
Iterator<T> thisItr = elements.iterator();
@SuppressWarnings("unchecked")
Iterator<T> otherItr = ((GenericExtendedSet<T>) other).elements.iterator();
while (thisItr.hasNext() && otherItr.hasNext()) {
T thisValue = thisItr.next();
T otherValue = otherItr.next();
int r = thisValue.compareTo(otherValue);
// advance whichever side is behind until the two values meet
while (r != 0) {
while ((r = thisValue.compareTo(otherValue)) > 0) {
if (!otherItr.hasNext()) {
return res;
}
otherValue = otherItr.next();
}
if (r == 0) {
break;
}
while ((r = otherValue.compareTo(thisValue)) > 0) {
if (!thisItr.hasNext()) {
return res;
}
thisValue = thisItr.next();
}
}
// the two values are equal here: one more shared element
res++;
}
return res;
}
return super.intersectionSize(other);
}
/**
 * {@inheritDoc}
 * <p>
 * When both sets are backed by lists the common elements are collected in a
 * single merge pass over the two sorted sequences; otherwise a clone of this
 * set is reduced with {@code retainAll} on its backing collection.
 */
@Override
public GenericExtendedSet<T> intersection(Collection<? extends T> other)
{
// trivial cases
if (isEmpty() || other == null || other.isEmpty()) {
return empty();
}
if (this == other) {
return clone();
}
if (elements instanceof List<?>
&& other instanceof GenericExtendedSet<?>
&& ((GenericExtendedSet<?>) other).elements instanceof List<?>) {
GenericExtendedSet<T> res = empty();
Iterator<T> thisItr = elements.iterator();
@SuppressWarnings("unchecked")
Iterator<T> otherItr = ((GenericExtendedSet<T>) other).elements.iterator();
while (thisItr.hasNext() && otherItr.hasNext()) {
T thisValue = thisItr.next();
T otherValue = otherItr.next();
int r = thisValue.compareTo(otherValue);
// advance whichever side is behind until the two values meet
while (r != 0) {
while ((r = thisValue.compareTo(otherValue)) > 0) {
if (!otherItr.hasNext()) {
return res;
}
otherValue = otherItr.next();
}
if (r == 0) {
break;
}
while ((r = otherValue.compareTo(thisValue)) > 0) {
if (!thisItr.hasNext()) {
return res;
}
thisValue = thisItr.next();
}
}
// the two values are equal here; appending keeps the result list sorted
res.elements.add(thisValue);
}
return res;
}
GenericExtendedSet<T> clone = clone();
clone.elements.retainAll(other);
return clone;
}
/**
 * {@inheritDoc}
 * <p>
 * When both sets are backed by lists the result is built in a single merge
 * pass over the two sorted sequences; otherwise the elements of
 * {@code other} are added one by one to a clone of this set.
 */
@Override
public GenericExtendedSet<T> union(Collection<? extends T> other)
{
// trivial cases: the union equals this set
if (this == other || other == null || other.isEmpty()) {
return clone();
}
if (isEmpty()) {
GenericExtendedSet<T> res = empty();
// NOTE(review): addAll goes straight to the backing collection, so for a
// list backing an unsorted or duplicate-bearing `other` looks like it would
// be stored as-is -- confirm callers only pass sorted, distinct input here
res.elements.addAll(other);
return res;
}
if (elements instanceof List<?>
&& other instanceof GenericExtendedSet<?>
&& ((GenericExtendedSet<?>) other).elements instanceof List<?>) {
GenericExtendedSet<T> res = empty();
Iterator<T> thisItr = elements.iterator();
@SuppressWarnings("unchecked")
Iterator<T> otherItr = ((GenericExtendedSet<T>) other).elements.iterator();
mainLoop:
while (thisItr.hasNext() && otherItr.hasNext()) {
T thisValue = thisItr.next();
T otherValue = otherItr.next();
int r = thisValue.compareTo(otherValue);
while (r != 0) {
// other side is behind: emit its values until it catches up
while ((r = thisValue.compareTo(otherValue)) > 0) {
res.elements.add(otherValue);
if (!otherItr.hasNext()) {
// other side exhausted: the pending value of this side is still owed
res.elements.add(thisValue);
break mainLoop;
}
otherValue = otherItr.next();
}
if (r == 0) {
break;
}
// this side is behind: emit its values until it catches up
while ((r = otherValue.compareTo(thisValue)) > 0) {
res.elements.add(thisValue);
if (!thisItr.hasNext()) {
// this side exhausted: the pending value of the other side is still owed
res.elements.add(otherValue);
break mainLoop;
}
thisValue = thisItr.next();
}
}
// equal values are emitted once
res.elements.add(thisValue);
}
// append whatever remains on either side
while (thisItr.hasNext()) {
res.elements.add(thisItr.next());
}
while (otherItr.hasNext()) {
res.elements.add(otherItr.next());
}
return res;
}
GenericExtendedSet<T> clone = clone();
for (T e : other) {
clone.add(e);
}
return clone;
}
/**
 * {@inheritDoc}
 * <p>
 * When both sets are backed by lists the result is built in a single merge
 * pass over the two sorted sequences; otherwise a clone of this set is
 * reduced with {@code removeAll} on its backing collection.
 */
@Override
public GenericExtendedSet<T> difference(Collection<? extends T> other)
{
// trivial cases
if (isEmpty() || this == other) {
return empty();
}
if (other == null || other.isEmpty()) {
return clone();
}
if (elements instanceof List<?>
&& other instanceof GenericExtendedSet<?>
&& ((GenericExtendedSet<?>) other).elements instanceof List<?>) {
GenericExtendedSet<T> res = empty();
Iterator<T> thisItr = elements.iterator();
@SuppressWarnings("unchecked")
Iterator<T> otherItr = ((GenericExtendedSet<T>) other).elements.iterator();
mainLoop:
while (thisItr.hasNext() && otherItr.hasNext()) {
T thisValue = thisItr.next();
T otherValue = otherItr.next();
int r = thisValue.compareTo(otherValue);
while (r != 0) {
// other side is behind: skip its values, they are not part of the result
while ((r = thisValue.compareTo(otherValue)) > 0) {
if (!otherItr.hasNext()) {
// nothing left to subtract: the pending value of this side survives
res.elements.add(thisValue);
break mainLoop;
}
otherValue = otherItr.next();
}
if (r == 0) {
break;
}
// this side is behind: its values are absent from other, so keep them
while ((r = otherValue.compareTo(thisValue)) > 0) {
res.elements.add(thisValue);
if (!thisItr.hasNext()) {
break mainLoop;
}
thisValue = thisItr.next();
}
}
// equal values are dropped (nothing is added here)
}
// everything left on this side survives
while (thisItr.hasNext()) {
res.elements.add(thisItr.next());
}
return res;
}
GenericExtendedSet<T> clone = clone();
clone.elements.removeAll(other);
return clone;
}
/**
 * {@inheritDoc}
 * <p>
 * The symmetric difference of a set with itself is empty. When both sets
 * are backed by lists the result is built in a single merge pass over the
 * two sorted sequences; otherwise it is computed as the union minus the
 * intersection.
 */
@Override
public GenericExtendedSet<T> symmetricDifference(Collection<? extends T> other)
{
  // A xor A is empty; this case used to return a full copy of the set.
  if (this == other) {
    return empty();
  }
  if (other == null || other.isEmpty()) {
    return clone();
  }
  if (isEmpty()) {
    final GenericExtendedSet<T> res = empty();
    res.elements.addAll(other);
    return res;
  }
  if (elements instanceof List<?>
      && other instanceof GenericExtendedSet<?>
      && ((GenericExtendedSet<?>) other).elements instanceof List<?>) {
    final GenericExtendedSet<T> res = empty();
    final Iterator<T> thisItr = elements.iterator();
    @SuppressWarnings("unchecked")
    final Iterator<T> otherItr = ((GenericExtendedSet<T>) other).elements.iterator();
    mainLoop:
    while (thisItr.hasNext() && otherItr.hasNext()) {
      T thisValue = thisItr.next();
      T otherValue = otherItr.next();
      int r = thisValue.compareTo(otherValue);
      while (r != 0) {
        // other side is behind: its values are unique to it, so emit them
        while ((r = thisValue.compareTo(otherValue)) > 0) {
          res.elements.add(otherValue);
          if (!otherItr.hasNext()) {
            res.elements.add(thisValue);
            break mainLoop;
          }
          otherValue = otherItr.next();
        }
        if (r == 0) {
          break;
        }
        // this side is behind: its values are unique to it, so emit them
        while ((r = otherValue.compareTo(thisValue)) > 0) {
          res.elements.add(thisValue);
          if (!thisItr.hasNext()) {
            res.elements.add(otherValue);
            break mainLoop;
          }
          thisValue = thisItr.next();
        }
      }
      // equal values belong to both sets and are dropped
    }
    // append whatever remains on either side
    while (thisItr.hasNext()) {
      res.elements.add(thisItr.next());
    }
    while (otherItr.hasNext()) {
      res.elements.add(otherItr.next());
    }
    return res;
  }
  final GenericExtendedSet<T> result = union(other);
  result.removeAll(intersection(other));
  return result;
}
/**
 * {@inheritDoc}
 * <p>
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void complement()
{
throw new UnsupportedOperationException();
}
/**
 * {@inheritDoc}
 * <p>
 * The returned set shares this set's elements through
 * {@link Collections#unmodifiableCollection(Collection)}.
 */
@Override
public ExtendedSet<T> unmodifiable()
{
GenericExtendedSet<T> c = empty();
// NOTE(review): the wrapper is a plain Collection, so the List/SortedSet
// specific branches of this class presumably no longer apply to the
// returned set -- confirm this is intended
c.elements = Collections.unmodifiableCollection(elements);
return c;
}
/**
 * {@inheritDoc}
 * <p>
 * Not supported by this implementation.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void fill(T from, T to)
{
throw new UnsupportedOperationException();
}
/**
 * {@inheritDoc}
 * <p>
 * After the inherited conversion, a {@link List} backing collection is
 * sorted in place.
 */
@Override
public GenericExtendedSet<T> convert(Collection<?> c)
{
  final GenericExtendedSet<T> converted = (GenericExtendedSet<T>) super.convert(c);
  final Collection<T> backing = converted.elements;
  if (backing instanceof List<?>) {
    Collections.sort((List<T>) backing);
  }
  return converted;
}
/**
 * {@inheritDoc}
 * <p>
 * After the inherited conversion, a {@link List} backing collection is
 * sorted in place.
 */
@Override
public GenericExtendedSet<T> convert(Object... e)
{
  final GenericExtendedSet<T> converted = (GenericExtendedSet<T>) super.convert(e);
  final Collection<T> backing = converted.elements;
  if (backing instanceof List<?>) {
    Collections.sort((List<T>) backing);
  }
  return converted;
}
}

View File

@ -0,0 +1,741 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.wrappers;
import io.druid.extendedset.AbstractExtendedSet;
import io.druid.extendedset.ExtendedSet;
import io.druid.extendedset.intset.IntSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* An {@link ExtendedSet} implementation that maps each element of the universe (i.e., the collection of all possible elements) to an integer referred to as its "index".
*
* @param <T> the type of elements maintained by this set
*
* @author Alessandro Colantonio
* @version $Id: IndexedSet.java 154 2011-05-30 22:19:24Z cocciasik $
* @see ExtendedSet
* @see AbstractExtendedSet
*/
public class IndexedSet<T> extends AbstractExtendedSet<T> implements java.io.Serializable
{
/**
 * generated serial ID
 */
private static final long serialVersionUID = -2386771695765773453L;
// indices of the universe items that currently belong to this set
/**
 * @uml.property name="indices"
 * @uml.associationEnd
 */
private final IntSet indices;
// Universe mapping, in both directions: item -> index and index -> item.
// Instances derived from one another share these two objects by reference.
private final Map<T, Integer> itemToIndex;
private final T[] indexToItem;
/**
* Creates an empty {@link IndexedSet} based on a given collection that
* represents the set of <i>all</i> possible items that can be added to the
* {@link IndexedSet} instance.
* <p>
* <b>VERY IMPORTANT!</b> to correctly work and effectively reduce the
* memory allocation, new instances of {@link IndexedSet} <i>must</i> be
* created through the {@link #clone()} or {@link #empty()} methods and
* <i>not</i> by calling many times this constructor with the same
* collection for <code>universe</code>!
*
* @param indices {@link IntSet} instance used for internal representation
* @param universe collection of <i>all</i> possible items. Order will be
* preserved.
*/
@SuppressWarnings("unchecked")
public IndexedSet(IntSet indices, final Collection<T> universe)
{
// NOTE: this procedure removes duplicates while keeping the order
indexToItem = universe instanceof Set ? (T[]) universe.toArray() : (T[]) (new LinkedHashSet<T>(universe)).toArray();
itemToIndex = new HashMap<T, Integer>(Math.max((int) (indexToItem.length / .75f) + 1, 16));
for (int i = 0; i < indexToItem.length; i++) {
itemToIndex.put(indexToItem[i], Integer.valueOf(i));
}
this.indices = indices;
}
/**
 * Creates a {@link IndexedSet} instance from a given universe
 * mapping. The three arguments are stored by reference, not copied.
 *
 * @param itemToIndex universe item-to-index mapping
 * @param indexToItem universe index-to-item mapping
 * @param indices     initial item set
 */
private IndexedSet(Map<T, Integer> itemToIndex, T[] indexToItem, IntSet indices)
{
this.itemToIndex = itemToIndex;
this.indexToItem = indexToItem;
this.indices = indices;
}
/**
 * A shortcut for <code>new IndexedSet&lt;T&gt;(itemToIndex, indexToItem, indices)</code>:
 * the new instance shares this set's universe mapping.
 *
 * @param indx indices of the new set; stored by reference
 *
 * @return a set over the same universe, backed by {@code indx}
 */
private IndexedSet<T> createFromIndices(IntSet indx)
{
return new IndexedSet<T>(itemToIndex, indexToItem, indx);
}
/**
 * Checks if the given collection is an instance of {@link IndexedSet} with
 * the same index mappings.
 *
 * @param c collection to check
 *
 * @return <code>true</code> if the given collection is an instance of
 * {@link IndexedSet} with the same index mappings
 */
private boolean hasSameIndices(Collection<?> c)
{
  if (!(c instanceof IndexedSet)) {
    return false;
  }
  // Mappings are created once by the public constructor and then shared by
  // reference, so comparing one of the two tables by identity is sufficient.
  final IndexedSet<?> that = (IndexedSet<?>) c;
  return indexToItem == that.indexToItem;
}
/**
 * {@inheritDoc}
 * <p>
 * The index set is cloned; the universe mapping is shared with this
 * instance.
 */
@Override
public IndexedSet<T> clone()
{
return createFromIndices(indices.clone());
}
/**
 * {@inheritDoc}
 * <p>
 * Any {@link Collection} is accepted: it is converted to an
 * {@link IndexedSet} over this universe and the index sets are compared. A
 * collection holding an item outside the universe is simply not equal.
 */
@Override
public boolean equals(Object obj)
{
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof Collection<?>)) {
    return false;
  }
  final Collection<?> candidate = (Collection<?>) obj;
  if (!hasSameIndices(candidate)) {
    // convert() throws for items outside the universe; equals must answer
    // false instead of propagating that exception.
    for (Object item : candidate) {
      if (!itemToIndex.containsKey(item)) {
        return false;
      }
    }
  }
  final IndexedSet<?> other = convert(candidate);
  return this.indexToItem == other.indexToItem
         && this.itemToIndex == other.itemToIndex
         && this.indices.equals(other.indices);
}
/**
 * {@inheritDoc}
 * <p>
 * Uses the hash code of the index set only.
 */
@Override
public int hashCode()
{
return indices.hashCode();
}
/**
 * {@inheritDoc}
 * <p>
 * The other set is converted to this universe and the two index sets are
 * compared.
 */
@Override
public int compareTo(ExtendedSet<T> o)
{
return indices.compareTo(convert(o).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * The returned comparator orders items by their index in the universe.
 */
@Override
public Comparator<? super T> comparator()
{
  return new Comparator<T>()
  {
    @Override
    public int compare(T o1, T o2)
    {
      // compare elements according to the universe ordering
      final Integer left = itemToIndex.get(o1);
      final Integer right = itemToIndex.get(o2);
      return left.compareTo(right);
    }
  };
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the universe item stored at the index given by
 * {@code indices.first()}.
 */
@Override
public T first()
{
return indexToItem[indices.first()];
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the universe item stored at the index given by
 * {@code indices.last()}.
 */
@Override
public T last()
{
return indexToItem[indices.last()];
}
/**
 * {@inheritDoc}
 *
 * @throws IllegalArgumentException if the element is not part of the universe
 */
@Override
public boolean add(T e)
{
  final Integer index = itemToIndex.get(e);
  if (index != null) {
    return indices.add(index.intValue());
  }
  throw new IllegalArgumentException("element not in the current universe");
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} or empty collection changes nothing and yields
 * {@code false}; otherwise the collection is converted to this universe and
 * its indices are added.
 */
@Override
public boolean addAll(Collection<? extends T> c)
{
return c != null && !c.isEmpty() && indices.addAll(convert(c).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * Clears the index set; the universe mapping is left untouched.
 */
@Override
public void clear()
{
indices.clear();
}
/**
 * {@inheritDoc}
 *
 * @throws IllegalArgumentException if the element is not part of the universe
 */
@Override
public void flip(T e)
{
  final Integer index = itemToIndex.get(e);
  // Same failure mode as add(): an unknown element is reported explicitly
  // rather than surfacing as a NullPointerException from unboxing.
  if (index == null) {
    throw new IllegalArgumentException("element not in the current universe");
  }
  indices.flip(index.intValue());
}
/**
 * {@inheritDoc}
 * <p>
 * {@code null} and objects outside the universe are never contained.
 */
@Override
public boolean contains(Object o)
{
  if (o == null) {
    return false;
  }
  final Integer index = itemToIndex.get(o);
  if (index == null) {
    return false;
  }
  return indices.contains(index.intValue());
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} collection is treated as contained; otherwise the
 * collection is converted to this universe and the index sets are compared.
 */
@Override
public boolean containsAll(Collection<?> c)
{
return c == null || indices.containsAll(convert(c).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} collection yields {@code true}; otherwise the collection
 * is converted to this universe and the index sets are compared.
 */
@Override
public boolean containsAny(Collection<? extends T> other)
{
return other == null || indices.containsAny(convert(other).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} or empty collection yields {@code false}; otherwise the
 * collection is converted to this universe and the check is delegated to
 * the index set.
 */
@Override
public boolean containsAtLeast(Collection<? extends T> other, int minElements)
{
return other != null && !other.isEmpty() && indices.containsAtLeast(convert(other).indices, minElements);
}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the index set.
 */
@Override
public boolean isEmpty()
{
return indices.isEmpty();
}
/**
 * {@inheritDoc}
 * <p>
 * Wraps the iterator of the index set and translates every index to its
 * universe item. Removal is forwarded to the index iterator.
 */
@Override
public ExtendedIterator<T> iterator()
{
return new ExtendedIterator<T>()
{
final IntSet.IntIterator itr = indices.iterator();
@Override
public boolean hasNext() {return itr.hasNext();}
@Override
public T next() {return indexToItem[itr.next()];}
// NOTE(review): an element outside the universe makes itemToIndex.get return
// null, so this looks like it fails with a NullPointerException -- confirm
@Override
public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());}
@Override
public void remove() {itr.remove();}
};
}
/**
 * {@inheritDoc}
 * <p>
 * Wraps the descending iterator of the index set and translates every index
 * to its universe item. Removal is forwarded to the index iterator.
 */
@Override
public ExtendedIterator<T> descendingIterator()
{
return new ExtendedIterator<T>()
{
final IntSet.IntIterator itr = indices.descendingIterator();
@Override
public boolean hasNext() {return itr.hasNext();}
@Override
public T next() {return indexToItem[itr.next()];}
// NOTE(review): an element outside the universe makes itemToIndex.get return
// null, so this looks like it fails with a NullPointerException -- confirm
@Override
public void skipAllBefore(T element) {itr.skipAllBefore(itemToIndex.get(element).intValue());}
@Override
public void remove() {itr.remove();}
};
}
/**
 * {@inheritDoc}
 * <p>
 * {@code null} and objects outside the universe are reported as not removed.
 */
@Override
public boolean remove(Object o)
{
  if (o == null) {
    return false;
  }
  final Integer index = itemToIndex.get(o);
  if (index == null) {
    return false;
  }
  return indices.remove(index.intValue());
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} or empty collection changes nothing and yields
 * {@code false}; otherwise the collection is converted to this universe and
 * its indices are removed.
 */
@Override
public boolean removeAll(Collection<?> c)
{
return c != null && !c.isEmpty() && indices.removeAll(convert(c).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * An empty set is never changed. Retaining a {@code null} or empty
 * collection empties a non-empty set and reports a change.
 */
@Override
public boolean retainAll(Collection<?> c)
{
  if (isEmpty()) {
    return false;
  }
  final boolean nothingToKeep = c == null || c.isEmpty();
  if (!nothingToKeep) {
    return indices.retainAll(convert(c).indices);
  }
  indices.clear();
  return true;
}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the index set.
 */
@Override
public int size()
{
return indices.size();
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields an empty set over the same universe.
 */
@Override
public IndexedSet<T> intersection(Collection<? extends T> other)
{
  return other == null
         ? empty()
         : createFromIndices(indices.intersection(convert(other).indices));
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields a clone of this set.
 */
@Override
public IndexedSet<T> union(Collection<? extends T> other)
{
  return other == null
         ? clone()
         : createFromIndices(indices.union(convert(other).indices));
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields a clone of this set.
 */
@Override
public IndexedSet<T> difference(Collection<? extends T> other)
{
  return other == null
         ? clone()
         : createFromIndices(indices.difference(convert(other).indices));
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields a clone of this set.
 */
@Override
public IndexedSet<T> symmetricDifference(Collection<? extends T> other)
{
  return other == null
         ? clone()
         : createFromIndices(indices.symmetricDifference(convert(other).indices));
}
/**
 * {@inheritDoc}
 * <p>
 * Wraps the complemented index set in a new instance over the same
 * universe.
 */
@Override
public IndexedSet<T> complemented()
{
return createFromIndices(indices.complemented());
}
/**
 * {@inheritDoc}
 * <p>
 * Complements the index set in place.
 */
@Override
public void complement()
{
indices.complement();
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields 0.
 */
@Override
public int intersectionSize(Collection<? extends T> other)
{
  return other == null ? 0 : indices.intersectionSize(convert(other).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields the size of this set.
 */
@Override
public int unionSize(Collection<? extends T> other)
{
  return other == null ? size() : indices.unionSize(convert(other).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields the size of this set.
 */
@Override
public int symmetricDifferenceSize(Collection<? extends T> other)
{
  return other == null ? size() : indices.symmetricDifferenceSize(convert(other).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * A {@code null} operand yields the size of this set.
 */
@Override
public int differenceSize(Collection<? extends T> other)
{
  return other == null ? size() : indices.differenceSize(convert(other).indices);
}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the index set.
 */
@Override
public int complementSize()
{
return indices.complementSize();
}
/**
 * Returns the collection of all possible elements, as a new set over the
 * same universe with every index from 0 to the universe size minus one
 * filled in. An empty universe yields an empty set.
 *
 * @return the collection of all possible elements
 */
public IndexedSet<T> universe()
{
  final IntSet allItems = indices.empty();
  // With no items there is no valid index range: skip fill(0, -1), which
  // would describe an inverted range.
  if (indexToItem.length > 0) {
    allItems.fill(0, indexToItem.length - 1);
  }
  return createFromIndices(allItems);
}
/**
 * Returns the index of the given item within the universe, whether or not
 * the item currently belongs to this set.
 *
 * @param item item to look up
 *
 * @return the index of the given item, or {@code null} when the universe
 * mapping has no entry for it
 */
public Integer absoluteIndexOf(T item)
{
return itemToIndex.get(item);
}
/**
 * Returns the universe item corresponding to the given index, whether or
 * not the item currently belongs to this set.
 *
 * @param i index within the universe
 *
 * @return the item
 *
 * @throws ArrayIndexOutOfBoundsException if {@code i} is outside the universe
 */
public T absoluteGet(int i)
{
return indexToItem[i];
}
/**
 * Returns the set of indices. The internal instance itself is returned, so
 * modifications to it are reflected in this {@link IndexedSet} instance.
 * Trying to perform an operation on out-of-bound indices will throw an
 * {@link IllegalArgumentException}.
 *
 * @return the index set
 *
 * @see #absoluteGet(int)
 * @see #absoluteIndexOf(Object)
 */
public IntSet indices()
{
return indices;
}
/**
 * {@inheritDoc}
 * <p>
 * The new set shares this set's universe mapping and is backed by an empty
 * index set obtained from {@code indices.empty()}.
 */
@Override
public IndexedSet<T> empty()
{
return createFromIndices(indices.empty());
}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the index set.
 */
@Override
public double bitmapCompressionRatio()
{
return indices.bitmapCompressionRatio();
}
/**
 * {@inheritDoc}
 * <p>
 * Delegates to the index set.
 */
@Override
public double collectionCompressionRatio()
{
return indices.collectionCompressionRatio();
}
/**
 * {@inheritDoc}
 * <p>
 * {@code null} converts to an empty set. A collection that already is an
 * {@link IndexedSet} over the same universe is returned as-is, not copied.
 * Any other collection is added item by item to a new empty set.
 *
 * @throws IllegalArgumentException if an item is not part of the universe
 */
@SuppressWarnings("unchecked")
@Override
public IndexedSet<T> convert(Collection<?> c)
{
  if (c == null) {
    return empty();
  }
  // useless to convert...
  if (hasSameIndices(c)) {
    return (IndexedSet<T>) c;
  }
  // NOTE: cannot call super.convert(c) because of loop
  final IndexedSet<T> converted = empty();
  final Collection<T> items = (Collection<T>) c;
  for (T item : items) {
    converted.add(item);
  }
  return converted;
}
/**
 * {@inheritDoc}
 * <p>
 * Uses the inherited conversion and narrows the result type.
 */
@Override
public IndexedSet<T> convert(Object... e)
{
return (IndexedSet<T>) super.convert(e);
}
/**
 * {@inheritDoc}
 * <p>
 * Equivalent to {@code powerSet(1, Integer.MAX_VALUE)}.
 */
@Override
public List<? extends IndexedSet<T>> powerSet()
{
return powerSet(1, Integer.MAX_VALUE);
}
/**
 * {@inheritDoc}
 * <p>
 * The power set is computed on the index set; every resulting index set is
 * wrapped in an {@link IndexedSet} over the same universe.
 */
@Override
public List<? extends IndexedSet<T>> powerSet(int min, int max)
{
  final List<? extends IntSet> indexSubsets = indices.powerSet(min, max);
  final List<IndexedSet<T>> wrapped = new ArrayList<IndexedSet<T>>(indexSubsets.size());
  for (IntSet subset : indexSubsets) {
    wrapped.add(createFromIndices(subset));
  }
  return wrapped;
}
/**
 * {@inheritDoc}
 * <p>
 * Reports three lines: the debug information of the index set, the
 * item-to-index map and the index-to-item array.
 */
@Override
public String debugInfo()
{
return String.format("items = %s\nitemToIndex = %s\nindexToItem = %s\n",
indices.debugInfo(), itemToIndex.toString(), Arrays.toString(indexToItem)
);
}
/**
 * {@inheritDoc}
 * <p>
 * The other set is converted to this universe and the computation is
 * delegated to the index sets.
 */
@Override
public double jaccardSimilarity(ExtendedSet<T> other)
{
return indices.jaccardSimilarity(convert(other).indices);
}
//TODO
// /**
// * {@inheritDoc}
// */
// @Override
// public IndexedSet<T> unmodifiable() {
// return createFromIndices(indices.unmodifiable());
// }
//
// /**
// * {@inheritDoc}
// */
// @Override
// public IndexedSet<T> subSet(T fromElement, T toElement) {
// return createFromIndices(indices.subSet(itemToIndex.get(fromElement), itemToIndex.get(toElement)));
// }
//
// /**
// * {@inheritDoc}
// */
// @Override
// public IndexedSet<T> headSet(T toElement) {
// return createFromIndices(indices.headSet(itemToIndex.get(toElement)));
// }
//
// /**
// * {@inheritDoc}
// */
// @Override
// public IndexedSet<T> tailSet(T fromElement) {
// return createFromIndices(indices.tailSet(itemToIndex.get(fromElement)));
// }
/**
 * {@inheritDoc}
 * <p>
 * Reads the {@code i}-th value of the underlying index set and maps it back to
 * the corresponding item through the {@code indexToItem} array.
 */
@Override
public T get(int i)
{
return indexToItem[indices.get(i)];
}
/**
 * {@inheritDoc}
 * <p>
 * The element is translated to its index through {@code itemToIndex} and the
 * position of that index within the underlying index set is returned.
 * <p>
 * An element that has no entry in {@code itemToIndex} cannot be a member of
 * this set, so {@code -1} is returned for it instead of failing with a
 * {@link NullPointerException} while unboxing the missing mapping.
 *
 * @param e the element to look for
 *
 * @return the position of {@code e} within this set, or {@code -1} when
 * {@code e} has no index mapping
 */
@Override
public int indexOf(T e)
{
  final Integer index = itemToIndex.get(e);
  if (index == null) {
    // unknown element: report "not found" rather than throwing
    return -1;
  }
  return indices.indexOf(index.intValue());
}
/**
 * {@inheritDoc}
 * <p>
 * Both bounds are translated to indices through {@code itemToIndex} and the
 * range is cleared on the underlying index set.
 * <p>
 * NOTE(review): the looked-up values are unboxed without a null check, so a
 * bound that has no mapping presumably fails with a
 * {@link NullPointerException} - confirm against how {@code itemToIndex}
 * is populated.
 */
@Override
public void clear(T from, T to)
{
indices.clear(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue());
}
/**
 * {@inheritDoc}
 * <p>
 * Both bounds are translated to indices through {@code itemToIndex} and the
 * range is filled on the underlying index set.
 * <p>
 * NOTE(review): the looked-up values are unboxed without a null check, so a
 * bound that has no mapping presumably fails with a
 * {@link NullPointerException} - confirm against how {@code itemToIndex}
 * is populated.
 */
@Override
public void fill(T from, T to)
{
indices.fill(itemToIndex.get(from).intValue(), itemToIndex.get(to).intValue());
}
}

View File

@ -0,0 +1,580 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.wrappers;
import io.druid.extendedset.AbstractExtendedSet;
import io.druid.extendedset.ExtendedSet;
import io.druid.extendedset.intset.IntSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
* This class provides a "wrapper" for any {@link IntSet} instance in order to be used as an {@link ExtendedSet} instance.
*
* @author Alessandro Colantonio
* @version $Id: IntegerSet.java 153 2011-05-30 16:39:57Z cocciasik $
*/
public class IntegerSet extends AbstractExtendedSet<Integer>
{
/**
* the collection of <code>int</code> numbers
*
* @uml.property name="items"
* @uml.associationEnd
*/
private final IntSet items;
/**
* Wraps an instance of {@link IntSet}
*
* @param items the {@link IntSet} to wrap
*/
public IntegerSet(IntSet items)
{
this.items = items;
}
/**
* @return the internal integer representation
*/
public IntSet intSet()
{
return items;
}
/**
* Converts a generic collection of {@link Integer} instances to a
* {@link IntSet} instance. If the given collection is an
* {@link IntegerSet} instance, it returns the contained
* {@link #items} object.
*
* @param c the generic collection of {@link Integer} instances
*
* @return the resulting {@link IntSet} instance
*/
private IntSet toIntSet(Collection<?> c)
{
// nothing to convert
if (c == null) {
return null;
}
if (c instanceof IntegerSet) {
return ((IntegerSet) c).items;
}
// extract integers from the given collection
IntSet res = items.empty();
List<Integer> sorted = new ArrayList<Integer>(c.size());
for (Object i : c) {
try {
sorted.add((Integer) i);
}
catch (ClassCastException e) {
// do nothing
}
}
Collections.sort(sorted);
for (Integer i : sorted) {
res.add(i.intValue());
}
return res;
}
/**
* {@inheritDoc}
*/
@Override
public boolean addAll(Collection<? extends Integer> c)
{
return items.addAll(toIntSet(c));
}
/**
* {@inheritDoc}
*/
@Override
public double bitmapCompressionRatio()
{
return items.bitmapCompressionRatio();
}
/**
* {@inheritDoc}
*/
@Override
public void clear(Integer from, Integer to)
{
items.clear(from.intValue(), to.intValue());
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet clone()
{
// NOTE: do not use super.clone() since it is 10 times slower!
return new IntegerSet(items.clone());
}
/**
* {@inheritDoc}
*/
@Override
public double collectionCompressionRatio()
{
return items.collectionCompressionRatio();
}
/**
* {@inheritDoc}
*/
@Override
public int compareTo(ExtendedSet<Integer> o)
{
return items.compareTo(toIntSet(o));
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet complemented()
{
return new IntegerSet(items.complemented());
}
/**
* {@inheritDoc}
*/
@Override
public int complementSize()
{
return items.complementSize();
}
/**
* {@inheritDoc}
*/
@Override
public boolean containsAny(Collection<? extends Integer> other)
{
return items.containsAny(toIntSet(other));
}
/**
* {@inheritDoc}
*/
@Override
public boolean containsAtLeast(Collection<? extends Integer> other, int minElements)
{
return items.containsAtLeast(toIntSet(other), minElements);
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet convert(Collection<?> c)
{
return new IntegerSet(toIntSet(c));
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet convert(Object... e)
{
return convert(Arrays.asList(e));
}
/**
* {@inheritDoc}
*/
@Override
public String debugInfo()
{
return getClass().getSimpleName() + "\n" + items.debugInfo();
}
/**
* {@inheritDoc}
*/
@Override
public ExtendedIterator<Integer> descendingIterator()
{
return new ExtendedIterator<Integer>()
{
final IntSet.IntIterator itr = items.descendingIterator();
@Override
public void remove() {itr.remove();}
@Override
public Integer next() {return Integer.valueOf(itr.next());}
@Override
public boolean hasNext() {return itr.hasNext();}
@Override
public void skipAllBefore(Integer element) {itr.skipAllBefore(element.intValue());}
};
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet difference(Collection<? extends Integer> other)
{
return new IntegerSet(items.difference(toIntSet(other)));
}
/**
* {@inheritDoc}
*/
@Override
public int differenceSize(Collection<? extends Integer> other)
{
return items.differenceSize(toIntSet(other));
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet empty()
{
return new IntegerSet(items.empty());
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (!(o instanceof IntegerSet)) {
return false;
}
return items.equals(((IntegerSet) o).items);
}
/**
* {@inheritDoc}
*/
@Override
public void fill(Integer from, Integer to)
{
items.fill(from.intValue(), to.intValue());
}
/**
* {@inheritDoc}
*/
@Override
public Integer first()
{
return Integer.valueOf(items.first());
}
/**
* {@inheritDoc}
*/
@Override
public void flip(Integer e)
{
items.flip(e.intValue());
}
/**
* {@inheritDoc}
*/
@Override
public Integer get(int i)
{
return Integer.valueOf(items.get(i));
}
/**
* {@inheritDoc}
*/
@Override
public int indexOf(Integer e)
{
return items.indexOf(e.intValue());
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet intersection(Collection<? extends Integer> other)
{
return new IntegerSet(items.intersection(toIntSet(other)));
}
/**
* {@inheritDoc}
*/
@Override
public int intersectionSize(Collection<? extends Integer> other)
{
return items.intersectionSize(toIntSet(other));
}
/**
* {@inheritDoc}
*/
@Override
public ExtendedIterator<Integer> iterator()
{
return new ExtendedIterator<Integer>()
{
final IntSet.IntIterator itr = items.iterator();
@Override
public void remove() {itr.remove();}
@Override
public Integer next() {return Integer.valueOf(itr.next());}
@Override
public boolean hasNext() {return itr.hasNext();}
@Override
public void skipAllBefore(Integer element) {itr.skipAllBefore(element.intValue());}
};
}
/**
* {@inheritDoc}
*/
@Override
public Integer last()
{
return Integer.valueOf(items.last());
}
/**
* {@inheritDoc}
*/
@Override
public List<? extends IntegerSet> powerSet()
{
return powerSet(1, Integer.MAX_VALUE);
}
/**
* {@inheritDoc}
*/
@Override
public List<? extends IntegerSet> powerSet(int min, int max)
{
List<? extends IntSet> ps = items.powerSet(min, max);
List<IntegerSet> res = new ArrayList<IntegerSet>(ps.size());
for (IntSet s : ps) {
res.add(new IntegerSet(s));
}
return res;
}
/**
* {@inheritDoc}
*/
@Override
public boolean removeAll(Collection<?> c)
{
return items.removeAll(toIntSet(c));
}
/**
* {@inheritDoc}
*/
@Override
public boolean retainAll(Collection<?> c)
{
return items.retainAll(toIntSet(c));
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet symmetricDifference(Collection<? extends Integer> other)
{
return new IntegerSet(items.symmetricDifference(toIntSet(other)));
}
/**
* {@inheritDoc}
*/
@Override
public int symmetricDifferenceSize(Collection<? extends Integer> other)
{
return items.symmetricDifferenceSize(toIntSet(other));
}
/**
* {@inheritDoc}
*/
@Override
public IntegerSet union(Collection<? extends Integer> other)
{
return new IntegerSet(items.union(toIntSet(other)));
}
/**
* {@inheritDoc}
*/
@Override
public int unionSize(Collection<? extends Integer> other)
{
return items.unionSize(toIntSet(other));
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode()
{
return items.hashCode();
}
/**
* {@inheritDoc}
*/
@Override
public void complement()
{
items.complement();
}
/**
* {@inheritDoc}
*/
@Override
public Comparator<? super Integer> comparator()
{
return null;
}
/**
* {@inheritDoc}
*/
@Override
public boolean add(Integer e)
{
return items.add(e.intValue());
}
/**
* {@inheritDoc}
*/
@Override
public void clear()
{
items.clear();
}
/**
* {@inheritDoc}
*/
@Override
public boolean contains(Object o)
{
return o instanceof Integer && items.contains(((Integer) o).intValue());
}
/**
* {@inheritDoc}
*/
@Override
public boolean containsAll(Collection<?> c)
{
return items.containsAll(toIntSet(c));
}
/**
* {@inheritDoc}
*/
@Override
public boolean isEmpty()
{
return items.isEmpty();
}
/**
* {@inheritDoc}
*/
@Override
public boolean remove(Object o)
{
return o instanceof Integer && items.remove(((Integer) o).intValue());
}
/**
* {@inheritDoc}
*/
@Override
public int size()
{
return items.size();
}
/**
* {@inheritDoc}
*/
@Override
public String toString()
{
// NOTE: by not calling super.toString(), we avoid to iterate over new
// Integer instances, thus avoiding to waste time and memory with garbage
// collection
return items.toString();
}
/**
* {@inheritDoc}
*/
@Override
public double jaccardSimilarity(ExtendedSet<Integer> other)
{
return items.jaccardSimilarity(toIntSet(other));
}
/**
* {@inheritDoc}
*/
@Override
public double weightedJaccardSimilarity(ExtendedSet<Integer> other)
{
return items.weightedJaccardSimilarity(toIntSet(other));
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,106 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.wrappers.matrix;
/**
* A class for representing a single transaction-item relationship. This class
* is mainly used in {@link PairSet} to iterate over the cells of a
* binary matrix.
*
* @param <T> transaction type
* @param <I> item type
*
* @author Alessandro Colantonio
* @version $Id: Pair.java 140 2011-02-07 21:30:29Z cocciasik $
* @see PairSet
*/
public class Pair<T, I> implements java.io.Serializable
{
/**
* generated ID
*/
private static final long serialVersionUID = 328985131584539749L;
/**
* the transaction
*/
public final T transaction;
/**
* the item
*/
public final I item;
/**
* Creates a new transaction-item pair
*
* @param transaction
* @param item
*/
public Pair(T transaction, I item)
{
this.transaction = transaction;
this.item = item;
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode()
{
// 524287 * i = (i << 19) - i, where 524287 is prime.
// This hash function avoids transactions and items to overlap,
// since "item" can often stay in 32 - 19 = 13 bits. Therefore, it is
// better than multiplying by 31.
final int hi = item.hashCode();
final int ht = transaction.hashCode();
return (hi << 19) - hi + ht;
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj)
{
if (obj == null) {
return false;
}
if (this == obj) {
return true;
}
if (!(obj instanceof Pair<?, ?>)) {
return false;
}
@SuppressWarnings("unchecked")
Pair<T, I> other = (Pair<T, I>) obj;
return transaction.equals(other.transaction) && item.equals(other.item);
}
/**
* {@inheritDoc}
*/
@Override
public String toString()
{
return "(" + transaction + ", " + item + ")";
}
}

View File

@ -0,0 +1,448 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset.wrappers.matrix;
import java.io.Serializable;
import java.util.AbstractCollection;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
/**
 * A class that associates a value with each pair within a {@link PairSet} instance. It is not as fast as {@link HashMap}, but requires much less memory.
 *
 * @param <T> transaction type
 * @param <I> item type
 * @param <V> type of the value to associate
*
* @author Alessandro Colantonio
* @version $Id: PairMap.java 153 2011-05-30 16:39:57Z cocciasik $
* @see PairSet
*/
public class PairMap<T, I, V> extends AbstractMap<Pair<T, I>, V> implements Serializable, Cloneable
{
  /**
   * generated serial ID
   */
  private static final long serialVersionUID = 4699094886888004702L;

  /**
   * all existing keys
   *
   * @uml.property name="keys"
   * @uml.associationEnd
   */
  private final PairSet<T, I> keys;

  /**
   * values related to existing keys, according to the ordering provided by {@link #keys}
   */
  private final ArrayList<V> values;

  /**
   * Creates a map over the given key set. The set is used directly (it is not
   * copied), so this map and the caller share it.
   *
   * @param keys {@link PairSet} instance internally used to store indices. If
   *             not empty, {@link #get(Object)} will return <code>null</code>
   *             for each existing pair if we do not also put a value.
   */
  public PairMap(PairSet<T, I> keys)
  {
    this.keys = keys;
    values = new ArrayList<V>(keys.size());
    // one (null) value slot for each pre-existing key
    for (int i = 0; i < keys.size(); i++) {
      values.add(null);
    }
  }

  /**
   * Null-safe equality test.
   *
   * @return <code>true</code> if both arguments are <code>null</code> or
   * <code>a.equals(b)</code>
   */
  private static boolean nullSafeEquals(Object a, Object b)
  {
    return a == null ? b == null : a.equals(b);
  }

  /**
   * {@inheritDoc}
   * <p>
   * Clears both the value list and the underlying {@link PairSet}.
   */
  @Override
  public void clear()
  {
    keys.clear();
    values.clear();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean containsKey(Object key)
  {
    return keys.contains(key);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean containsValue(Object value)
  {
    return values.contains(value);
  }

  /**
   * {@inheritDoc}
   *
   * @return the value at the position of <code>key</code> within the key set,
   * or <code>null</code> if <code>key</code> is not a {@link Pair} or is
   * not a key of this map
   */
  @SuppressWarnings("unchecked")
  @Override
  public V get(Object key)
  {
    // instanceof is false for null
    if (!(key instanceof Pair<?, ?>)) {
      return null;
    }
    int index = keys.indexOf((Pair<T, I>) key);
    if (index < 0) {
      return null;
    }
    return values.get(index);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public boolean isEmpty()
  {
    return keys.isEmpty();
  }

  /**
   * {@inheritDoc}
   * <p>
   * The value is stored at the position that the key has within the key set,
   * so that keys and values stay aligned.
   */
  @Override
  public V put(Pair<T, I> key, V value)
  {
    boolean isNew = keys.add(key);
    int index = keys.indexOf(key);
    if (isNew) {
      // shift the following values to keep them aligned with their keys
      values.add(index, value);
      return null;
    }
    return values.set(index, value);
  }

  /**
   * {@inheritDoc}
   */
  @SuppressWarnings("unchecked")
  @Override
  public V remove(Object key)
  {
    if (!(key instanceof Pair<?, ?>)) {
      return null;
    }
    // the position must be read before the key is removed
    int index = keys.indexOf((Pair<T, I>) key);
    if (index < 0) {
      return null;
    }
    keys.remove(key);
    return values.remove(index);
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public int size()
  {
    return keys.size();
  }

  /**
   * {@inheritDoc}
   * <p>
   * The key set is cloned; the values themselves are not copied, only the
   * list that holds them.
   */
  @Override
  public PairMap<T, I, V> clone()
  {
    // NOTE: do not use super.clone() since it is 10 times slower!
    PairMap<T, I, V> cloned = new PairMap<T, I, V>(keys.clone());
    // the constructor filled the list with nulls: replace them
    cloned.values.clear();
    cloned.values.addAll(values);
    return cloned;
  }

  /**
   * {@inheritDoc}
   * <p>
   * The returned view supports {@link Set#clear()} but neither addition nor
   * removal of single elements.
   */
  @Override
  public Set<Pair<T, I>> keySet()
  {
    return new AbstractSet<Pair<T, I>>()
    {
      @Override
      public boolean add(Pair<T, I> e)
      {
        throw new UnsupportedOperationException();
      }

      @Override
      public void clear()
      {
        PairMap.this.clear();
      }

      @Override
      public boolean contains(Object o)
      {
        return keys.contains(o);
      }

      @Override
      public boolean containsAll(Collection<?> c)
      {
        return keys.containsAll(c);
      }

      @Override
      public boolean isEmpty()
      {
        return keys.isEmpty();
      }

      @Override
      public Iterator<Pair<T, I>> iterator()
      {
        // wrapper that hides the remove() of the key set iterator
        return new Iterator<Pair<T, I>>()
        {
          final Iterator<Pair<T, I>> itr = keys.iterator();

          @Override
          public boolean hasNext()
          {
            return itr.hasNext();
          }

          @Override
          public Pair<T, I> next()
          {
            return itr.next();
          }

          @Override
          public void remove()
          {
            throw new UnsupportedOperationException();
          }
        };
      }

      @Override
      public boolean remove(Object o)
      {
        throw new UnsupportedOperationException();
      }

      @Override
      public int size()
      {
        return keys.size();
      }
    };
  }

  /**
   * {@inheritDoc}
   * <p>
   * The returned view supports {@link Collection#clear()} but neither addition
   * nor removal of single elements.
   */
  @Override
  public Collection<V> values()
  {
    return new AbstractCollection<V>()
    {
      @Override
      public boolean add(V e)
      {
        throw new UnsupportedOperationException();
      }

      @Override
      public void clear()
      {
        PairMap.this.clear();
      }

      @Override
      public boolean contains(Object o)
      {
        return values.contains(o);
      }

      @Override
      public boolean isEmpty()
      {
        return keys.isEmpty();
      }

      @Override
      public Iterator<V> iterator()
      {
        // wrapper that hides the remove() of the list iterator
        return new Iterator<V>()
        {
          final Iterator<V> itr = values.iterator();

          @Override
          public boolean hasNext()
          {
            return itr.hasNext();
          }

          @Override
          public V next()
          {
            return itr.next();
          }

          @Override
          public void remove()
          {
            throw new UnsupportedOperationException();
          }
        };
      }

      @Override
      public boolean remove(Object o)
      {
        throw new UnsupportedOperationException();
      }

      @Override
      public int size()
      {
        return values.size();
      }
    };
  }

  /**
   * {@inheritDoc}
   * <p>
   * The returned view supports addition and {@link Set#clear()} but not
   * removal of single elements. Each entry produced by the iterator is bound
   * to the position it had when it was returned, and writes through to this
   * map on {@link java.util.Map.Entry#setValue(Object)}.
   */
  @Override
  public Set<Entry<Pair<T, I>, V>> entrySet()
  {
    return new AbstractSet<Entry<Pair<T, I>, V>>()
    {
      @Override
      public boolean add(Entry<Pair<T, I>, V> e)
      {
        // the set changes if the key is new or if its value changes
        final boolean existed = PairMap.this.containsKey(e.getKey());
        final V previous = PairMap.this.put(e.getKey(), e.getValue());
        return !existed || !nullSafeEquals(previous, e.getValue());
      }

      @Override
      public void clear()
      {
        PairMap.this.clear();
      }

      @Override
      public boolean contains(Object o)
      {
        if (!(o instanceof Entry<?, ?>)) {
          return false;
        }
        final Entry<?, ?> entry = (Entry<?, ?>) o;
        final Object key = entry.getKey();
        // the key must be mapped to exactly the given value; it is not enough
        // that the value appears somewhere in the map
        return PairMap.this.containsKey(key)
               && nullSafeEquals(PairMap.this.get(key), entry.getValue());
      }

      @Override
      public boolean isEmpty()
      {
        return keys.isEmpty();
      }

      @Override
      public Iterator<Entry<Pair<T, I>, V>> iterator()
      {
        return new Iterator<Entry<Pair<T, I>, V>>()
        {
          final Iterator<Pair<T, I>> keyItr = keys.iterator();
          int valueIndex = -1;

          @Override
          public boolean hasNext()
          {
            return keyItr.hasNext();
          }

          @Override
          public Entry<Pair<T, I>, V> next()
          {
            final Pair<T, I> key = keyItr.next();
            // snapshot of the position: the entry must keep referring to its
            // own value even after the iterator has advanced
            final int index = ++valueIndex;
            return new Entry<Pair<T, I>, V>()
            {
              @Override
              public Pair<T, I> getKey()
              {
                return key;
              }

              @Override
              public V getValue()
              {
                return values.get(index);
              }

              @Override
              public V setValue(V value)
              {
                return values.set(index, value);
              }

              @Override
              public boolean equals(Object o)
              {
                if (!(o instanceof Entry<?, ?>)) {
                  return false;
                }
                final Entry<?, ?> other = (Entry<?, ?>) o;
                return nullSafeEquals(key, other.getKey())
                       && nullSafeEquals(getValue(), other.getValue());
              }

              @Override
              public int hashCode()
              {
                // as specified by java.util.Map.Entry#hashCode()
                final V value = getValue();
                return (key == null ? 0 : key.hashCode())
                       ^ (value == null ? 0 : value.hashCode());
              }

              @Override
              public String toString()
              {
                return "{" + getKey() + "=" + getValue() + "}";
              }
            };
          }

          @Override
          public void remove()
          {
            throw new UnsupportedOperationException();
          }
        };
      }

      @Override
      public boolean remove(Object o)
      {
        throw new UnsupportedOperationException();
      }

      @Override
      public int size()
      {
        return keys.size();
      }
    };
  }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,496 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset;
import io.druid.extendedset.intset.ArraySet;
import io.druid.extendedset.intset.ConciseSet;
import io.druid.extendedset.intset.FastSet;
import io.druid.extendedset.wrappers.GenericExtendedSet;
import io.druid.extendedset.wrappers.IntegerSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
//import it.uniroma3.mat.extendedset.intset.Concise2Set;
/**
* Class for performance evaluation.
*
* @author Alessandro Colantonio
* @version $Id: Performance.java 155 2011-05-30 22:27:00Z cocciasik $
*/
public class Performance
{
/**
* number of times to repeat each test
*/
private final static int REPETITIONS = 5;
/**
* minimum element
*/
private final static int SHIFT = 1000;
/**
* test results
*/
private final static Map<String, Map<Class<?>, Double>> TIME_VALUES = new TreeMap<String, Map<Class<?>, Double>>();
/**
* time measurement, in nanoseconds
*/
private static long lastExecTime = -1;
// private static class IntegerConcise2Set extends IntegerSet {IntegerConcise2Set() {super(new Concise2Set());}}
// private static class IntegerWAHSet extends IntegerSet {IntegerWAHSet() {super(new WAHSet());}}
/**
 * Start time measurement: stores the current {@link System#nanoTime()} value
 * in {@code lastExecTime}.
 */
private static void startTimer()
{
lastExecTime = System.nanoTime();
}
/**
 * Stops the time measurement started with {@code startTimer()} and records
 * the result in {@code TIME_VALUES}. For each method name and class only the
 * smallest value observed so far is kept.
 *
 * @param c    class being tested
 * @param name method name
 * @param div  division factor (the elapsed time, in nanoseconds, is divided
 *             by this number)
 */
private static void endTimer(Class<?> c, String name, long div)
{
  // elapsed time per unit of work
  final double elapsed = ((double) (System.nanoTime() - lastExecTime)) / div;

  Map<Class<?>, Double> perClass = TIME_VALUES.get(name);
  if (perClass == null) {
    perClass = new HashMap<Class<?>, Double>();
    TIME_VALUES.put(name, perClass);
  }

  // keep the best (smallest) measurement only
  final Double best = perClass.get(c);
  if (best == null || best > elapsed) {
    perClass.put(c, elapsed);
  }
}
/**
 * Perform the time test for one collection class.
 * <p>
 * {@code REPETITIONS} instances of {@code classToTest} are created for each
 * scenario through {@link Class#newInstance()}, then the following operations
 * are timed and recorded via {@code endTimer}: append, remove, contains,
 * containsAll, addAll, removeAll, retainAll, union, difference and
 * intersection.
 * <p>
 * One of the instances is cast to {@link IntegerSet}, so
 * {@code classToTest} must be an {@link IntegerSet} subclass with an
 * accessible no-argument constructor; any failure during creation is rethrown
 * wrapped in a {@link RuntimeException}.
 *
 * @param classToTest class of the {@link Collection} instance to test
 * @param leftOperand collection of integers representing the left operand
 * {@link Collection}
 * @param rightOperand collection of integers representing the right operand
 * {@link Collection}
 */
@SuppressWarnings("unchecked")
private static void testClass(
Class<?> classToTest,
Collection<Integer> leftOperand,
Collection<Integer> rightOperand
)
{
// collections used for the test cases
Collection<Integer>[] cAddAndRemove = new Collection[REPETITIONS];
Collection<Integer>[] cAddAll = new Collection[REPETITIONS];
Collection<Integer>[] cRemoveAll = new Collection[REPETITIONS];
Collection<Integer>[] cRetainAll = new Collection[REPETITIONS];
Collection<Integer>[] cRighOperand = new Collection[REPETITIONS];
IntegerSet[] cLeftOperand = new IntegerSet[REPETITIONS];
// results of the non-destructive operations; assigned but never read
IntegerSet[] cUnionResults = new IntegerSet[REPETITIONS];
IntegerSet[] cDifferenceResults = new IntegerSet[REPETITIONS];
IntegerSet[] cIntersectionResults = new IntegerSet[REPETITIONS];
// CREATION
for (int i = 0; i < REPETITIONS; i++) {
try {
cAddAndRemove[i] = (Collection) classToTest.newInstance();
cAddAll[i] = (Collection) classToTest.newInstance();
cRemoveAll[i] = (Collection) classToTest.newInstance();
cRetainAll[i] = (Collection) classToTest.newInstance();
cRighOperand[i] = (Collection) classToTest.newInstance();
cLeftOperand[i] = (IntegerSet) classToTest.newInstance();
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
// APPEND
// (one measurement covers the filling of all six collections, hence the
// division factor below)
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
for (Integer x : rightOperand) {
cRighOperand[i].add(x);
}
for (Integer x : leftOperand) {
cAddAndRemove[i].add(x);
cLeftOperand[i].add(x);
cAddAll[i].add(x);
cRetainAll[i].add(x);
cRemoveAll[i].add(x);
}
endTimer(classToTest, "00) append()", (5 * leftOperand.size() + rightOperand.size()));
}
// List<Integer> xxx = new ArrayList<Integer>(rightOperand);
// List<Integer> yyy = new ArrayList<Integer>(leftOperand);
// Collections.shuffle(xxx);
// Collections.shuffle(yyy);
// for (int i = 0; i < REPETITIONS; i++) {
// cRighOperand[i].clear();
// cAddAndRemove[i].clear();
// cLeftOperand[i].clear();
// cAddAll[i].clear();
// cRetainAll[i].clear();
// cRemoveAll[i].clear();
// }
//
// // ADDITION
// for (int i = 0; i < REPETITIONS; i++) {
// startTimer();
// for (Integer x : xxx)
// cRighOperand[i].add(x);
// for (Integer x : yyy) {
// cAddAndRemove[i].add(x);
// cLeftOperand[i].add(x);
// cAddAll[i].add(x);
// cRetainAll[i].add(x);
// cRemoveAll[i].add(x);
// }
// endTimer(classToTest, "01) add()", (5 * leftOperand.size() + rightOperand.size()));
// }
// REMOVAL
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
for (Integer x : rightOperand) {
cAddAndRemove[i].remove(x);
}
endTimer(classToTest, "02) remove()", rightOperand.size());
}
// CONTAINS
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
for (Integer x : rightOperand) {
cAddAll[i].contains(x);
}
endTimer(classToTest, "03) contains()", rightOperand.size());
}
// CONTAINS ALL
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
cAddAll[i].containsAll(cRighOperand[i]);
endTimer(classToTest, "04) containsAll()", 1);
}
// UNION (destructive, through addAll)
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
cAddAll[i].addAll(cRighOperand[i]);
endTimer(classToTest, "05) addAll()", 1);
}
// DIFFERENCE (destructive, through removeAll)
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
cRemoveAll[i].removeAll(cRighOperand[i]);
endTimer(classToTest, "06) removeAll()", 1);
}
// INTERSECTION (destructive, through retainAll)
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
cRetainAll[i].retainAll(cRighOperand[i]);
endTimer(classToTest, "07) retainAll()", 1);
}
// UNION
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
cUnionResults[i] = cLeftOperand[i].union(cRighOperand[i]);
endTimer(classToTest, "08) union()", 1);
}
// DIFFERENCE
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
cDifferenceResults[i] = cLeftOperand[i].difference(cRighOperand[i]);
endTimer(classToTest, "09) difference()", 1);
}
// INTERSECTION
for (int i = 0; i < REPETITIONS; i++) {
startTimer();
cIntersectionResults[i] = cLeftOperand[i].intersection(cRighOperand[i]);
endTimer(classToTest, "10) intersection()", 1);
}
}
/**
 * Summary information: prints one line for each measured operation stored in
 * {@code TIME_VALUES}, made of cardinality, density, operation name and the
 * recorded time of each given class (truncated to an integer, <code>0</code>
 * when no time has been recorded).
 * <p>
 * All numbers are formatted with {@link Locale#ENGLISH} so that the output
 * does not depend on the default locale.
 *
 * @param cardinality cardinality of the sets used for the measurements
 * @param density     density of the sets used for the measurements
 * @param classes     classes to report, in column order
 */
private static void printSummary(int cardinality, double density, Class<?>[] classes)
{
  for (Entry<String, Map<Class<?>, Double>> e : TIME_VALUES.entrySet()) {
    System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density);
    // method name
    System.out.print(e.getKey());
    for (Class<?> c : classes) {
      Double op = e.getValue().get(c);
      System.out.format(Locale.ENGLISH, "\t%12d", (op == null ? 0 : op.intValue()));
    }
    System.out.println();
  }
}
/**
 * TEST: runs the memory and/or time evaluation, according to the flags
 * hard-coded at the beginning of the method, and prints the results to
 * {@link System#out} as tab-separated tables.
 *
 * @param args not used
 */
public static void main(String[] args)
{
  // NOTE: these flags are deliberately not final, otherwise the compiler
  // would reject the disabled loops as unreachable code
  boolean calcMemory = false;
  boolean calcTime = true;
  boolean calcUniform = true;
  boolean calcMarkovian = false;
  boolean calcZipfian = false;
  int minCardinality = 10000;
  int maxCardinality = 10000;

  // both arrays are indexed by distribution id, see newGenerator()
  final boolean[] enabled = {calcUniform, calcMarkovian, calcZipfian};
  final String[] labels = {"UNIFORM", "MARKOVIAN", "ZIPFIAN"};

  /*
   * MEMORY
   */
  for (int i = 0; calcMemory && i < labels.length; i++) {
    System.out.println();
    if (!enabled[i]) {
      continue;
    }
    System.out.println("#MEMORY " + labels[i]);
    // NOTE: the Concise2Set column is listed but currently disabled below
    System.out.println("#cardinality\tdensity\tFastSet\tConciseSet\tWAHSet\tConcise2Set");
    for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) {
      for (double density = .0001; density < 1D; density *= 1.7) {
        System.out.format(Locale.ENGLISH, "%7d\t%.4f\t", cardinality, density);

        Collection<Integer> integers = newGenerator(i, cardinality, density).generate();

        IntegerSet s0 = new IntegerSet(new FastSet());
        s0.addAll(integers);
        System.out.format("%7d\t", (int) (s0.collectionCompressionRatio() * cardinality));

        IntegerSet s1 = new IntegerSet(new ConciseSet());
        s1.addAll(integers);
        System.out.format("%7d\t", (int) (s1.collectionCompressionRatio() * cardinality));

        IntegerSet s2 = new IntegerSet(new WAHSet());
        s2.addAll(integers);
        System.out.format("%7d\t", (int) (s2.collectionCompressionRatio() * cardinality));

        // IntegerSet s3 = new IntegerSet(new Concise2Set());
        // s3.addAll(integers);
        // System.out.format("%7d\t", (int) (s3.collectionCompressionRatio() * cardinality));

        // end of the row: it used to be terminated only by the disabled
        // Concise2Set column, so all rows ended up on a single line
        System.out.println();
      }
    }
  }

  Class<?>[] classes = new Class<?>[]{
      // ArrayList.class,
      // LinkedList.class,
      // ArrayListSet.class,
      // LinkedListSet.class,
      // HashSet.class,
      // TreeSet.class,
      IntegerArraySet.class,
      IntegerFastSet.class,
      // IntegerHashSet.class,
      // IntegerWAHSet.class,
      IntegerConciseSet.class,
      // IntegerConcise2Set.class,
  };

  /*
   * TIME
   */
  for (int i = 0; calcTime && i < labels.length; i++) {
    System.out.println();
    if (!enabled[i]) {
      continue;
    }
    System.out.println("#TIME " + labels[i]);
    System.out.print("#cardinality\tdensity\toperation");
    for (Class<?> c : classes) {
      System.out.print("\t" + c.getSimpleName());
    }
    System.out.println();

    for (int cardinality = minCardinality; cardinality <= maxCardinality; cardinality *= 10) {
      // warm-up: every class is exercised twice before measuring
      RandomNumbers r = newGenerator(i, cardinality, 0.5);
      Collection<Integer> x = r.generate();
      Collection<Integer> y = r.generate();
      for (Class<?> c : classes) {
        testClass(c, x, y);
        testClass(c, x, y);
      }
      // discard the warm-up timings: endTimer() keeps the minimum, so they
      // would otherwise leak into the first reported row
      TIME_VALUES.clear();

      for (double density = .0001; density < 1D; density *= 1.2) {
        // alternative density ranges:
        // for (double density = .0001; density < 1D; density *= 1.7) {
        // for (double density = .0041; density < 1D; density *= 1.7) {
        // for (double density = 0.8272; density > 0.00005; density /= 1.7) {
        r = newGenerator(i, cardinality, density);
        x = r.generate();
        y = r.generate();
        for (Class<?> c : classes) {
          testClass(c, x, y);
        }
        printSummary(cardinality, density, classes);
        TIME_VALUES.clear();
      }
    }
  }

  System.out.println("\nDone!");
}

/**
 * Creates the random number generator for the given distribution.
 *
 * @param distribution <code>0</code> for uniform, <code>1</code> for
 *                     markovian, <code>2</code> for zipfian
 * @param cardinality  number of elements to generate
 * @param density      cardinality to range ratio
 *
 * @return the generator, having {@code SHIFT} as smallest integer
 */
private static RandomNumbers newGenerator(int distribution, int cardinality, double density)
{
  switch (distribution) {
    case 0:
      return new RandomNumbers.Uniform(cardinality, density, SHIFT);
    case 1:
      return new RandomNumbers.Markovian(cardinality, density, SHIFT);
    case 2:
      return new RandomNumbers.Zipfian(cardinality, density, SHIFT, 2);
    default:
      throw new RuntimeException("unexpected");
  }
}
/* test classes */
/**
 * {@link ConciseSet} created through its one-argument constructor with
 * <code>true</code>; used by the memory test in {@code main}.
 * NOTE(review): presumably the flag makes {@link ConciseSet} behave like a
 * WAH-compressed bitmap - confirm against the {@link ConciseSet} constructor.
 */
private static class WAHSet extends ConciseSet
{
private static final long serialVersionUID = -5048707825606872979L;
WAHSet() {super(true);}
}
/**
 * {@link IntegerSet} backed by an {@link ArraySet}. It has a no-argument
 * constructor so that {@code testClass} can create it through
 * {@link Class#newInstance()}.
 */
private static class IntegerArraySet extends IntegerSet
{
IntegerArraySet() {super(new ArraySet());}
}
// private static class IntegerHashSet extends IntegerSet {IntegerHashSet() {super(new HashIntSet());}}
/**
 * {@link IntegerSet} backed by a {@link FastSet}. It has a no-argument
 * constructor so that {@code testClass} can create it through
 * {@link Class#newInstance()}.
 */
private static class IntegerFastSet extends IntegerSet
{
IntegerFastSet() {super(new FastSet());}
}
/**
 * {@link IntegerSet} backed by a {@link ConciseSet}. It has a no-argument
 * constructor so that {@code testClass} can create it through
 * {@link Class#newInstance()}.
 */
private static class IntegerConciseSet extends IntegerSet
{
IntegerConciseSet() {super(new ConciseSet());}
}
/**
 * Class to test the sorted array: a {@link GenericExtendedSet} created with
 * {@link ArrayList} as its collection class.
 * <p>
 * Currently unused, since its entry in the list of classes to test in
 * {@code main} is commented out.
 */
@SuppressWarnings("unused")
private static class ArrayListSet extends GenericExtendedSet<Integer>
{
ArrayListSet()
{
super(ArrayList.class);
}
}
/**
 * Class to test the sorted linked lists: a {@link GenericExtendedSet} created
 * with {@link LinkedList} as its collection class.
 * <p>
 * Currently unused, since its entry in the list of classes to test in
 * {@code main} is commented out.
 */
@SuppressWarnings("unused")
private static class LinkedListSet extends GenericExtendedSet<Integer>
{
LinkedListSet()
{
super(LinkedList.class);
}
}
}

View File

@ -0,0 +1,242 @@
/*
* (c) 2010 Alessandro Colantonio
* <mailto:colanton@mat.uniroma3.it>
* <http://ricerca.mat.uniroma3.it/users/colanton>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.extendedset;
import io.druid.extendedset.utilities.random.MersenneTwister;
import java.util.Collection;
import java.util.Random;
import java.util.SortedSet;
import java.util.TreeSet;
/**
 * Generation of random integer sets.
 * <p>
 * Each nested generator draws integers according to a different distribution;
 * {@link #generate()} keeps drawing until it has collected exactly
 * {@link #cardinality} distinct values.
 *
 * @author Alessandro Colantonio
 * @version $Id: RandomNumbers.java 142 2011-02-15 23:12:28Z cocciasik $
 */
public abstract class RandomNumbers
{
  /**
   * pseudo-random number generator, shared by every generator instance
   */
  private static final Random RND = new MersenneTwister();
  /**
   * the smallest integer
   */
  protected final int min;
  /**
   * number of elements within the set
   */
  protected final int cardinality;
  /**
   * cardinality to range ratio, where the range is the number of distinct
   * integers the generator may draw from
   */
  protected final double density;

  /**
   * Initializes internal data
   *
   * @param cardinality number of elements of the set (i.e., result of
   *                    {@link Collection#size()} )
   * @param density     cardinality to range ratio
   * @param min         the smallest integer
   *
   * @throws IllegalArgumentException if <code>cardinality</code> is negative
   *                                  or <code>density</code> is outside
   *                                  <code>[0, 1]</code>
   */
  private RandomNumbers(int cardinality, double density, int min)
  {
    // parameter check
    if (cardinality < 0) {
      throw new IllegalArgumentException("cardinality < 0: " + cardinality);
    }
    if (density < 0D) {
      throw new IllegalArgumentException("density < 0: " + density);
    }
    if (density > 1D) {
      throw new IllegalArgumentException("density > 1: " + density);
    }
    this.cardinality = cardinality;
    this.density = density;
    this.min = min;
  }

  /**
   * Number of distinct integers a range-based generator may draw from, i.e.
   * <code>cardinality / density</code> rounded to the nearest integer.
   * <p>
   * A zero (or NaN) density with a positive cardinality is rejected here:
   * the division would yield an infinite/NaN quotient whose rounded value
   * turns into a non-positive range, so {@link #generate()} could never
   * collect the requested number of elements.
   *
   * @param cardinality number of elements to generate, already validated
   * @param density     cardinality to range ratio, already validated
   *
   * @return size of the range
   *
   * @throws IllegalArgumentException if elements are requested but
   *                                  <code>density</code> is not positive
   */
  private static int rangeSize(int cardinality, double density)
  {
    if (cardinality > 0 && !(density > 0D)) {
      throw new IllegalArgumentException(
          "density must be > 0 to generate " + cardinality + " elements: " + density
      );
    }
    // Math.round returns a long; the narrowing cast mirrors the original
    // computation (no overflow check for very small densities)
    return (int) Math.round(cardinality / density);
  }

  /**
   * Test: prints one sample set per distribution, for a sparse (0.1) and a
   * dense (0.9) setting.
   *
   * @param args ignored
   */
  public static void main(String[] args)
  {
    int size = 100;
    System.out.println(new Uniform(size, 0.1, 0).generate());
    System.out.println(new Uniform(size, 0.9, 0).generate());
    System.out.println(new Zipfian(size, 0.1, 0, 2).generate());
    System.out.println(new Zipfian(size, 0.9, 0, 2).generate());
    System.out.println(new Markovian(size, 0.1, 0).generate());
    System.out.println(new Markovian(size, 0.9, 0).generate());
  }

  /**
   * Next integer, according to the given probability distribution
   *
   * @return next pseudo-random integer
   */
  protected abstract int next();

  /**
   * Generates the integer set of pseudo-random numbers. Values are drawn with
   * {@link #next()} until {@link #cardinality} distinct ones have been
   * collected; duplicates are silently dropped by the set.
   *
   * @return the integer set, sorted in ascending order
   */
  public SortedSet<Integer> generate()
  {
    SortedSet<Integer> res = new TreeSet<Integer>();
    while (res.size() < cardinality) {
      res.add(next());
    }
    return res;
  }

  /**
   * Integral numbers with uniform distribution.
   * <p>
   * The maximum number will be
   * <code>min + round(cardinality / density) - 1</code>, so the average gap
   * between two consecutive numbers of a generated set will be about
   * <code>1 / density</code>.
   */
  public static class Uniform extends RandomNumbers
  {
    /**
     * the greatest integer
     */
    private final int max;

    /**
     * Initializes internal data
     *
     * @param cardinality number of elements of the set (i.e., result of
     *                    {@link Collection#size()} )
     * @param density     cardinality to range ratio
     * @param min         the smallest integer
     *
     * @throws IllegalArgumentException if the arguments are out of range, or
     *                                  if <code>density</code> is zero while
     *                                  <code>cardinality</code> is positive
     */
    public Uniform(int cardinality, double density, int min)
    {
      super(cardinality, density, min);
      max = min + rangeSize(cardinality, density) - 1;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int next()
    {
      return min + RND.nextInt(max - min + 1);
    }
  }

  /**
   * Integral numbers with Zipfian (power-law) distribution.
   * <p>
   * The maximum number will be
   * <code>min + round(cardinality / density) - 1</code>, as for
   * {@link Uniform}. However, each number is obtained by raising a uniform
   * value to the power <code>k</code>, so integers will be concentrated
   * around the minimum value.
   */
  public static class Zipfian extends RandomNumbers
  {
    /**
     * the greatest integer
     */
    private final int max;
    /**
     * power-law exponent
     */
    private final int k;

    /**
     * Initializes internal data
     *
     * @param cardinality number of elements of the set (i.e., result of
     *                    {@link Collection#size()} )
     * @param density     cardinality to range ratio
     * @param min         the smallest integer
     * @param k           power-law exponent
     *
     * @throws IllegalArgumentException if the arguments are out of range, or
     *                                  if <code>density</code> is zero while
     *                                  <code>cardinality</code> is positive
     */
    public Zipfian(int cardinality, double density, int min, int k)
    {
      super(cardinality, density, min);
      this.k = k;
      max = min + rangeSize(cardinality, density) - 1;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int next()
    {
      return min + (int) ((max - min + 1) * Math.pow(RND.nextDouble(), k));
    }
  }

  /**
   * Integral numbers with Markovian distribution. The data will present
   * sequences of subsequent integers followed by "gaps". In this case,
   * <code>density</code> indicates the probability of switching from a
   * sequence to a gap, and vice-versa. For example, <code>density = 0</code>
   * means a set made up of one long sequence of numbers, while
   * <code>density = 1</code> means a set made up of all odd (or even)
   * integers.
   * <p>
   * The generator is stateful: the position reached is kept between calls,
   * so repeated calls to {@link #generate()} on the same instance return
   * successive, non-overlapping ranges.
   */
  public static class Markovian extends RandomNumbers
  {
    /**
     * whether the generator is currently inside a gap
     */
    private boolean skip = false;
    /**
     * offset, relative to <code>min</code>, of the next candidate integer.
     * It starts at 0 (not at <code>min</code>) because <code>min</code> is
     * added when the value is returned; adding it twice would make the
     * sequence start at <code>2 * min</code>.
     */
    private int next = 0;

    /**
     * @param cardinality number of elements of the set (i.e., result of
     *                    {@link Collection#size()} )
     * @param density     probability of switching between a sequence and a
     *                    gap
     * @param min         the smallest integer
     */
    public Markovian(int cardinality, double density, int min)
    {
      super(cardinality, density, min);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int next()
    {
      // flip the sequence/gap state with probability "density"; while in a
      // gap, step over the current integer and try again
      while (skip ^= (RND.nextDouble() < density)) {
        next++;
      }
      return min + next++;
    }
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -91,6 +91,7 @@
<module>aws-common</module> <module>aws-common</module>
<module>java-util</module> <module>java-util</module>
<module>bytebuffer-collections</module> <module>bytebuffer-collections</module>
<module>extendedset</module>
<!-- Core extensions --> <!-- Core extensions -->
<module>extensions-core/avro-extensions</module> <module>extensions-core/avro-extensions</module>
<module>extensions-core/datasketches</module> <module>extensions-core/datasketches</module>

View File

@ -27,7 +27,7 @@ import io.druid.collections.bitmap.BitmapFactory;
import io.druid.collections.bitmap.ConciseBitmapFactory; import io.druid.collections.bitmap.ConciseBitmapFactory;
import io.druid.collections.bitmap.ImmutableBitmap; import io.druid.collections.bitmap.ImmutableBitmap;
import io.druid.collections.bitmap.WrappedImmutableConciseBitmap; import io.druid.collections.bitmap.WrappedImmutableConciseBitmap;
import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet; import io.druid.extendedset.intset.ImmutableConciseSet;
/** /**
*/ */