diff --git a/src/changes/changes.xml b/src/changes/changes.xml index e6b0b5bda..d9b34adce 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -75,6 +75,9 @@ Added "ListUtils#select" and "ListUtils#selectRejected" methods. + + Fixed performance issue in "SetUniqueList#retainAll" method for large collections. + diff --git a/src/main/java/org/apache/commons/collections/list/SetUniqueList.java b/src/main/java/org/apache/commons/collections/list/SetUniqueList.java index 9e0d5d27f..537c96a43 100644 --- a/src/main/java/org/apache/commons/collections/list/SetUniqueList.java +++ b/src/main/java/org/apache/commons/collections/list/SetUniqueList.java @@ -29,372 +29,409 @@ import org.apache.commons.collections.iterators.AbstractListIteratorDecorator; import org.apache.commons.collections.set.UnmodifiableSet; /** - * Decorates a List to ensure that no duplicates are present - * much like a Set. + * Decorates a List to ensure that no duplicates are present much + * like a Set. *

- * The List interface makes certain assumptions/requirements. - * This implementation breaks these in certain ways, but this is merely the - * result of rejecting duplicates. - * Each violation is explained in the method, but it should not affect you. - * Bear in mind that Sets require immutable objects to function correctly. + * The List interface makes certain assumptions/requirements. This + * implementation breaks these in certain ways, but this is merely the result of + * rejecting duplicates. Each violation is explained in the method, but it + * should not affect you. Bear in mind that Sets require immutable objects to + * function correctly. *

* The {@link org.apache.commons.collections.set.ListOrderedSet ListOrderedSet} * class provides an alternative approach, by wrapping an existing Set and * retaining insertion order in the iterator. *

* This class is Serializable from Commons Collections 3.1. - * + * * @since 3.0 * @version $Id$ */ public class SetUniqueList extends AbstractSerializableListDecorator { - /** Serialization version */ - private static final long serialVersionUID = 7196982186153478694L; + /** Serialization version */ + private static final long serialVersionUID = 7196982186153478694L; - /** - * Internal Set to maintain uniqueness. - */ - protected final Set set; + /** + * Internal Set to maintain uniqueness. + */ + protected final Set set; - /** - * Factory method to create a SetList using the supplied list to retain order. - *

- * If the list contains duplicates, these are removed (first indexed one kept). - * A HashSet is used for the set behaviour. - * - * @param the element type - * @param list the list to decorate, must not be null - * @return a new {@link SetUniqueList} - * @throws IllegalArgumentException if list is null - */ - public static SetUniqueList setUniqueList(List list) { - if (list == null) { - throw new IllegalArgumentException("List must not be null"); - } - if (list.isEmpty()) { - return new SetUniqueList(list, new HashSet()); - } - List temp = new ArrayList(list); - list.clear(); - SetUniqueList sl = new SetUniqueList(list, new HashSet()); - sl.addAll(temp); - return sl; - } + /** + * Factory method to create a SetList using the supplied list to retain + * order. + *

+ * If the list contains duplicates, these are removed (first indexed one + * kept). A HashSet is used for the set behaviour. + * + * @param + * the element type + * @param list + * the list to decorate, must not be null + * @return a new {@link SetUniqueList} + * @throws IllegalArgumentException + * if list is null + */ + public static SetUniqueList setUniqueList(List list) { + if (list == null) { + throw new IllegalArgumentException("List must not be null"); + } + if (list.isEmpty()) { + return new SetUniqueList(list, new HashSet()); + } + List temp = new ArrayList(list); + list.clear(); + SetUniqueList sl = new SetUniqueList(list, new HashSet()); + sl.addAll(temp); + return sl; + } - //----------------------------------------------------------------------- - /** - * Constructor that wraps (not copies) the List and specifies the set to use. - *

- * The set and list must both be correctly initialised to the same elements. - * - * @param set the set to decorate, must not be null - * @param list the list to decorate, must not be null - * @throws IllegalArgumentException if set or list is null - */ - protected SetUniqueList(List list, Set set) { - super(list); - if (set == null) { - throw new IllegalArgumentException("Set must not be null"); - } - this.set = set; - } + // ----------------------------------------------------------------------- + /** + * Constructor that wraps (not copies) the List and specifies the set to + * use. + *

+ * The set and list must both be correctly initialised to the same elements. + * + * @param set + * the set to decorate, must not be null + * @param list + * the list to decorate, must not be null + * @throws IllegalArgumentException + * if set or list is null + */ + protected SetUniqueList(List list, Set set) { + super(list); + if (set == null) { + throw new IllegalArgumentException("Set must not be null"); + } + this.set = set; + } - //----------------------------------------------------------------------- - /** - * Gets an unmodifiable view as a Set. - * - * @return an unmodifiable set view - */ - public Set asSet() { - return UnmodifiableSet.unmodifiableSet(set); - } + // ----------------------------------------------------------------------- + /** + * Gets an unmodifiable view as a Set. + * + * @return an unmodifiable set view + */ + public Set asSet() { + return UnmodifiableSet.unmodifiableSet(set); + } - //----------------------------------------------------------------------- - /** - * Adds an element to the list if it is not already present. - *

- * (Violation) - * The List interface requires that this method returns - * true always. However this class may return false - * because of the Set behaviour. - * - * @param object the object to add - * @return true if object was added - */ - @Override - public boolean add(E object) { - // gets initial size - final int sizeBefore = size(); + // ----------------------------------------------------------------------- + /** + * Adds an element to the list if it is not already present. + *

+ * (Violation) The List interface requires that this + * method returns true always. However this class may return + * false because of the Set behaviour. + * + * @param object + * the object to add + * @return true if object was added + */ + @Override + public boolean add(E object) { + // gets initial size + final int sizeBefore = size(); - // adds element if unique - add(size(), object); + // adds element if unique + add(size(), object); - // compares sizes to detect if collection changed - return (sizeBefore != size()); - } + // compares sizes to detect if collection changed + return (sizeBefore != size()); + } - /** - * Adds an element to a specific index in the list if it is not already present. - *

- * (Violation) - * The List interface makes the assumption that the element is - * always inserted. This may not happen with this implementation. - * - * @param index the index to insert at - * @param object the object to add - */ - @Override - public void add(int index, E object) { - // adds element if it is not contained already - if (set.contains(object) == false) { - super.add(index, object); - set.add(object); - } - } + /** + * Adds an element to a specific index in the list if it is not already + * present. + *

+ * (Violation) The List interface makes the assumption + * that the element is always inserted. This may not happen with this + * implementation. + * + * @param index + * the index to insert at + * @param object + * the object to add + */ + @Override + public void add(int index, E object) { + // adds element if it is not contained already + if (set.contains(object) == false) { + super.add(index, object); + set.add(object); + } + } - /** - * Adds a collection of objects to the end of the list avoiding duplicates. - *

- * Only elements that are not already in this list will be added, and - * duplicates from the specified collection will be ignored. - *

- * (Violation) - * The List interface makes the assumption that the elements - * are always inserted. This may not happen with this implementation. - * - * @param coll the collection to add in iterator order - * @return true if this collection changed - */ - @Override - public boolean addAll(Collection coll) { - return addAll(size(), coll); - } + /** + * Adds a collection of objects to the end of the list avoiding duplicates. + *

+ * Only elements that are not already in this list will be added, and + * duplicates from the specified collection will be ignored. + *

+ * (Violation) The List interface makes the assumption + * that the elements are always inserted. This may not happen with this + * implementation. + * + * @param coll + * the collection to add in iterator order + * @return true if this collection changed + */ + @Override + public boolean addAll(Collection coll) { + return addAll(size(), coll); + } - /** - * Adds a collection of objects a specific index in the list avoiding - * duplicates. - *

- * Only elements that are not already in this list will be added, and - * duplicates from the specified collection will be ignored. - *

- * (Violation) - * The List interface makes the assumption that the elements - * are always inserted. This may not happen with this implementation. - * - * @param index the index to insert at - * @param coll the collection to add in iterator order - * @return true if this collection changed - */ - @Override - public boolean addAll(int index, Collection coll) { - final List temp = new ArrayList(); - for (E e : coll) { - if (set.add(e)) { - temp.add(e); - } - } - return super.addAll(index, temp); - } + /** + * Adds a collection of objects a specific index in the list avoiding + * duplicates. + *

+ * Only elements that are not already in this list will be added, and + * duplicates from the specified collection will be ignored. + *

+ * (Violation) The List interface makes the assumption + * that the elements are always inserted. This may not happen with this + * implementation. + * + * @param index + * the index to insert at + * @param coll + * the collection to add in iterator order + * @return true if this collection changed + */ + @Override + public boolean addAll(int index, Collection coll) { + final List temp = new ArrayList(); + for (E e : coll) { + if (set.add(e)) { + temp.add(e); + } + } + return super.addAll(index, temp); + } - //----------------------------------------------------------------------- - /** - * Sets the value at the specified index avoiding duplicates. - *

- * The object is set into the specified index. - * Afterwards, any previous duplicate is removed - * If the object is not already in the list then a normal set occurs. - * If it is present, then the old version is removed. - * - * @param index the index to insert at - * @param object the object to set - * @return the previous object - */ - @Override - public E set(int index, E object) { - int pos = indexOf(object); - E removed = super.set(index, object); + // ----------------------------------------------------------------------- + /** + * Sets the value at the specified index avoiding duplicates. + *

+ * The object is set into the specified index. Afterwards, any previous + * duplicate is removed If the object is not already in the list then a + * normal set occurs. If it is present, then the old version is removed. + * + * @param index + * the index to insert at + * @param object + * the object to set + * @return the previous object + */ + @Override + public E set(int index, E object) { + int pos = indexOf(object); + E removed = super.set(index, object); - if (pos != -1 && pos != index) { - // the object is already in the uniq list - // (and it hasn't been swapped with itself) - super.remove(pos); // remove the duplicate by index - } + if (pos != -1 && pos != index) { + // the object is already in the uniq list + // (and it hasn't been swapped with itself) + super.remove(pos); // remove the duplicate by index + } - set.add(object); // add the new item to the unique set - set.remove(removed); // remove the item deleted by the set + set.add(object); // add the new item to the unique set + set.remove(removed); // remove the item deleted by the set - return removed; // return the item deleted by the set - } + return removed; // return the item deleted by the set + } - @Override - public boolean remove(Object object) { - boolean result = set.remove(object); - if (result) { - super.remove(object); - } - return result; - } + @Override + public boolean remove(Object object) { + boolean result = set.remove(object); + if (result) { + super.remove(object); + } + return result; + } - @Override - public E remove(int index) { - E result = super.remove(index); - set.remove(result); - return result; - } + @Override + public E remove(int index) { + E result = super.remove(index); + set.remove(result); + return result; + } - @Override - public boolean removeAll(Collection coll) { - boolean result = false; - for (Iterator it = coll.iterator(); it.hasNext();) { - result |= remove(it.next()); - } - return result; - } + @Override + public boolean removeAll(Collection coll) { + boolean result = false; + for (Iterator it = coll.iterator(); it.hasNext();) { + result |= remove(it.next()); + } + return result; + } - @Override - public boolean retainAll(Collection coll) { - boolean result = super.retainAll(coll); - set.retainAll(coll); - return result; - } + @Override + public boolean retainAll(Collection coll) { + Set setRetainAll = new HashSet(); + for (Iterator it = coll.iterator(); it.hasNext();) { + Object next = it.next(); + if (set.contains(next)) { + setRetainAll.add(next); + } + } + if (setRetainAll.size() == set.size()) { + return false; + } + if (setRetainAll.size() == 0) { + clear(); + } else { + for (Iterator it = iterator(); it.hasNext();) { + if (!setRetainAll.contains(it.next())) { + it.remove(); + } + } + } + return true; + } - @Override - public void clear() { - super.clear(); - set.clear(); - } + @Override + public void clear() { + super.clear(); + set.clear(); + } - @Override - public boolean contains(Object object) { - return set.contains(object); - } + @Override + public boolean contains(Object object) { + return set.contains(object); + } - @Override - public boolean containsAll(Collection coll) { - return set.containsAll(coll); - } + @Override + public boolean containsAll(Collection coll) { + return set.containsAll(coll); + } - @Override - public Iterator iterator() { - return new SetListIterator(super.iterator(), set); - } + @Override + public Iterator iterator() { + return new SetListIterator(super.iterator(), set); + } - @Override - public ListIterator listIterator() { - return new SetListListIterator(super.listIterator(), set); - } + @Override + public ListIterator listIterator() { + return new SetListListIterator(super.listIterator(), set); + } - @Override - public ListIterator listIterator(int index) { - return new SetListListIterator(super.listIterator(index), set); - } + @Override + public ListIterator listIterator(int index) { + return new SetListListIterator(super.listIterator(index), set); + } - @Override - public List subList(int fromIndex, int toIndex) { - List superSubList = super.subList(fromIndex, toIndex); - Set subSet = createSetBasedOnList(set, superSubList); - return new SetUniqueList(superSubList, subSet); - } + @Override + public List subList(int fromIndex, int toIndex) { + List superSubList = super.subList(fromIndex, toIndex); + Set subSet = createSetBasedOnList(set, superSubList); + return new SetUniqueList(superSubList, subSet); + } - /** - * Create a new {@link Set} with the same type as the provided {@code set} - * and populate it with all elements of {@code list}. - * - * @param set the {@link Set} to be used as return type, must not be null - * @param list the {@link List} to populate the {@link Set} - * @return a new {@link Set} populated with all elements of the provided {@link List} - */ - @SuppressWarnings("unchecked") - protected Set createSetBasedOnList(Set set, List list) { - Set subSet; - if (set.getClass().equals(HashSet.class)) { - subSet = new HashSet(list.size()); - } else { - try { - subSet = set.getClass().newInstance(); - } catch (InstantiationException ie) { - subSet = new HashSet(); - } catch (IllegalAccessException iae) { - subSet = new HashSet(); - } - } - subSet.addAll(list); - return subSet; - } + /** + * Create a new {@link Set} with the same type as the provided {@code set} + * and populate it with all elements of {@code list}. + * + * @param set + * the {@link Set} to be used as return type, must not be null + * @param list + * the {@link List} to populate the {@link Set} + * @return a new {@link Set} populated with all elements of the provided + * {@link List} + */ + @SuppressWarnings("unchecked") + protected Set createSetBasedOnList(Set set, List list) { + Set subSet; + if (set.getClass().equals(HashSet.class)) { + subSet = new HashSet(list.size()); + } else { + try { + subSet = set.getClass().newInstance(); + } catch (InstantiationException ie) { + subSet = new HashSet(); + } catch (IllegalAccessException iae) { + subSet = new HashSet(); + } + } + subSet.addAll(list); + return subSet; + } - //----------------------------------------------------------------------- - /** - * Inner class iterator. - */ - static class SetListIterator extends AbstractIteratorDecorator { + // ----------------------------------------------------------------------- + /** + * Inner class iterator. + */ + static class SetListIterator extends AbstractIteratorDecorator { - protected final Set set; - protected E last = null; + protected final Set set; + protected E last = null; - protected SetListIterator(Iterator it, Set set) { - super(it); - this.set = set; - } + protected SetListIterator(Iterator it, Set set) { + super(it); + this.set = set; + } - @Override - public E next() { - last = super.next(); - return last; - } + @Override + public E next() { + last = super.next(); + return last; + } - @Override - public void remove() { - super.remove(); - set.remove(last); - last = null; - } - } + @Override + public void remove() { + super.remove(); + set.remove(last); + last = null; + } + } - /** - * Inner class iterator. - */ - static class SetListListIterator extends AbstractListIteratorDecorator { + /** + * Inner class iterator. + */ + static class SetListListIterator extends + AbstractListIteratorDecorator { - protected final Set set; - protected E last = null; + protected final Set set; + protected E last = null; - protected SetListListIterator(ListIterator it, Set set) { - super(it); - this.set = set; - } + protected SetListListIterator(ListIterator it, Set set) { + super(it); + this.set = set; + } - @Override - public E next() { - last = super.next(); - return last; - } + @Override + public E next() { + last = super.next(); + return last; + } - @Override - public E previous() { - last = super.previous(); - return last; - } + @Override + public E previous() { + last = super.previous(); + return last; + } - @Override - public void remove() { - super.remove(); - set.remove(last); - last = null; - } + @Override + public void remove() { + super.remove(); + set.remove(last); + last = null; + } - @Override - public void add(E object) { - if (set.contains(object) == false) { - super.add(object); - set.add(object); - } - } + @Override + public void add(E object) { + if (set.contains(object) == false) { + super.add(object); + set.add(object); + } + } - @Override - public void set(E object) { - throw new UnsupportedOperationException("ListIterator does not support set"); - } - } + @Override + public void set(E object) { + throw new UnsupportedOperationException( + "ListIterator does not support set"); + } + } } diff --git a/src/test/java/org/apache/commons/collections/list/SetUniqueListTest.java b/src/test/java/org/apache/commons/collections/list/SetUniqueListTest.java index 38f5d4dc6..f0a51fa78 100644 --- a/src/test/java/org/apache/commons/collections/list/SetUniqueListTest.java +++ b/src/test/java/org/apache/commons/collections/list/SetUniqueListTest.java @@ -523,6 +523,79 @@ public class SetUniqueListTest extends AbstractListTest { assertFalse(subUniqueList.contains("World")); // fails } + @SuppressWarnings("unchecked") + public void testRetainAll() { + List list = new ArrayList(10); + SetUniqueList uniqueList = SetUniqueList.setUniqueList(list); + for (int i = 0; i < 10; ++i) { + uniqueList.add((E)Integer.valueOf(i)); + } + + Collection retained = new ArrayList(5); + for (int i = 0; i < 5; ++i) { + retained.add((E)Integer.valueOf(i * 2)); + } + + assertTrue(uniqueList.retainAll(retained)); + assertEquals(5, uniqueList.size()); + assertTrue(uniqueList.contains(Integer.valueOf(0))); + assertTrue(uniqueList.contains(Integer.valueOf(2))); + assertTrue(uniqueList.contains(Integer.valueOf(4))); + assertTrue(uniqueList.contains(Integer.valueOf(6))); + assertTrue(uniqueList.contains(Integer.valueOf(8))); + } + + @SuppressWarnings("unchecked") + public void testRetainAllWithInitialList() { + // initialized with empty list + List list = new ArrayList(10); + for (int i = 0; i < 5; ++i) { + list.add((E)Integer.valueOf(i)); + } + SetUniqueList uniqueList = SetUniqueList.setUniqueList(list); + for (int i = 5; i < 10; ++i) { + uniqueList.add((E)Integer.valueOf(i)); + } + + Collection retained = new ArrayList(5); + for (int i = 0; i < 5; ++i) { + retained.add((E)Integer.valueOf(i * 2)); + } + + assertTrue(uniqueList.retainAll(retained)); + assertEquals(5, uniqueList.size()); + assertTrue(uniqueList.contains(Integer.valueOf(0))); + assertTrue(uniqueList.contains(Integer.valueOf(2))); + assertTrue(uniqueList.contains(Integer.valueOf(4))); + assertTrue(uniqueList.contains(Integer.valueOf(6))); + assertTrue(uniqueList.contains(Integer.valueOf(8))); + } + + /* + * test case for https://issues.apache.org/jira/browse/COLLECTIONS-427 + */ + public void testRetainAllCollections427() { + int size = 50000; + ArrayList list = new ArrayList(); + for (int i = 0; i < size; i++) { + list.add(i); + } + SetUniqueList uniqueList = SetUniqueList.setUniqueList(list); + ArrayList toRetain = new ArrayList(); + for (int i = size; i < 2*size; i++) { + toRetain.add(i); + } + + long start = System.currentTimeMillis(); + uniqueList.retainAll(toRetain); + long stop = System.currentTimeMillis(); + + // make sure retainAll completes under 5 seconds + // TODO if test is migrated to JUnit 4, add a Timeout rule. + // http://kentbeck.github.com/junit/javadoc/latest/org/junit/rules/Timeout.html + assertTrue((stop - start) < 5000); + } + @SuppressWarnings("serial") class SetUniqueList307 extends SetUniqueList { public SetUniqueList307(List list, Set set) {