[COLLECTIONS-404] moved to comparators.sequence package, cleanup.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/collections/trunk@1361677 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Thomas Neidhart 2012-07-15 10:12:33 +00:00
parent 203a81890d
commit bb8bb80328
16 changed files with 496 additions and 491 deletions

View File

@ -72,6 +72,10 @@
<name>Geir Magnusson</name>
<id>geirm</id>
</developer>
<developer>
<name>Luc Maisonobe</name>
<id>luc</id>
</developer>
<developer>
<name>Craig McClanahan</name>
<id>craigmcc</id>
@ -299,6 +303,9 @@
<contributor>
<name>Joe Raysa</name>
</contributor>
<contributor>
<name>Jordane Sarda</name>
</contributor>
<contributor>
<name>Thomas Schapitz</name>
</contributor>

View File

@ -14,25 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
package org.apache.commons.collections.comparators.sequence;
/** This interface should be implemented by user object to walk
/**
* This interface should be implemented by a user object in order to walk
* through {@link EditScript EditScript} objects.
* <p>Users should implement this interface in order to walk through
* <p>
* Users should implement this interface in order to walk through
* the {@link EditScript EditScript} object created by the comparison
* of two sequences. This is a direct application of the visitor
* design pattern. The {@link EditScript#visit EditScript.visit}
* method takes an object implementing this interface as an argument,
* it will perform the loop over all commands in the script and the
* proper methods of the user class will be called as the commands are
* encountered.</p>
* <p>The implementation of the user visitor class will depend on the
* encountered.
* <p>
* The implementation of the user visitor class will depend on the
* need. Here are two examples.
* </p>
*
* <p>
* The first example is a visitor that builds the longest common
* subsequence:
@ -65,8 +63,6 @@ package org.apache.commons.collections.list.difference;
*
* }
* </pre>
* </p>
*
* <p>
* The second example is a visitor that shows the commands and the way
* they transform the first sequence into the second one:
@ -117,31 +113,30 @@ package org.apache.commons.collections.list.difference;
*
* }
* </pre>
* </p>
*
* @since 4.0
* @author Jordane Sarda
* @author Luc Maisonobe
* @version $Id$
*/
public interface CommandVisitor<T> {
/** Method called when an insert command is encountered.
* @param object object to insert (this object comes from the
* second sequence)
/**
* Method called when an insert command is encountered.
*
* @param object object to insert (this object comes from the second sequence)
*/
void visitInsertCommand(T object);
/** Method called when a keep command is encountered.
* @param object object to keep (this object comes from the
* first sequence)
/**
* Method called when a keep command is encountered.
*
* @param object object to keep (this object comes from the first sequence)
*/
void visitKeepCommand(T object);
/** Method called when a delete command is encountered.
* @param object object to delete (this object comes from the
* first sequence)
/**
* Method called when a delete command is encountered.
*
* @param object object to delete (this object comes from the first sequence)
*/
void visitDeleteCommand(T object);

View File

@ -14,41 +14,39 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
/** Command representing the deletion of one object of the first sequence.
* When one object of the first sequence has no corresponding object
* in the second sequence at the right place, the {@link EditScript
* edit script} transforming the first sequence into the second
* sequence uses an instance of this class to represent the deletion
* of this object. The objects embedded in these type of commands
* always come from the first sequence.
package org.apache.commons.collections.comparators.sequence;
/**
* Command representing the deletion of one object of the first sequence.
* <p>
* When one object of the first sequence has no corresponding object in the
* second sequence at the right place, the {@link EditScript edit script}
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the deletion of this object. The objects embedded in
* this type of command always come from the first sequence.
*
* @see SequencesComparator
* @see EditScript
*
* @since 4.0
* @author Jordane Sarda
* @author Luc Maisonobe
* @version $Id$
*/
public class DeleteCommand<T> extends EditCommand<T> {
/** Simple constructor.
* Creates a new instance of DeleteCommand
* @param object the object of the first sequence that should be deleted
/**
* Simple constructor. Creates a new instance of {@link DeleteCommand}.
*
* @param object the object of the first sequence that should be deleted
*/
public DeleteCommand(T object) {
super(object);
}
/** Accept a visitor.
* When a <code>DeleteCommand</code> accepts a visitor, it calls
* its {@link CommandVisitor#visitDeleteCommand
* visitDeleteCommand} method.
* @param visitor the visitor to be accepted
/**
* Accept a visitor. When a <code>DeleteCommand</code> accepts a visitor, it calls
* its {@link CommandVisitor#visitDeleteCommand visitDeleteCommand} method.
*
* @param visitor the visitor to be accepted
*/
@Override
public void accept(CommandVisitor<T> visitor) {

View File

@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.comparators.sequence;
/**
 * Common abstract parent of every command used to transform one sequence of
 * objects into another one.
 * <p>
 * When two sequences of objects are compared through the
 * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
 * the result is provided as a {@link EditScript script} containing the
 * commands that progressively transform the first sequence into the second
 * one.
 * <p>
 * There are only three types of commands, all of which are subclasses of this
 * abstract class. Each command is associated with one object belonging to at
 * least one of the sequences:
 * <ul>
 * <li>{@link InsertCommand InsertCommand} corresponds to an object of the
 * second sequence being inserted into the first sequence,</li>
 * <li>{@link DeleteCommand DeleteCommand} corresponds to an object of the
 * first sequence being removed,</li>
 * <li>{@link KeepCommand KeepCommand} corresponds to an object of the first
 * sequence which <code>equals</code> an object in the second sequence.</li>
 * </ul>
 * It is guaranteed that comparison is always performed this way (i.e. the
 * <code>equals</code> method of the object from the first sequence is used and
 * the object passed as an argument comes from the second sequence); this can
 * be important if subclassing is used for some elements in the first sequence
 * and the <code>equals</code> method is specialized.
 *
 * @see SequencesComparator
 * @see EditScript
 *
 * @since 4.0
 * @version $Id$
 */
public abstract class EditCommand<T> {

    /** Object on which the command should be applied. */
    protected T object;

    /**
     * Simple constructor. Creates a new instance of EditCommand.
     *
     * @param object reference to the object associated with this command; it
     *               refers to an element of one of the sequences being compared
     */
    protected EditCommand(T object) {
        this.object = object;
    }

    /**
     * Accept a visitor.
     * <p>
     * This method is invoked for each command belonging to an
     * {@link EditScript EditScript}, in order to implement the visitor design
     * pattern.
     *
     * @param visitor the visitor to be accepted
     */
    public abstract void accept(CommandVisitor<T> visitor);

}

View File

@ -14,36 +14,33 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
package org.apache.commons.collections.comparators.sequence;
import java.util.ArrayList;
import java.util.List;
/**
* This class gathers all the {@link EditCommand commands} needed to
* transform one objects sequence into another objects sequence.
* <p>An edit script is the most general view of the differences
* between two sequences. It is built as the result of the comparison
* between two sequences by the {@link SequencesComparator
* SequencesComparator} class. The user can walk through it using
* the <em>visitor</em> design pattern.</p>
* <p>It is guaranteed that the objects embedded in the {@link
* InsertCommand insert commands} come from the second sequence and
* that the objects embedded in either the {@link DeleteCommand delete
* commands} or {@link KeepCommand keep commands} come from the first
* sequence. This can be important if subclassing is used for some
* elements in the first sequence and the <code>equals</code> method
* is specialized.</p>
* This class gathers all the {@link EditCommand commands} needed to transform
* one objects sequence into another objects sequence.
* <p>
* An edit script is the most general view of the differences between two
* sequences. It is built as the result of the comparison between two sequences
* by the {@link SequencesComparator SequencesComparator} class. The user can
* walk through it using the <em>visitor</em> design pattern.
* <p>
* It is guaranteed that the objects embedded in the {@link InsertCommand insert
* commands} come from the second sequence and that the objects embedded in
* either the {@link DeleteCommand delete commands} or {@link KeepCommand keep
* commands} come from the first sequence. This can be important if subclassing
* is used for some elements in the first sequence and the <code>equals</code>
* method is specialized.
*
* @see SequencesComparator
* @see EditCommand
* @see CommandVisitor
* @see ReplacementsHandler
*
*
* @since 4.0
* @author Jordane Sarda
* @author Luc Maisonobe
* @version $Id$
*/
public class EditScript<T> {
@ -57,46 +54,53 @@ public class EditScript<T> {
/** Number of modifications. */
private int modifications;
/** Simple constructor.
* Creates a new empty script.
/**
* Simple constructor. Creates a new empty script.
*/
public EditScript() {
commands = new ArrayList<EditCommand<T>>();
lcsLength = 0;
commands = new ArrayList<EditCommand<T>>();
lcsLength = 0;
modifications = 0;
}
/** Add a keep command to the script.
* @param command command to add
/**
* Add a keep command to the script.
*
* @param command command to add
*/
public void append(KeepCommand<T> command) {
commands.add(command);
++lcsLength;
}
/** Add an insert command to the script.
* @param command command to add
/**
* Add an insert command to the script.
*
* @param command command to add
*/
public void append(InsertCommand<T> command) {
commands.add(command);
++modifications;
}
/** Add a delete command to the script.
* @param command command to add
/**
* Add a delete command to the script.
*
* @param command command to add
*/
public void append(DeleteCommand<T> command) {
commands.add(command);
++modifications;
}
/** Visit the script.
* The script implements the <em>visitor</em> design pattern, this
* method is the entry point to which the user supplies its own
* visitor, the script will be responsible to drive it through the
* commands in order and call the appropriate method as each
* command is encountered.
* @param visitor the visitor that will visit all commands in turn
/**
* Visit the script. The script implements the <em>visitor</em> design
* pattern, this method is the entry point to which the user supplies its
* own visitor, the script will be responsible to drive it through the
* commands in order and call the appropriate method as each command is
* encountered.
*
* @param visitor the visitor that will visit all commands in turn
*/
public void visit(CommandVisitor<T> visitor) {
for (EditCommand<T> command : commands) {
@ -104,19 +108,22 @@ public class EditScript<T> {
}
}
/** Get the length of the Longest Common Subsequence (LCS).
* The length of the longest common subsequence is the number of
* {@link KeepCommand keep commands} in the script.
/**
* Get the length of the Longest Common Subsequence (LCS). The length of the
* longest common subsequence is the number of {@link KeepCommand keep
* commands} in the script.
*
* @return length of the Longest Common Subsequence
*/
public int getLCSLength() {
return lcsLength;
}
/** Get the number of effective modifications.
* The number of effective modification is the number of {@link
* DeleteCommand delete} and {@link InsertCommand insert} commands
* in the script.
/**
* Get the number of effective modifications. The number of effective
* modifications is the number of {@link DeleteCommand delete} and
* {@link InsertCommand insert} commands in the script.
*
* @return number of effective modifications
*/
public int getModifications() {

View File

@ -14,42 +14,41 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
/** Command representing the insertion of one object of the second sequence.
* When one object of the second sequence has no corresponding object
* in the first sequence at the right place, the {@link EditScript
* edit script} transforming the first sequence into the second
* sequence uses an instance of this class to represent the insertion
* of this object. The objects embedded in these type of commands
* always come from the second sequence.
package org.apache.commons.collections.comparators.sequence;
/**
* Command representing the insertion of one object of the second sequence.
* <p>
* When one object of the second sequence has no corresponding object in the
* first sequence at the right place, the {@link EditScript edit script}
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the insertion of this object. The objects embedded in
* this type of command always come from the second sequence.
*
* @see SequencesComparator
* @see EditScript
*
* @since 4.0
* @author Jordane Sarda
* @author Luc Maisonobe
* @version $Id$
*/
public class InsertCommand<T> extends EditCommand<T> {
/** Simple constructor.
* Creates a new instance of InsertCommand
* @param object the object of the second sequence that should be inserted
/**
* Simple constructor. Creates a new instance of InsertCommand
*
* @param object the object of the second sequence that should be inserted
*/
public InsertCommand(T object) {
super(object);
}
/** Accept a visitor.
* When an <code>InsertCommand</code> accepts a visitor, it calls
* its {@link CommandVisitor#visitInsertCommand
* visitInsertCommand} method.
* @param visitor the visitor to be accepted
*/
/**
* Accept a visitor. When an <code>InsertCommand</code> accepts a visitor,
* it calls its {@link CommandVisitor#visitInsertCommand visitInsertCommand}
* method.
*
* @param visitor the visitor to be accepted
*/
@Override
public void accept(CommandVisitor<T> visitor) {
visitor.visitInsertCommand(object);

View File

@ -14,44 +14,42 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
/** Command representing the keeping of one object present in both sequences.
* When one object of the first sequence <code>equals</code> another
* objects in the second sequence at the right place, the {@link
* EditScript edit script} transforming the first sequence into the
* second sequence uses an instance of this class to represent the
* keeping of this object. The objects embedded in these type of
* commands always come from the first sequence.
package org.apache.commons.collections.comparators.sequence;
/**
* Command representing the keeping of one object present in both sequences.
* <p>
* When one object of the first sequence <code>equals</code> another object in
* the second sequence at the right place, the {@link EditScript edit script}
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the keeping of this object. The objects embedded in
* this type of command always come from the first sequence.
*
* @see SequencesComparator
* @see EditScript
*
* @since 4.0
* @author Jordane Sarda
* @author Luc Maisonobe
* @version $Id$
*/
public class KeepCommand<T> extends EditCommand<T> {
/** Simple constructor.
* Creates a new instance of KeepCommand
* @param object the object belonging to both sequences (the
* object is a reference to the instance in the first sequence
* which is known to be equal to an instance in the second
* sequence)
/**
* Simple constructor. Creates a new instance of KeepCommand
*
* @param object the object belonging to both sequences (the object is a
* reference to the instance in the first sequence which is known
* to be equal to an instance in the second sequence)
*/
public KeepCommand(T object) {
super(object);
}
/** Accept a visitor.
* When a <code>KeepCommand</code> accepts a visitor, it calls
* its {@link CommandVisitor#visitKeepCommand visitKeepCommand} method.
* @param visitor the visitor to be accepted
*/
/**
* Accept a visitor. When a <code>KeepCommand</code> accepts a visitor, it
* calls its {@link CommandVisitor#visitKeepCommand visitKeepCommand} method.
*
* @param visitor the visitor to be accepted
*/
@Override
public void accept(CommandVisitor<T> visitor) {
visitor.visitKeepCommand(object);

View File

@ -14,42 +14,38 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
package org.apache.commons.collections.comparators.sequence;
import java.util.ArrayList;
import java.util.List;
/**
* This class handles sequences of replacements resulting from a
* comparison.
* <p>The comparison of two objects sequences leads to the
* identification of common parts and parts which only belong to the
* first or to the second sequence. The common parts appear in the
* edit script in the form of <em>keep</em> commands, they can be considered
* as synchronization objects between the two sequences. These
* synchronization objects split the two sequences in synchronized
* sub-sequences. The first sequence can be transformed into the second
* one by replacing each synchronized sub-sequence of the first
* sequence by the corresponding sub-sequence of the second
* sequence. This is a synthetic way to see an {@link EditScript edit
* script}, replacing individual {@link DeleteCommand delete}, {@link
* KeepCommand keep} and {@link InsertCommand insert} commands by
* fewer replacements acting on complete sub-sequences.</p>
* <p>This class is devoted to perform this interpretation. It visits
* an {@link EditScript edit script} (because it implements the {@link
* CommandVisitor CommandVisitor} interface) and calls a user-supplied
* handler implementing the {@link ReplacementsHandler
* ReplacementsHandler} interface to process the sub-sequences.</p>
* This class handles sequences of replacements resulting from a comparison.
* <p>
* The comparison of two objects sequences leads to the identification of common
* parts and parts which only belong to the first or to the second sequence. The
* common parts appear in the edit script in the form of <em>keep</em> commands,
* they can be considered as synchronization objects between the two sequences.
* These synchronization objects split the two sequences in synchronized
* sub-sequences. The first sequence can be transformed into the second one by
* replacing each synchronized sub-sequence of the first sequence by the
* corresponding sub-sequence of the second sequence. This is a synthetic way to
* see an {@link EditScript edit script}, replacing individual
* {@link DeleteCommand delete}, {@link KeepCommand keep} and
* {@link InsertCommand insert} commands by fewer replacements acting on
* complete sub-sequences.
* <p>
* This class is devoted to perform this interpretation. It visits an
* {@link EditScript edit script} (because it implements the
* {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
* handler implementing the {@link ReplacementsHandler ReplacementsHandler}
* interface to process the sub-sequences.
*
* @see ReplacementsHandler
* @see EditScript
* @see SequencesComparator
*
* @since 4.0
* @author Luc Maisonobe
* @author Jordane Sarda
* @version $Id$
*/
public class ReplacementsFinder<T> implements CommandVisitor<T> {
@ -61,10 +57,10 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> {
/** Handler to call when synchronized sequences are found. */
private ReplacementsHandler<T> handler;
/** Simple constructor.
* Creates a new instance of ReplacementsFinder
* @param handler handler to call when synchronized sequences are
* found
/**
* Simple constructor. Creates a new instance of {@link ReplacementsFinder}.
*
* @param handler handler to call when synchronized sequences are found
*/
public ReplacementsFinder(ReplacementsHandler<T> handler) {
pendingInsertions = new ArrayList<T>();
@ -73,18 +69,22 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> {
this.handler = handler;
}
/** Add an object to the pending insertions set.
* @param object object to insert
/**
* Add an object to the pending insertions set.
*
* @param object object to insert
*/
public void visitInsertCommand(T object) {
pendingInsertions.add(object);
}
/** Handle a synchronization object.
* <p>When a synchronization object is identified, the pending
* insertions and pending deletions sets are provided to the user
* handler as subsequences.</p>
* @param object synchronization object detected
/**
* Handle a synchronization object.
* <p>
* When a synchronization object is identified, the pending insertions and
* pending deletions sets are provided to the user handler as subsequences.
*
* @param object synchronization object detected
*/
public void visitKeepCommand(T object) {
if (pendingDeletions.isEmpty() && pendingInsertions.isEmpty()) {
@ -97,8 +97,10 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> {
}
}
/** Add an object to the pending deletions set.
* @param object object to delete
/**
* Add an object to the pending deletions set.
*
* @param object object to delete
*/
public void visitDeleteCommand(T object) {
pendingDeletions.add(object);

View File

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.comparators.sequence;
import java.util.List;
/**
 * Callback interface for handling synchronized replacement sequences.
 *
 * @see ReplacementsFinder
 * @since 4.0
 * @version $Id$
 */
public interface ReplacementsHandler<T> {

    /**
     * Handle two synchronized sequences.
     * <p>
     * A {@link ReplacementsFinder ReplacementsFinder} instance calls this
     * method once it has synchronized two sub-sequences of the object arrays
     * being compared, and at least one of the sequences is non-empty. Because
     * the sequences are synchronized, the objects located just before the two
     * sub-sequences are equal (if they exist). The same property holds for the
     * objects located just after the two sub-sequences.
     * <p>
     * The replacement is defined as replacing the <code>from</code>
     * sub-sequence into the <code>to</code> sub-sequence.
     *
     * @param skipped number of tokens skipped since the last call (i.e. number
     *                of tokens that were in both sequences); this number should
     *                be strictly positive except on the very first call, where
     *                it can be zero (if the first objects of the two sequences
     *                are different)
     * @param from sub-sequence of objects coming from the first sequence
     * @param to sub-sequence of objects coming from the second sequence
     */
    void handleReplacement(int skipped, List<T> from, List<T> to);

}

View File

@ -14,49 +14,43 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
package org.apache.commons.collections.comparators.sequence;
import java.util.List;
/**
* This class allows comparing two objects sequences.
* <p>The two sequences can hold any object type, as only the
* <code>equals</code> method is used to compare the elements of the
* sequences. It is guaranteed that the comparisons will always be
* done as <code>o1.equals(o2)</code> where <code>o1</code> belongs to
* the first sequence and <code>o2</code> belongs to the second
* sequence. This can be important if subclassing is used for some
* elements in the first sequence and the <code>equals</code> method
* is specialized.</p>
* <p>Comparison can be seen from two points of view: either as
* giving the smallest modification allowing to transform the first
* sequence into the second one, or as giving the longest sequence
* which is a subsequence of both initial sequences. The
* <code>equals</code> method is used to compare objects, so any
* object can be put into sequences. Modifications include deleting,
* inserting or keeping one object, starting from the beginning of the
* first sequence.</p>
* <p>This class implements the comparison algorithm, which is the
* very efficient algorithm from Eugene W. Myers <a
* href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">An
* O(ND) Difference Algorithm and Its Variations</a>. This algorithm
* produces the shortest possible {@link
* org.apache.commons.collections.list.difference.EditScript edit script}
* containing all the {@link
* org.apache.commons.collections.list.difference.EditCommand commands} needed to
* transform the first sequence into the second one.</p>
* <p>
* The two sequences can hold any object type, as only the <code>equals</code>
* method is used to compare the elements of the sequences. It is guaranteed
* that the comparisons will always be done as <code>o1.equals(o2)</code> where
* <code>o1</code> belongs to the first sequence and <code>o2</code> belongs to
* the second sequence. This can be important if subclassing is used for some
* elements in the first sequence and the <code>equals</code> method is
* specialized.
* <p>
* Comparison can be seen from two points of view: either as giving the smallest
* modification allowing to transform the first sequence into the second one, or
* as giving the longest sequence which is a subsequence of both initial
* sequences. The <code>equals</code> method is used to compare objects, so any
* object can be put into sequences. Modifications include deleting, inserting
* or keeping one object, starting from the beginning of the first sequence.
* <p>
* This class implements the comparison algorithm, which is the very efficient
* algorithm from Eugene W. Myers
* <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
* An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
* the shortest possible
* {@link EditScript edit script}
* containing all the
* {@link EditCommand commands}
* needed to transform the first sequence into the second one.
*
* @see EditScript
* @see EditCommand
* @see CommandVisitor
*
* @since 4.0
* @author Jordane Sarda
* @version $Id$
*/
public class SequencesComparator<T> {
@ -71,16 +65,21 @@ public class SequencesComparator<T> {
private int[] vDown;
private int[] vUp;
/** Simple constructor.
* <p>Creates a new instance of SequencesComparator</p>
* <p>It is <em>guaranteed</em> that the comparisons will always be
* done as <code>o1.equals(o2)</code> where <code>o1</code> belongs
* to the first sequence and <code>o2</code> belongs to the second
* sequence. This can be important if subclassing is used for some
* elements in the first sequence and the <code>equals</code> method
* is specialized.</p>
* @param sequence1 first sequence to be compared
* @param sequence2 second sequence to be compared
/**
* Simple constructor.
* <p>
* Creates a new instance of SequencesComparator.
* <p>
* It is <em>guaranteed</em> that the comparisons will always be done as
* <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
* sequence and <code>o2</code> belongs to the second sequence. This can be
* important if subclassing is used for some elements in the first sequence
* and the <code>equals</code> method is specialized.
*
* @param sequence1
* first sequence to be compared
* @param sequence2
* second sequence to be compared
*/
public SequencesComparator(List<T> sequence1, List<T> sequence2) {
this.sequence1 = sequence1;
@ -89,14 +88,15 @@ public class SequencesComparator<T> {
int size = sequence1.size() + sequence2.size() + 2;
vDown = new int[size];
vUp = new int[size];
}
/** Build a snake.
* @param start the value of the start of the snake
* @param diag the value of the diagonal of the snake
* @param end1 the value of the end of the first sequence to be compared
* @param end2 the value of the end of the second sequence to be compared
/**
* Build a snake.
*
* @param start the value of the start of the snake
* @param diag the value of the diagonal of the snake
* @param end1 the value of the end of the first sequence to be compared
* @param end2 the value of the end of the second sequence to be compared
* @return the snake built
*/
private Snake buildSnake(int start, int diag, int end1, int end2) {
@ -109,22 +109,25 @@ public class SequencesComparator<T> {
return new Snake(start, end, diag);
}
/** Get the middle snake corresponding to two subsequences of the
/**
* Get the middle snake corresponding to two subsequences of the
* main sequences.
* <p>
* The snake is found using the MYERS Algorithm (this algorithm has
* also been implemented in the GNU diff program). This algorithm is
* explained in Eugene Myers article: <a
* href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">An
* O(ND) Difference Algorithm and Its Variations</a>.
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
* explained in Eugene Myers article:
* <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
* An O(ND) Difference Algorithm and Its Variations</a>.
*
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
* @param end2 the end of the second sequence to be compared
* @return the middle snake
*/
private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
// Myers Algorithm
//Initialisations
// Initialisations
int m = end1 - start1;
int n = end2 - start2;
if ((m == 0) || (n == 0)) {
@ -193,14 +196,15 @@ public class SequencesComparator<T> {
// this should not happen
throw new RuntimeException("Internal Error");
}
/** Build an edit script.
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
/**
* Build an edit script.
*
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
* @param end2 the end of the second sequence to be compared
* @param script the edited script
*/
@ -245,16 +249,18 @@ public class SequencesComparator<T> {
}
}
/** Get the edit script script.
* <p>It is guaranteed that the objects embedded in the {@link
* InsertCommand insert commands} come from the second sequence and
* that the objects embedded in either the {@link DeleteCommand
* delete commands} or {@link KeepCommand keep commands} come from
* the first sequence. This can be important if subclassing is used
* for some elements in the first sequence and the
* <code>equals</code> method is specialized.</p>
/**
* Get the edit script.
* <p>
* It is guaranteed that the objects embedded in the {@link InsertCommand
* insert commands} come from the second sequence and that the objects
* embedded in either the {@link DeleteCommand delete commands} or
* {@link KeepCommand keep commands} come from the first sequence. This can
* be important if subclassing is used for some elements in the first
* sequence and the <code>equals</code> method is specialized.
*
* @return the edit script resulting from the comparison of the two
* sequences
* sequences
*/
public EditScript<T> getScript() {
EditScript<T> script = new EditScript<T>();

View File

@ -14,20 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
package org.apache.commons.collections.comparators.sequence;
/**
* This class is a simple placeholder to hold the end part of a path
* under construction in a {@link SequencesComparator
* SequencesComparator}.
* <p>A snake is an internal structure used in Eugene W. Myers
* algorithm (<a
* href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">An
* O(ND) Difference Algorithm and Its Variations</a>).</p>
* under construction in a {@link SequencesComparator SequencesComparator}.
* <p>
* A snake is an internal structure used in Eugene W. Myers algorithm
* (<a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
* An O(ND) Difference Algorithm and Its Variations</a>).
*
* @since 4.0
* @author Jordane Sarda
* @version $Id$
*/
public class Snake {
@ -41,8 +38,8 @@ public class Snake {
/** Diagonal number. */
private int diag;
/** Simple constructor.
* Creates a new instance of Snake with default indices
/**
* Simple constructor. Creates a new instance of Snake with default indices.
*/
public Snake() {
start = -1;
@ -50,11 +47,12 @@ public class Snake {
diag = 0;
}
/** Simple constructor.
* Creates a new instance of Snake with specified indices
* @param start start index of the snake
* @param end end index of the snake
* @param diag diagonal number
/**
* Simple constructor. Creates a new instance of Snake with specified indices.
*
* @param start start index of the snake
* @param end end index of the snake
* @param diag diagonal number
*/
public Snake(int start, int end, int diag) {
this.start = start;
@ -62,21 +60,27 @@ public class Snake {
this.diag = diag;
}
/** Get the start index of the snake.
/**
* Get the start index of the snake.
*
* @return start index of the snake
*/
public int getStart() {
return start;
}
/** Get the end index of the snake.
/**
* Get the end index of the snake.
*
* @return end index of the snake
*/
public int getEnd() {
return end;
}
/** Get the diagonal number of the snake.
/**
* Get the diagonal number of the snake.
*
* @return diagonal number of the snake
*/
public int getDiag() {

View File

@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This package provides classes to compare two objects sequences.
* <p>
* The two sequences can hold any object type, as only the
* <code>equals</code> method is used to compare the elements of the
* sequences. It is guaranteed that the comparisons will always be done
* as <code>o1.equals(o2)</code> where <code>o1</code> belongs to the
* first sequence and <code>o2</code> belongs to the second
* sequence. This can be important if subclassing is used for some
* elements in the first sequence and the <code>equals</code> method is
* specialized.
* <p>
* Comparison can be seen from two points of view: either as giving the
* smallest modification allowing to transform the first sequence into
* the second one, or as giving the longest sequence which is a
* subsequence of both initial sequences. The <code>equals</code> method
* is used to compare objects, so any object can be put into
* sequences. Modifications include deleting, inserting or keeping one
* object, starting from the beginning of the first sequence. Like most
* algorithms of the same type, objects transpositions are not
* supported. This means that if a sequence <code>(A, B)</code> is
* compared to <code>(B, A)</code>, the result will be either the
* sequence of three commands <code>delete A</code>, <code>keep B</code>,
* <code>insert A</code> or the sequence <code>insert B</code>,
* <code>keep A</code>, <code>delete B</code>.
* <p>
* The package uses a very efficient comparison algorithm designed by
* Eugene W. Myers and described in his paper: <a
* href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">An O(ND)
* Difference Algorithm and Its Variations</a>. This algorithm produces
* the shortest possible {@link
* org.apache.commons.collections.comparators.sequence.EditScript edit script} containing
* all the {@link org.apache.commons.collections.comparators.sequence.EditCommand
* commands} needed to transform the first sequence into the second
* one. The entry point for the user to this algorithm is the {@link
* org.apache.commons.collections.comparators.sequence.SequencesComparator
* SequencesComparator} class.
* <p>
* As explained in Gene Myers paper, the edit script is equivalent to all
* other representations and contains all the needed information either
* to perform the transformation, of course, or to retrieve the longest
* common subsequence for example.
* <p>
* If the user needs a very fine grained access to the comparison result,
* he needs to go through this script by providing a visitor implementing
* the {@link org.apache.commons.collections.comparators.sequence.CommandVisitor
* CommandVisitor} interface.
* <p>
* Sometimes however, a more synthetic approach is needed. If the user
* prefers to see the differences between the two sequences as global
* <code>replacement</code> operations acting on complete subsequences of
* the original sequences, he will provide an object implementing the
* simple {@link org.apache.commons.collections.comparators.sequence.ReplacementsHandler
* ReplacementsHandler} interface, using an instance of the {@link
* org.apache.commons.collections.comparators.sequence.ReplacementsFinder
* ReplacementsFinder} class as a command converting layer between his
* object and the edit script. The number of objects which are common to
* both initial arrays and hence are skipped between each call to the user
* {@link org.apache.commons.collections.comparators.sequence.ReplacementsHandler#handleReplacement
* handleReplacement} method is also provided. This allows the user to keep
* track of the current index in both arrays if he needs so.
*
* @version $Id$
*/
package org.apache.commons.collections.comparators.sequence;

View File

@ -1,76 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
/** Abstract base class for all commands used to transform an objects
* sequence into another one.
* <p>When two objects sequences are compared through the {@link
* SequencesComparator#getScript SequencesComparator.getScript}
* method, the result is provided as a {@link EditScript script}
* containing the commands that progressively transform the first
* sequence into the second one.</p>
* <p>There are only three types of commands, all of which are
* subclasses of this abstract class. Each command is associated with
* one object belonging to at least one of the sequences. These
* commands are {@link InsertCommand InsertCommand} which corresponds
* to an object of the second sequence being inserted into the first
* sequence, {@link DeleteCommand DeleteCommand} which corresponds to
* an object of the first sequence being removed and {@link
* KeepCommand KeepCommand} which corresponds to an object of the first
* sequence which <code>equals</code> an object in the second
* sequence. It is guaranteed that comparison is always performed this
* way (i.e. the <code>equals</code> method of the object from the
* first sequence is used and the object passed as an argument comes
* from the second sequence) ; this can be important if subclassing is
* used for some elements in the first sequence and the
* <code>equals</code> method is specialized.</p>
* @see SequencesComparator
* @see EditScript
* @since 4.0
* @author Jordane Sarda
* @author Luc Maisonobe
* @version $Id$
*/
public abstract class EditCommand<T> {
/** Simple constructor.
* Creates a new instance of EditCommand
* @param object reference to the object associated with this
* command, this refers to an element of one of the sequences
* being compared
*/
protected EditCommand(T object) {
this.object = object;
}
/** Accept a visitor.
* This method is invoked for each commands belonging to an {@link
* EditScript EditScript}, in order to implement the visitor
* design pattern
* @param visitor the visitor to be accepted
*/
public abstract void accept(CommandVisitor<T> visitor);
/** Object on which the command should be applied. */
protected T object;
}

View File

@ -1,48 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
import java.util.List;
/** This interface is devoted to handle synchronized replacement sequences.
* @see ReplacementsFinder
* @since 4.0
* @author Luc Maisonobe
* @version $Id$
*/
public interface ReplacementsHandler<T> {
/** Handle two synchronized sequences.
* <p>This method is called by a {@link ReplacementsFinder
* ReplacementsFinder} instance when it has synchronized two
* sub-sequences of object arrays being compared, and at least one
* of the sequences is non-empty. Since the sequences are
* synchronized, the objects before the two sub-sequences are equal
* (if they exist). This property also holds for the objects after the
* two sub-sequences.</p>
* <p>The replacement is defined as replacing the <code>from</code>
* sub-sequence into the <code>to</code> sub-sequence.</p>
* @param skipped number of tokens skipped since the last call (i.e.
* number of tokens that were in both sequences), this number should
* be strictly positive except on the very first call where it can be
* zero (if the first object of the two sequences are different)
* @param from sub-sequence of objects coming from the first sequence
* @param to sub-sequence of objects coming from the second sequence
*/
public void handleReplacement(int skipped, List<T> from, List<T> to);
}

View File

@ -1,93 +0,0 @@
<!-- $Id$ -->
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<body>
This package provides classes to compare two objects sequences.
<p>
The two sequences can hold any object type, as only the
<code>equals</code> method is used to compare the elements of the
sequences. It is guaranteed that the comparisons will always be done
as <code>o1.equals(o2)</code> where <code>o1</code> belongs to the
first sequence and <code>o2</code> belongs to the second
sequence. This can be important if subclassing is used for some
elements in the first sequence and the <code>equals</code> method is
specialized.
</p>
<p>
Comparison can be seen from two points of view: either as giving the
smallest modification allowing to transform the first sequence into
the second one, or as giving the longest sequence which is a
subsequence of both initial sequences. The <code>equals</code> method
is used to compare objects, so any object can be put into
sequences. Modifications include deleting, inserting or keeping one
object, starting from the beginning of the first sequence. Like most
algorithms of the same type, objects transpositions are not
supported. This means that if a sequence <code>(A, B)</code> is
compared to <code>(B, A)</code>, the result will be either the
sequence of three commands <code>delete A</code>, <code>keep B</code>,
<code>insert A</code> or the sequence <code>insert B</code>,
<code>keep A</code>, <code>delete B</code>.
</p>
<p>
The package uses a very efficient comparison algorithm designed by
Eugene W. Myers and described in his paper: <a
href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">An O(ND)
Difference Algorithm and Its Variations</a>. This algorithm produces
the shortest possible {@link
org.apache.commons.collections.list.difference.EditScript edit script} containing
all the {@link org.apache.commons.collections.list.difference.EditCommand
commands} needed to transform the first sequence into the second
one. The entry point for the user to this algorithm is the {@link
org.apache.commons.collections.list.difference.SequencesComparator
SequencesComparator} class.
</p>
<p>
As explained in Gene Myers paper, the edit script is equivalent to all
other representations and contains all the needed information either
to perform the transformation, of course, or to retrieve the longest
common subsequence for example.
</p>
<p>
If the user needs a very fine grained access to the comparison result,
he needs to go through this script by providing a visitor implementing
the {@link org.apache.commons.collections.list.difference.CommandVisitor
CommandVisitor} interface.
</p>
<p>
Sometimes however, a more synthetic approach is needed. If the user
prefers to see the differences between the two sequences as global
<code>replacement</code> operations acting on complete subsequences of
the original sequences, he will provide an object implementing the
simple {@link org.apache.commons.collections.list.difference.ReplacementsHandler
ReplacementsHandler} interface, using an instance of the {@link
org.apache.commons.collections.list.difference.ReplacementsFinder
ReplacementsFinder} class as a command converting layer between his
object and the edit script. The number of objects which are common to
both initial arrays and hence are skipped between each call to the user
{@link
org.apache.commons.collections.list.difference.ReplacementsHandler#handleReplacement
handleReplacement} method is also provided. This allows the user to keep
track of the current index in both arrays if he needs so.
</p>
</body>

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections.list.difference;
package org.apache.commons.collections.comparators.sequence;
import java.util.ArrayList;
import java.util.Arrays;