Further optimization of LANG-935:

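- Avoid repeated toString() calls on the replacement sequence by converting it to a String once, when the lookup table is built.
- Avoid calculating the maximum match length when the character at the current index is not a known prefix.
- Fix up the comment describing the greedy algorithm.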

Thanks to Fabian Lange.



git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1669520 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Benedikt Ritter 2015-03-27 08:03:41 +00:00
parent a211ef288c
commit 2f62425c31
1 changed file with 20 additions and 24 deletions

View File

@ -29,7 +29,7 @@
*/ */
public class LookupTranslator extends CharSequenceTranslator { public class LookupTranslator extends CharSequenceTranslator {
private final HashMap<String, CharSequence> lookupMap; private final HashMap<String, String> lookupMap;
private final HashSet<Character> prefixSet; private final HashSet<Character> prefixSet;
private final int shortest; private final int shortest;
private final int longest; private final int longest;
@ -37,21 +37,20 @@ public class LookupTranslator extends CharSequenceTranslator {
/** /**
* Define the lookup table to be used in translation * Define the lookup table to be used in translation
* *
* Note that, as of Lang 3.1, the key to the lookup table is converted to a * Note that, as of Lang 3.1, the key to the lookup table is converted to a
* java.lang.String, while the value remains as a java.lang.CharSequence. * java.lang.String. This is because we need the key to support hashCode and
* This is because we need the key to support hashCode and equals(Object), * equals(Object), allowing it to be the key for a HashMap. See LANG-882.
* allowing it to be the key for a HashMap. See LANG-882.
* *
* @param lookup CharSequence[][] table of size [*][2] * @param lookup CharSequence[][] table of size [*][2]
*/ */
public LookupTranslator(final CharSequence[]... lookup) { public LookupTranslator(final CharSequence[]... lookup) {
lookupMap = new HashMap<String, CharSequence>(); lookupMap = new HashMap<String, String>();
prefixSet = new HashSet<Character>(); prefixSet = new HashSet<Character>();
int _shortest = Integer.MAX_VALUE; int _shortest = Integer.MAX_VALUE;
int _longest = 0; int _longest = 0;
if (lookup != null) { if (lookup != null) {
for (final CharSequence[] seq : lookup) { for (final CharSequence[] seq : lookup) {
this.lookupMap.put(seq[0].toString(), seq[1]); this.lookupMap.put(seq[0].toString(), seq[1].toString());
this.prefixSet.add(seq[0].charAt(0)); this.prefixSet.add(seq[0].charAt(0));
final int sz = seq[0].length(); final int sz = seq[0].length();
if (sz < _shortest) { if (sz < _shortest) {
@ -71,24 +70,21 @@ public LookupTranslator(final CharSequence[]... lookup) {
*/ */
@Override @Override
public int translate(final CharSequence input, final int index, final Writer out) throws IOException { public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
int max = longest; // check if translation exists for the input at position index
if (index + longest > input.length()) { if (prefixSet.contains(input.charAt(index))) {
max = input.length() - index; int max = longest;
} if (index + longest > input.length()) {
max = input.length() - index;
}
// implement greedy algorithm by trying maximum match first
for (int i = max; i >= shortest; i--) {
final CharSequence subSeq = input.subSequence(index, index + i);
final String result = lookupMap.get(subSeq.toString());
if (!prefixSet.contains(input.charAt(index))) { if (result != null) {
// no translation exists for the input at position index out.write(result);
return 0; return i;
} }
// descend so as to get a greedy algorithm
for (int i = max; i >= shortest; i--) {
final CharSequence subSeq = input.subSequence(index, index + i);
final CharSequence result = lookupMap.get(subSeq.toString());
if (result != null) {
out.write(result.toString());
return i;
} }
} }
return 0; return 0;