Further optimization of LANG-935:
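- Avoid repeated toString() calls on the replacement sequence by converting it once, when the lookup table is built.
- Avoid calculating the maximum match length when it is not needed (i.e. when the character at the current index is not a known prefix).
- Fix up the comment for the greedy algorithm.

Thanks to Fabian Lange.

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1669520 13f79535-47bb-0310-9956-ffa450edef68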
This commit is contained in:
parent
a211ef288c
commit
2f62425c31
|
@ -29,7 +29,7 @@
|
||||||
*/
|
*/
|
||||||
public class LookupTranslator extends CharSequenceTranslator {
|
public class LookupTranslator extends CharSequenceTranslator {
|
||||||
|
|
||||||
private final HashMap<String, CharSequence> lookupMap;
|
private final HashMap<String, String> lookupMap;
|
||||||
private final HashSet<Character> prefixSet;
|
private final HashSet<Character> prefixSet;
|
||||||
private final int shortest;
|
private final int shortest;
|
||||||
private final int longest;
|
private final int longest;
|
||||||
|
@ -37,21 +37,20 @@ public class LookupTranslator extends CharSequenceTranslator {
|
||||||
/**
|
/**
|
||||||
* Define the lookup table to be used in translation
|
* Define the lookup table to be used in translation
|
||||||
*
|
*
|
||||||
* Note that, as of Lang 3.1, the key to the lookup table is converted to a
|
* Note that, as of Lang 3.1, the key to the lookup table is converted to a
|
||||||
* java.lang.String, while the value remains as a java.lang.CharSequence.
|
* java.lang.String. This is because we need the key to support hashCode and
|
||||||
* This is because we need the key to support hashCode and equals(Object),
|
* equals(Object), allowing it to be the key for a HashMap. See LANG-882.
|
||||||
* allowing it to be the key for a HashMap. See LANG-882.
|
|
||||||
*
|
*
|
||||||
* @param lookup CharSequence[][] table of size [*][2]
|
* @param lookup CharSequence[][] table of size [*][2]
|
||||||
*/
|
*/
|
||||||
public LookupTranslator(final CharSequence[]... lookup) {
|
public LookupTranslator(final CharSequence[]... lookup) {
|
||||||
lookupMap = new HashMap<String, CharSequence>();
|
lookupMap = new HashMap<String, String>();
|
||||||
prefixSet = new HashSet<Character>();
|
prefixSet = new HashSet<Character>();
|
||||||
int _shortest = Integer.MAX_VALUE;
|
int _shortest = Integer.MAX_VALUE;
|
||||||
int _longest = 0;
|
int _longest = 0;
|
||||||
if (lookup != null) {
|
if (lookup != null) {
|
||||||
for (final CharSequence[] seq : lookup) {
|
for (final CharSequence[] seq : lookup) {
|
||||||
this.lookupMap.put(seq[0].toString(), seq[1]);
|
this.lookupMap.put(seq[0].toString(), seq[1].toString());
|
||||||
this.prefixSet.add(seq[0].charAt(0));
|
this.prefixSet.add(seq[0].charAt(0));
|
||||||
final int sz = seq[0].length();
|
final int sz = seq[0].length();
|
||||||
if (sz < _shortest) {
|
if (sz < _shortest) {
|
||||||
|
@ -71,24 +70,21 @@ public LookupTranslator(final CharSequence[]... lookup) {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
|
public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
|
||||||
int max = longest;
|
// check if translation exists for the input at position index
|
||||||
if (index + longest > input.length()) {
|
if (prefixSet.contains(input.charAt(index))) {
|
||||||
max = input.length() - index;
|
int max = longest;
|
||||||
}
|
if (index + longest > input.length()) {
|
||||||
|
max = input.length() - index;
|
||||||
|
}
|
||||||
|
// implement greedy algorithm by trying maximum match first
|
||||||
|
for (int i = max; i >= shortest; i--) {
|
||||||
|
final CharSequence subSeq = input.subSequence(index, index + i);
|
||||||
|
final String result = lookupMap.get(subSeq.toString());
|
||||||
|
|
||||||
if (!prefixSet.contains(input.charAt(index))) {
|
if (result != null) {
|
||||||
// no translation exists for the input at position index
|
out.write(result);
|
||||||
return 0;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
// descend so as to get a greedy algorithm
|
|
||||||
for (int i = max; i >= shortest; i--) {
|
|
||||||
final CharSequence subSeq = input.subSequence(index, index + i);
|
|
||||||
final CharSequence result = lookupMap.get(subSeq.toString());
|
|
||||||
|
|
||||||
if (result != null) {
|
|
||||||
out.write(result.toString());
|
|
||||||
return i;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue