LUCENE-4448: speedups for AnalyzingSuggester

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391698 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-09-28 23:49:49 +00:00
parent f2f91bae46
commit e2540f1a04
2 changed files with 11 additions and 2 deletions

View File

@ -83,6 +83,7 @@ public class TokenStreamToAutomaton {
* automaton where arcs are bytes from each term. */
public Automaton toAutomaton(TokenStream in) throws IOException {
final Automaton a = new Automaton();
boolean deterministic = true;
final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
@ -132,6 +133,11 @@ public class TokenStreamToAutomaton {
}
}
positions.freeBefore(pos);
} else {
// note: this isn't necessarily true. its just that we aren't surely det.
// we could optimize this further (e.g. buffer and sort synonyms at a position)
// but thats probably overkill. this is cheap and dirty
deterministic = false;
}
final int endPos = pos + posLengthAtt.getPositionLength();
@ -161,7 +167,7 @@ public class TokenStreamToAutomaton {
}
//toDot(a);
a.setDeterministic(deterministic);
return a;
}

View File

@ -45,6 +45,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.lucene.util.automaton.State;
import org.apache.lucene.util.automaton.Transition;
@ -246,6 +247,7 @@ public class AnalyzingSuggester extends Lookup {
// but because we are going in reverse topo sort
// it will not add any SEP/HOLE transitions:
state.addEpsilon(t.getDest());
a.setDeterministic(false);
t = null;
}
} else if (t.getMin() == TokenStreamToAutomaton.HOLE) {
@ -263,6 +265,7 @@ public class AnalyzingSuggester extends Lookup {
// but because we are going in reverse topo sort
// it will not add any SEP/HOLE transitions:
state.addEpsilon(t.getDest());
a.setDeterministic(false);
t = null;
}
if (t != null) {
@ -504,7 +507,7 @@ public class AnalyzingSuggester extends Lookup {
// TODO: we can optimize this somewhat by determinizing
// while we convert
automaton = Automaton.minimize(automaton);
BasicOperations.determinize(automaton);
final CharsRef spare = new CharsRef();