mirror of https://github.com/apache/lucene.git
LUCENE-4448: speedups for AnalyzingSuggester
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1391698 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f2f91bae46
commit
e2540f1a04
|
@ -83,6 +83,7 @@ public class TokenStreamToAutomaton {
|
|||
* automaton where arcs are bytes from each term. */
|
||||
public Automaton toAutomaton(TokenStream in) throws IOException {
|
||||
final Automaton a = new Automaton();
|
||||
boolean deterministic = true;
|
||||
|
||||
final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
|
||||
final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
|
||||
|
@ -132,6 +133,11 @@ public class TokenStreamToAutomaton {
|
|||
}
|
||||
}
|
||||
positions.freeBefore(pos);
|
||||
} else {
|
||||
// note: this isn't necessarily true. its just that we aren't surely det.
|
||||
// we could optimize this further (e.g. buffer and sort synonyms at a position)
|
||||
// but thats probably overkill. this is cheap and dirty
|
||||
deterministic = false;
|
||||
}
|
||||
|
||||
final int endPos = pos + posLengthAtt.getPositionLength();
|
||||
|
@ -161,7 +167,7 @@ public class TokenStreamToAutomaton {
|
|||
}
|
||||
|
||||
//toDot(a);
|
||||
|
||||
a.setDeterministic(deterministic);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.lucene.util.automaton.State;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
|
@ -246,6 +247,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
// but because we are going in reverse topo sort
|
||||
// it will not add any SEP/HOLE transitions:
|
||||
state.addEpsilon(t.getDest());
|
||||
a.setDeterministic(false);
|
||||
t = null;
|
||||
}
|
||||
} else if (t.getMin() == TokenStreamToAutomaton.HOLE) {
|
||||
|
@ -263,6 +265,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
// but because we are going in reverse topo sort
|
||||
// it will not add any SEP/HOLE transitions:
|
||||
state.addEpsilon(t.getDest());
|
||||
a.setDeterministic(false);
|
||||
t = null;
|
||||
}
|
||||
if (t != null) {
|
||||
|
@ -504,7 +507,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
// TODO: we can optimize this somewhat by determinizing
|
||||
// while we convert
|
||||
automaton = Automaton.minimize(automaton);
|
||||
BasicOperations.determinize(automaton);
|
||||
|
||||
final CharsRef spare = new CharsRef();
|
||||
|
||||
|
|
Loading…
Reference in New Issue