mirror of https://github.com/apache/lucene.git
LUCENE-3846: simplify overriding required for FuzzySuggester
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3846@1401372 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
95b99f48d2
commit
2f136ff27c
|
@ -501,7 +501,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
// Intersect automaton w/ suggest wFST and get all
|
||||
// prefix starting nodes & their outputs:
|
||||
final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst);
|
||||
//final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst);
|
||||
|
||||
//System.out.println(" prefixPaths: " + prefixPaths.size());
|
||||
|
||||
|
@ -511,8 +511,9 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
final List<LookupResult> results = new ArrayList<LookupResult>();
|
||||
|
||||
List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(lookupAutomaton, fst);
|
||||
|
||||
if (exactFirst) {
|
||||
final List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = intersector.intersectExact();
|
||||
|
||||
int count = 0;
|
||||
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
|
||||
|
@ -604,7 +605,9 @@ public class AnalyzingSuggester extends Lookup {
|
|||
}
|
||||
}
|
||||
};
|
||||
final List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = intersector.intersectAll();
|
||||
|
||||
prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst);
|
||||
|
||||
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
|
||||
searcher.addStartPaths(path.fstNode, path.output, true, path.input);
|
||||
}
|
||||
|
@ -615,6 +618,10 @@ public class AnalyzingSuggester extends Lookup {
|
|||
spare.grow(completion.output.output2.length);
|
||||
UnicodeUtil.UTF8toUTF16(completion.output.output2, spare);
|
||||
LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1));
|
||||
|
||||
// nocommit for fuzzy case would be nice to return
|
||||
// how many edits were required...:
|
||||
|
||||
//System.out.println(" result=" + result);
|
||||
results.add(result);
|
||||
|
||||
|
@ -631,6 +638,13 @@ public class AnalyzingSuggester extends Lookup {
|
|||
}
|
||||
}
|
||||
|
||||
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
|
||||
Automaton lookupAutomaton,
|
||||
FST<Pair<Long,BytesRef>> fst)
|
||||
throws IOException {
|
||||
return prefixPaths;
|
||||
}
|
||||
|
||||
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
|
||||
// Analyze surface form:
|
||||
TokenStream ts = indexAnalyzer.tokenStream("", new StringReader(surfaceForm.utf8ToString()));
|
||||
|
@ -706,46 +720,4 @@ public class AnalyzingSuggester extends Lookup {
|
|||
return left.output1.compareTo(right.output1);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns a new {@link PathIntersector}.
|
||||
*
|
||||
* <p>NOTE: The labels on the transitions incoming
|
||||
* automaton are bytes returned by the {@link
|
||||
* TokenStream}'s {@link TermToBytesRefAttribute}, which
|
||||
* are typically UTF8 encoded.
|
||||
*/
|
||||
protected PathIntersector getPathIntersector(Automaton automaton, FST<Pair<Long,BytesRef>> fst) {
|
||||
return new PathIntersector(automaton, fst);
|
||||
}
|
||||
|
||||
/**
|
||||
* This class is used to obtain the prefix paths in the automaton that also intersect the FST.
|
||||
*/
|
||||
protected static class PathIntersector {
|
||||
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> intersect;
|
||||
protected final Automaton automaton;
|
||||
protected final FST<Pair<Long,BytesRef>> fst;
|
||||
|
||||
/**
|
||||
* Creates a new {@link PathIntersector}
|
||||
*/
|
||||
public PathIntersector(Automaton automaton, FST<Pair<Long,BytesRef>> fst) {
|
||||
this.automaton = automaton;
|
||||
this.fst = fst;
|
||||
}
|
||||
/**
|
||||
* Returns the prefix paths for exact first top N search.
|
||||
*/
|
||||
public List<FSTUtil.Path<Pair<Long,BytesRef>>> intersectExact() throws IOException {
|
||||
return intersect = FSTUtil.intersectPrefixPaths(automaton, fst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the prefix paths for top N search.
|
||||
*/
|
||||
public List<FSTUtil.Path<Pair<Long,BytesRef>>> intersectAll() throws IOException {
|
||||
return intersect == null ? intersect = FSTUtil.intersectPrefixPaths(automaton, fst) : intersect;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,7 +26,6 @@ import java.util.Set;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; // javadocs
|
||||
import org.apache.lucene.search.suggest.analyzing.FSTUtil.Path;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
@ -146,9 +145,24 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected PathIntersector getPathIntersector(Automaton automaton,
|
||||
FST<Pair<Long,BytesRef>> fst) {
|
||||
return new FuzzyPathIntersector(automaton, fst);
|
||||
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
|
||||
Automaton lookupAutomaton,
|
||||
FST<Pair<Long,BytesRef>> fst)
|
||||
throws IOException {
|
||||
// nocommit we don't "penalize" for edits
|
||||
// ... shouldn't we? ie, ed=0 completions should have
|
||||
// higher rank than ed=1, at the same "weight"? maybe
|
||||
// we can punt on this for starters ... or maybe we
|
||||
// can re-run each prefix path through lev0, lev1,
|
||||
// lev2 to figure out the number of edits?
|
||||
Automaton levA = toLevenshteinAutomata(lookupAutomaton);
|
||||
/*
|
||||
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
|
||||
w.write(levA.toDot());
|
||||
w.close();
|
||||
System.out.println("Wrote LevA to out.dot");
|
||||
*/
|
||||
return FSTUtil.intersectPrefixPaths(levA, fst);
|
||||
}
|
||||
|
||||
Automaton toLevenshteinAutomata(Automaton automaton) {
|
||||
|
@ -195,30 +209,4 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
private final class FuzzyPathIntersector extends PathIntersector {
|
||||
|
||||
public FuzzyPathIntersector(Automaton automaton,
|
||||
FST<Pair<Long,BytesRef>> fst) {
|
||||
super(automaton, fst);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Path<Pair<Long,BytesRef>>> intersectAll() throws IOException {
|
||||
// nocommit we don't "penalize" for edits
|
||||
// ... shouldn't we? ie, ed=0 completions should have
|
||||
// higher rank than ed=1, at the same "weight"? maybe
|
||||
// we can punt on this for starters ... or maybe we
|
||||
// can re-run each prefix path through lev0, lev1,
|
||||
// lev2 to figure out the number of edits?
|
||||
Automaton levA = toLevenshteinAutomata(automaton);
|
||||
/*
|
||||
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8");
|
||||
w.write(levA.toDot());
|
||||
w.close();
|
||||
System.out.println("Wrote LevA to out.dot");
|
||||
*/
|
||||
return FSTUtil.intersectPrefixPaths(levA, fst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue