From 2f136ff27c0dbb6ce5863e626df649358470d0ea Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 23 Oct 2012 18:11:32 +0000 Subject: [PATCH] LUCENE-3846: simplify overriding required for FuzzySuggester git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3846@1401372 13f79535-47bb-0310-9956-ffa450edef68 --- .../suggest/analyzing/AnalyzingSuggester.java | 62 +++++-------------- .../suggest/analyzing/FuzzySuggester.java | 48 ++++++-------- 2 files changed, 35 insertions(+), 75 deletions(-) diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 735cc0a015a..3497cb98a18 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -501,7 +501,7 @@ public class AnalyzingSuggester extends Lookup { // Intersect automaton w/ suggest wFST and get all // prefix starting nodes & their outputs: - final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst); + //final PathIntersector intersector = getPathIntersector(lookupAutomaton, fst); //System.out.println(" prefixPaths: " + prefixPaths.size()); @@ -511,8 +511,9 @@ public class AnalyzingSuggester extends Lookup { final List results = new ArrayList(); + List>> prefixPaths = FSTUtil.intersectPrefixPaths(lookupAutomaton, fst); + if (exactFirst) { - final List>> prefixPaths = intersector.intersectExact(); int count = 0; for (FSTUtil.Path> path : prefixPaths) { @@ -604,7 +605,9 @@ public class AnalyzingSuggester extends Lookup { } } }; - final List>> prefixPaths = intersector.intersectAll(); + + prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst); + for (FSTUtil.Path> path : prefixPaths) { searcher.addStartPaths(path.fstNode, path.output, true, path.input); } @@ -615,6 +618,10 @@ public class AnalyzingSuggester extends Lookup { spare.grow(completion.output.output2.length); UnicodeUtil.UTF8toUTF16(completion.output.output2, spare); LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1)); + + // nocommit for fuzzy case would be nice to return + // how many edits were required...: + //System.out.println(" result=" + result); results.add(result); @@ -631,6 +638,13 @@ public class AnalyzingSuggester extends Lookup { } } + protected List>> getFullPrefixPaths(List>> prefixPaths, + Automaton lookupAutomaton, + FST> fst) + throws IOException { + return prefixPaths; + } + final Set toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { // Analyze surface form: TokenStream ts = indexAnalyzer.tokenStream("", new StringReader(surfaceForm.utf8ToString())); @@ -706,46 +720,4 @@ public class AnalyzingSuggester extends Lookup { return left.output1.compareTo(right.output1); } }; - - /** - * Returns a new {@link PathIntersector}. - * - *

NOTE: The labels on the transitions incoming - * automaton are bytes returned by the {@link - * TokenStream}'s {@link TermToBytesRefAttribute}, which - * are typically UTF8 encoded. - */ - protected PathIntersector getPathIntersector(Automaton automaton, FST> fst) { - return new PathIntersector(automaton, fst); - } - - /** - * This class is used to obtain the prefix paths in the automaton that also intersect the FST. - */ - protected static class PathIntersector { - protected List>> intersect; - protected final Automaton automaton; - protected final FST> fst; - - /** - * Creates a new {@link PathIntersector} - */ - public PathIntersector(Automaton automaton, FST> fst) { - this.automaton = automaton; - this.fst = fst; - } - /** - * Returns the prefix paths for exact first top N search. - */ - public List>> intersectExact() throws IOException { - return intersect = FSTUtil.intersectPrefixPaths(automaton, fst); - } - - /** - * Returns the prefix paths for top N search. - */ - public List>> intersectAll() throws IOException { - return intersect == null ? intersect = FSTUtil.intersectPrefixPaths(automaton, fst) : intersect; - } - } } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java index 8fcabf7e8e1..24ddcf2eb5b 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FuzzySuggester.java @@ -26,7 +26,6 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; // javadocs -import org.apache.lucene.search.suggest.analyzing.FSTUtil.Path; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.automaton.Automaton; @@ -146,9 +145,24 @@ public final class FuzzySuggester extends AnalyzingSuggester { } @Override - protected PathIntersector getPathIntersector(Automaton automaton, - FST> fst) { - return new FuzzyPathIntersector(automaton, fst); + protected List>> getFullPrefixPaths(List>> prefixPaths, + Automaton lookupAutomaton, + FST> fst) + throws IOException { + // nocommit we don't "penalize" for edits + // ... shouldn't we? ie, ed=0 completions should have + // higher rank than ed=1, at the same "weight"? maybe + // we can punt on this for starters ... or maybe we + // can re-run each prefix path through lev0, lev1, + // lev2 to figure out the number of edits? + Automaton levA = toLevenshteinAutomata(lookupAutomaton); + /* + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + w.write(levA.toDot()); + w.close(); + System.out.println("Wrote LevA to out.dot"); + */ + return FSTUtil.intersectPrefixPaths(levA, fst); } Automaton toLevenshteinAutomata(Automaton automaton) { @@ -195,30 +209,4 @@ public final class FuzzySuggester extends AnalyzingSuggester { return a; } } - - private final class FuzzyPathIntersector extends PathIntersector { - - public FuzzyPathIntersector(Automaton automaton, - FST> fst) { - super(automaton, fst); - } - - @Override - public List>> intersectAll() throws IOException { - // nocommit we don't "penalize" for edits - // ... shouldn't we? ie, ed=0 completions should have - // higher rank than ed=1, at the same "weight"? maybe - // we can punt on this for starters ... or maybe we - // can re-run each prefix path through lev0, lev1, - // lev2 to figure out the number of edits? - Automaton levA = toLevenshteinAutomata(automaton); - /* - Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); - w.write(levA.toDot()); - w.close(); - System.out.println("Wrote LevA to out.dot"); - */ - return FSTUtil.intersectPrefixPaths(levA, fst); - } - } }