diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
index 31e81abf3c3..b49e0a0e272 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
@@ -290,6 +290,8 @@ public final class Util {
       T cost = fst.outputs.add(path.cost, path.arc.output);
       //System.out.println("  addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
 
+      // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+      /*
       if (queue.size() == topN) {
         FSTPath<T> bottom = queue.last();
         int comp = comparator.compare(cost, bottom.cost);
@@ -312,6 +314,7 @@
       } else {
         // Queue isn't full yet, so any path we hit competes:
       }
+      */
 
       // copy over the current input to the new input
       // and add the arc.label to the end
@@ -323,9 +326,12 @@
 
       queue.add(newPath);
 
+      // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+      /*
       if (queue.size() == topN+1) {
         queue.pollLast();
-      }
+      }
+      */
     }
 
     /** Adds all leaving arcs, including 'finished' arc, if
@@ -390,8 +396,6 @@
           break;
         }
 
-        //System.out.println("  remove init path=" + path);
-
         if (path.arc.label == FST.END_LABEL) {
           //System.out.println("    empty string!  cost=" + path.cost);
           // Empty string!
@@ -400,10 +404,13 @@
           continue;
         }
 
+        // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
+        /*
         if (results.size() == topN-1) {
           // Last path -- don't bother w/ queue anymore:
           queue = null;
         }
+        */
 
         //System.out.println("  path: " + path);
 
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
index 9f98814d9db..ff7c677b462 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
@@ -591,16 +591,18 @@ public class AnalyzingSuggester extends Lookup {
 
       Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
       searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
-                                                            num - results.size(),
+                                                            num,
                                                             weightComparator) {
         private final Set<BytesRef> seen = new HashSet<BytesRef>();
 
         @Override
         protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
-
+
+          //System.out.println("ACCEPT? path=" + input);
           // Dedup: when the input analyzes to a graph we
           // can get duplicate surface forms:
           if (seen.contains(output.output2)) {
+            //System.out.println("SKIP: dup");
             return false;
           }
           seen.add(output.output2);
@@ -630,6 +632,12 @@
         LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1));
         //System.out.println("    result=" + result);
         results.add(result);
+
+        if (results.size() == num) {
+          // In the exactFirst=true case the search may
+          // produce one extra path
+          break;
+        }
       }
 
       return results;
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
index c883698743d..f4b3d3aeb56 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
@@ -803,4 +803,114 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
 
     List<LookupResult> results = suggester.lookup("a", false, 4);
   }
+
+  public void testExactFirstMissingResult() throws Exception {
+
+    Analyzer a = new MockAnalyzer(random());
+
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("a", 5),
+          new TermFreq("a b", 3),
+          new TermFreq("a c", 4),
+        }));
+
+    List<LookupResult> results = suggester.lookup("a", false, 3);
+    assertEquals(3, results.size());
+    assertEquals("a", results.get(0).key);
+    assertEquals(5, results.get(0).value);
+    assertEquals("a c", results.get(1).key);
+    assertEquals(4, results.get(1).value);
+    assertEquals("a b", results.get(2).key);
+    assertEquals(3, results.get(2).value);
+  }
+
+  public void testDupSurfaceFormsMissingResults() throws Exception {
+    Analyzer a = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+
+          return new TokenStreamComponents(tokenizer) {
+
+            @Override
+            public TokenStream getTokenStream() {
+              return new CannedTokenStream(new Token[] {
+                  token("hairy", 1, 1),
+                  token("smelly", 0, 1),
+                  token("dog", 1, 1),
+                });
+            }
+
+            @Override
+            protected void setReader(final Reader reader) throws IOException {
+            }
+          };
+        }
+      };
+
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("hambone", 6),
+          new TermFreq("nellie", 5),
+        }));
+
+    List<LookupResult> results = suggester.lookup("nellie", false, 2);
+    assertEquals(2, results.size());
+    assertEquals("hambone", results.get(0).key);
+    assertEquals(6, results.get(0).value);
+    assertEquals("nellie", results.get(1).key);
+    assertEquals(5, results.get(1).value);
+  }
+
+  public void testDupSurfaceFormsMissingResults2() throws Exception {
+    Analyzer a = new Analyzer() {
+        @Override
+        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+
+          return new TokenStreamComponents(tokenizer) {
+
+            int count;
+
+            @Override
+            public TokenStream getTokenStream() {
+              if (count == 0) {
+                count++;
+                return new CannedTokenStream(new Token[] {
+                    token("p", 1, 1),
+                    token("q", 1, 1),
+                    token("r", 0, 1),
+                    token("s", 0, 1),
+                  });
+              } else {
+                return new CannedTokenStream(new Token[] {
+                    token("p", 1, 1),
+                  });
+              }
+            }
+
+            @Override
+            protected void setReader(final Reader reader) throws IOException {
+            }
+          };
+        }
+      };
+
+    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
+
+    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
+          new TermFreq("a", 6),
+          new TermFreq("b", 5),
+        }));
+
+    List<LookupResult> results = suggester.lookup("a", false, 2);
+    assertEquals(2, results.size());
+    assertEquals("a", results.get(0).key);
+    assertEquals(6, results.get(0).value);
+    assertEquals("b", results.get(1).key);
+    assertEquals(5, results.get(1).value);
+  }
 }
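
Note: the new test cases call a token(term, posInc, posLength) helper and rely on imports (CannedTokenStream, Token, MockTokenizer, Reader, IOException) that are not part of the hunks shown above; both are assumed to already exist in AnalyzingSuggesterTest.java. A minimal sketch of what such a helper could look like (the exact signature and Token setup here are assumptions, not taken from this patch):

  // Hypothetical helper: builds a Token with the given position increment and
  // position length so CannedTokenStream can replay a token graph in the tests.
  private static Token token(String term, int posInc, int posLength) {
    final Token t = new Token(term, 0, 0);   // offsets are not checked by these tests
    t.setPositionIncrement(posInc);          // posInc=0 stacks the token on the previous position
    t.setPositionLength(posLength);
    return t;
  }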