LUCENE-4481: turn off queue pruning (it's not in general admissible)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1400416 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-10-20 10:42:59 +00:00
parent 85c73511fb
commit 7e45db75e3
3 changed files with 130 additions and 5 deletions

View File

@ -290,6 +290,8 @@ public final class Util {
T cost = fst.outputs.add(path.cost, path.arc.output);
//System.out.println(" addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);
// LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
/*
if (queue.size() == topN) {
FSTPath<T> bottom = queue.last();
int comp = comparator.compare(cost, bottom.cost);
@ -312,6 +314,7 @@ public final class Util {
} else {
// Queue isn't full yet, so any path we hit competes:
}
*/
// copy over the current input to the new input
// and add the arc.label to the end
@ -323,9 +326,12 @@ public final class Util {
queue.add(newPath);
// LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
/*
if (queue.size() == topN+1) {
queue.pollLast();
}
}
*/
}
/** Adds all leaving arcs, including 'finished' arc, if
@ -390,8 +396,6 @@ public final class Util {
break;
}
//System.out.println(" remove init path=" + path);
if (path.arc.label == FST.END_LABEL) {
//System.out.println(" empty string! cost=" + path.cost);
// Empty string!
@ -400,10 +404,13 @@ public final class Util {
continue;
}
// LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
/*
if (results.size() == topN-1) {
// Last path -- don't bother w/ queue anymore:
queue = null;
}
*/
//System.out.println(" path: " + path);

View File

@ -591,16 +591,18 @@ public class AnalyzingSuggester extends Lookup {
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
num - results.size(),
num,
weightComparator) {
private final Set<BytesRef> seen = new HashSet<BytesRef>();
@Override
protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
//System.out.println("ACCEPT? path=" + input);
// Dedup: when the input analyzes to a graph we
// can get duplicate surface forms:
if (seen.contains(output.output2)) {
//System.out.println("SKIP: dup");
return false;
}
seen.add(output.output2);
@ -630,6 +632,12 @@ public class AnalyzingSuggester extends Lookup {
LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1));
//System.out.println(" result=" + result);
results.add(result);
if (results.size() == num) {
// In the exactFirst=true case the search may
// produce one extra path
break;
}
}
return results;

View File

@ -803,4 +803,114 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
List<LookupResult> results = suggester.lookup("a", false, 4);
}
public void testExactFirstMissingResult() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("a", 5),
new TermFreq("a b", 3),
new TermFreq("a c", 4),
}));
List<LookupResult> results = suggester.lookup("a", false, 3);
assertEquals(3, results.size());
assertEquals("a", results.get(0).key);
assertEquals(5, results.get(0).value);
assertEquals("a c", results.get(1).key);
assertEquals(4, results.get(1).value);
assertEquals("a b", results.get(2).key);
assertEquals(3, results.get(2).value);
}
public void testDupSurfaceFormsMissingResults() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
@Override
public TokenStream getTokenStream() {
return new CannedTokenStream(new Token[] {
token("hairy", 1, 1),
token("smelly", 0, 1),
token("dog", 1, 1),
});
}
@Override
protected void setReader(final Reader reader) throws IOException {
}
};
}
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("hambone", 6),
new TermFreq("nellie", 5),
}));
List<LookupResult> results = suggester.lookup("nellie", false, 2);
assertEquals(2, results.size());
assertEquals("hambone", results.get(0).key);
assertEquals(6, results.get(0).value);
assertEquals("nellie", results.get(1).key);
assertEquals(5, results.get(1).value);
}
public void testDupSurfaceFormsMissingResults2() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int count;
@Override
public TokenStream getTokenStream() {
if (count == 0) {
count++;
return new CannedTokenStream(new Token[] {
token("p", 1, 1),
token("q", 1, 1),
token("r", 0, 1),
token("s", 0, 1),
});
} else {
return new CannedTokenStream(new Token[] {
token("p", 1, 1),
});
}
}
@Override
protected void setReader(final Reader reader) throws IOException {
}
};
}
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
suggester.build(new TermFreqArrayIterator(new TermFreq[] {
new TermFreq("a", 6),
new TermFreq("b", 5),
}));
List<LookupResult> results = suggester.lookup("a", false, 2);
assertEquals(2, results.size());
assertEquals("a", results.get(0).key);
assertEquals(6, results.get(0).value);
assertEquals("b", results.get(1).key);
assertEquals(5, results.get(1).value);
}
}