mirror of https://github.com/apache/lucene.git

LUCENE-4481: turn off queue pruning (it's not in general admissible)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1400416 13f79535-47bb-0310-9956-ffa450edef68

parent 85c73511fb
commit 7e45db75e3
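Why the pruning is inadmissible: TopNSearcher orders live paths by their accumulated prefix cost, but a prefix that currently ranks worst can still finish ahead once the remaining arc outputs are added, so evicting it from the queue can silently drop one of the true top-N completions. Below is a minimal standalone sketch (my own illustration, not Lucene code; class and variable names are hypothetical) of the failure mode behind the disabled queue.pollLast() calls:

    import java.util.Comparator;
    import java.util.TreeSet;

    // Toy sketch: pruning the candidate queue by *prefix* cost can evict
    // the prefix of the true best completion.
    public class QueuePruningSketch {

      record Path(String labels, long cost) {}

      public static void main(String[] args) {
        final int topN = 1;
        // Best (cheapest) path first, like the TreeSet queue in TopNSearcher:
        TreeSet<Path> queue = new TreeSet<>(
            Comparator.comparingLong(Path::cost).thenComparing(Path::labels));

        // Expand the root into two prefixes with their accumulated costs:
        queue.add(new Path("a", 1));  // only completion is "ax", total cost 1 + 10 = 11
        queue.add(new Path("b", 2));  // only completion is "by", total cost 2 +  0 =  2

        // The pruning this commit disables: cap the queue at topN paths.
        if (queue.size() == topN + 1) {
          queue.pollLast();           // evicts "b" -- the prefix of the true best!
        }

        // The pruned search can now only ever return "ax" (total 11), even
        // though "by" (total 2) is the cheapest completion.
        System.out.println("surviving prefix: " + queue.first().labels());
      }
    }

Roughly speaking, pruning like this would be admissible only if the comparison used bounds on each path's possible completions rather than raw prefix costs, which is what the TODO comments below ask for.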
@@ -290,6 +290,8 @@ public final class Util {
      T cost = fst.outputs.add(path.cost, path.arc.output);
      //System.out.println("  addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);

      // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
      /*
      if (queue.size() == topN) {
        FSTPath<T> bottom = queue.last();
        int comp = comparator.compare(cost, bottom.cost);
@@ -312,6 +314,7 @@ public final class Util {
      } else {
        // Queue isn't full yet, so any path we hit competes:
      }
      */

      // copy over the current input to the new input
      // and add the arc.label to the end
@@ -323,9 +326,12 @@ public final class Util {

      queue.add(newPath);

      // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
      /*
      if (queue.size() == topN+1) {
        queue.pollLast();
      }
      */
    }

    /** Adds all leaving arcs, including 'finished' arc, if
@@ -390,8 +396,6 @@ public final class Util {
          break;
        }

        //System.out.println("  remove init path=" + path);

        if (path.arc.label == FST.END_LABEL) {
          //System.out.println("    empty string!  cost=" + path.cost);
          // Empty string!
@@ -400,10 +404,13 @@
          continue;
        }

        // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible:
        /*
        if (results.size() == topN-1) {
          // Last path -- don't bother w/ queue anymore:
          queue = null;
        }
        */

        //System.out.println("  path: " + path);
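Taken together, the disabled sites are the capacity check in addIfCompetitive (queue.size() == topN), the eviction after an add (queue.size() == topN+1), and dropping the queue once results.size() == topN-1. For what the TODOs ask for, here is a hedged sketch of the shape an admissible check would need; completionLowerBound and maxCompletionUpperBound are hypothetical helpers, not Lucene API:

    // Hypothetical sketch only: 'path' may be discarded safely when even the
    // best completion it could reach (a lower bound) is worse than the worst
    // completion any queued path could produce (an upper bound). The disabled
    // code compared raw prefix costs, which bounds neither side.
    if (queue.size() == topN
        && comparator.compare(completionLowerBound(path),
                              maxCompletionUpperBound(queue)) > 0) {
      return;  // path provably cannot enter the top N
    }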
@@ -591,16 +591,18 @@ public class AnalyzingSuggester extends Lookup {

      Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
      searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
                                                            num - results.size(),
                                                            num,
                                                            weightComparator) {
        private final Set<BytesRef> seen = new HashSet<BytesRef>();

        @Override
        protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
          //System.out.println("ACCEPT? path=" + input);
          // Dedup: when the input analyzes to a graph we
          // can get duplicate surface forms:
          if (seen.contains(output.output2)) {
            //System.out.println("  SKIP: dup");
            return false;
          }
          seen.add(output.output2);
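The acceptResult override above is the dedup half of the fix. A minimal standalone restatement of the pattern (plain strings instead of BytesRef; names and data are mine, not Lucene's):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    // Sketch: paths arrive in best-cost order; the first hit for each
    // surface form wins, and later duplicates (which appear when one input
    // analyzes to several token-graph paths) are rejected.
    public class DedupSketch {
      public static void main(String[] args) {
        String[] pathsInCostOrder = { "hambone", "nellie", "nellie" };
        Set<String> seen = new HashSet<>();
        List<String> results = new ArrayList<>();
        for (String surfaceForm : pathsInCostOrder) {
          if (seen.add(surfaceForm)) {   // add() returns false for a duplicate
            results.add(surfaceForm);
          }
        }
        System.out.println(results);     // [hambone, nellie]
      }
    }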
@@ -630,6 +632,12 @@ public class AnalyzingSuggester extends Lookup {
        LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1));
        //System.out.println("  result=" + result);
        results.add(result);

        if (results.size() == num) {
          // In the exactFirst=true case the search may
          // produce one extra path
          break;
        }
      }

      return results;
@@ -803,4 +803,114 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {

    List<LookupResult> results = suggester.lookup("a", false, 4);
  }

  public void testExactFirstMissingResult() throws Exception {

    Analyzer a = new MockAnalyzer(random());

    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);

    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
          new TermFreq("a", 5),
          new TermFreq("a b", 3),
          new TermFreq("a c", 4),
        }));

    List<LookupResult> results = suggester.lookup("a", false, 3);
    assertEquals(3, results.size());
    assertEquals("a", results.get(0).key);
    assertEquals(5, results.get(0).value);
    assertEquals("a c", results.get(1).key);
    assertEquals(4, results.get(1).value);
    assertEquals("a b", results.get(2).key);
    assertEquals(3, results.get(2).value);
  }

  public void testDupSurfaceFormsMissingResults() throws Exception {
    Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

          return new TokenStreamComponents(tokenizer) {

            @Override
            public TokenStream getTokenStream() {
              return new CannedTokenStream(new Token[] {
                  token("hairy", 1, 1),
                  token("smelly", 0, 1),
                  token("dog", 1, 1),
                });
            }

            @Override
            protected void setReader(final Reader reader) throws IOException {
            }
          };
        }
      };

    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);

    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
          new TermFreq("hambone", 6),
          new TermFreq("nellie", 5),
        }));

    List<LookupResult> results = suggester.lookup("nellie", false, 2);
    assertEquals(2, results.size());
    assertEquals("hambone", results.get(0).key);
    assertEquals(6, results.get(0).value);
    assertEquals("nellie", results.get(1).key);
    assertEquals(5, results.get(1).value);
  }

  public void testDupSurfaceFormsMissingResults2() throws Exception {
    Analyzer a = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);

          return new TokenStreamComponents(tokenizer) {

            int count;

            @Override
            public TokenStream getTokenStream() {
              if (count == 0) {
                count++;
                return new CannedTokenStream(new Token[] {
                    token("p", 1, 1),
                    token("q", 1, 1),
                    token("r", 0, 1),
                    token("s", 0, 1),
                  });
              } else {
                return new CannedTokenStream(new Token[] {
                    token("p", 1, 1),
                  });
              }
            }

            @Override
            protected void setReader(final Reader reader) throws IOException {
            }
          };
        }
      };

    AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);

    suggester.build(new TermFreqArrayIterator(new TermFreq[] {
          new TermFreq("a", 6),
          new TermFreq("b", 5),
        }));

    List<LookupResult> results = suggester.lookup("a", false, 2);
    assertEquals(2, results.size());
    assertEquals("a", results.get(0).key);
    assertEquals(6, results.get(0).value);
    assertEquals("b", results.get(1).key);
    assertEquals(5, results.get(1).value);
  }
}