LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1368927 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-08-03 13:00:59 +00:00
parent 10fabf16d4
commit a8ebce8f73
4 changed files with 57 additions and 14 deletions

View File

@ -157,6 +157,9 @@ Bug Fixes
instance are already checked out and queued up but not yet flushed.
(Simon Willnauer)
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
(Johannes Christen, Uwe Schindler, Robert Muir)
Changes in Runtime Behavior
* LUCENE-4109: Enable position increments in the flexible queryparser by default.

View File

@ -122,7 +122,7 @@ public class FuzzyTermsEnum extends TermsEnum {
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
// if minSimilarity >= 1, we treat it as number of edits
if (minSimilarity >= 1f) {
this.minSimilarity = 1 - (minSimilarity+1) / this.termLength;
this.minSimilarity = 0; // just driven by number of edits
maxEdits = (int) minSimilarity;
raw = true;
} else {

View File

@ -22,6 +22,7 @@ import java.util.Arrays;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@ -189,6 +190,41 @@ public class TestFuzzyQuery extends LuceneTestCase {
directory.close();
}
/**
 * Indexes a fixed list of single-token values (keyword tokenizer) and runs a
 * FuzzyQuery for "WEBER" against them, expecting exactly 8 matching documents.
 *
 * NOTE(review): the two int constructor arguments (2, 1) are presumably the
 * maximum edit count and the required prefix length -- confirm against the
 * FuzzyQuery constructor.
 */
public void test2() throws Exception {
  // Values are added in exactly this order, one document per value.
  final String[] values = {
    "LANGE",
    "LUETH",
    "PIRSING",
    "RIEGEL",
    "TRZECZIAK",
    "WALKER",
    "WBR",
    "WE",
    "WEB",
    "WEBE",
    "WEBER",
    "WEBERE",
    "WEBREE",
    "WEBEREI",
    "WBRE",
    "WITTKOPF",
    "WOJNAROWSKI",
    "WRICKE"
  };

  Directory dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  for (String value : values) {
    addDoc(value, indexWriter);
  }

  // Obtain the reader and searcher before closing the writer.
  IndexReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(indexReader);
  indexWriter.close();

  FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
  int hitCount = indexSearcher.search(fuzzy, null, 1000).scoreDocs.length;
  assertEquals(8, hitCount);

  indexReader.close();
  dir.close();
}
/**
* MultiTermQuery provides (via attribute) information about which values
* must be competitive to enter the priority queue.

View File

@ -440,21 +440,25 @@ public class TestSlowFuzzyQuery extends LuceneTestCase {
assertEquals(1, hits.length);
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "t"), 3);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
// TODO: cannot really be supported given the legacy scoring
// system which scores negative, if the distance > min term len,
// so such matches were always impossible with lucene 3.x, etc
//
//q = new SlowFuzzyQuery(new Term("field", "t"), 3);
//hits = searcher.search(q, 10).scoreDocs;
//assertEquals(1, hits.length);
//assertEquals("test", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
// q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
// hits = searcher.search(q, 10).scoreDocs;
// assertEquals(1, hits.length);
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(2, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
// q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
// hits = searcher.search(q, 10).scoreDocs;
// assertEquals(2, hits.length);
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
// assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
reader.close();
index.close();