mirror of https://github.com/apache/lucene.git
LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1368927 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
10fabf16d4
commit
a8ebce8f73
|
@ -157,6 +157,9 @@ Bug Fixes
|
|||
instance are already checked out and queued up but not yet flushed.
|
||||
(Simon Willnauer)
|
||||
|
||||
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
|
||||
(Johannes Christen, Uwe Schindler, Robert Muir)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-4109: Enable position increments in the flexible queryparser by default.
|
||||
|
|
|
@ -122,7 +122,7 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
|
||||
// if minSimilarity >= 1, we treat it as number of edits
|
||||
if (minSimilarity >= 1f) {
|
||||
this.minSimilarity = 1 - (minSimilarity+1) / this.termLength;
|
||||
this.minSimilarity = 0; // just driven by number of edits
|
||||
maxEdits = (int) minSimilarity;
|
||||
raw = true;
|
||||
} else {
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Arrays;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -189,6 +190,41 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
directory.close();
|
||||
}
|
||||
|
||||
/**
 * Indexes a fixed list of keyword terms through {@code addDoc} and checks that a
 * {@link FuzzyQuery} for the term "WEBER" in field "field" yields exactly 8 hits.
 * <p>
 * NOTE(review): presumably the regression test for LUCENE-4282 (fuzzy query missing
 * results); the constructor arguments {@code (2, 1)} look like max edits and prefix
 * length -- confirm against the FuzzyQuery constructor.
 */
public void test2() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
|
||||
addDoc("LANGE", writer);
|
||||
addDoc("LUETH", writer);
|
||||
addDoc("PIRSING", writer);
|
||||
addDoc("RIEGEL", writer);
|
||||
addDoc("TRZECZIAK", writer);
|
||||
addDoc("WALKER", writer);
|
||||
addDoc("WBR", writer);
|
||||
addDoc("WE", writer);
|
||||
addDoc("WEB", writer);
|
||||
addDoc("WEBE", writer);
|
||||
addDoc("WEBER", writer);
|
||||
addDoc("WEBERE", writer);
|
||||
addDoc("WEBREE", writer);
|
||||
addDoc("WEBEREI", writer);
|
||||
addDoc("WBRE", writer);
|
||||
addDoc("WITTKOPF", writer);
|
||||
addDoc("WOJNAROWSKI", writer);
|
||||
addDoc("WRICKE", writer);
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
|
||||
//query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
// Exactly 8 hits are expected out of the 18 addDoc calls above.
assertEquals(8, hits.length);
|
||||
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* MultiTermQuery provides (via attribute) information about which values
|
||||
* must be competitive to enter the priority queue.
|
||||
|
|
|
@ -440,21 +440,25 @@ public class TestSlowFuzzyQuery extends LuceneTestCase {
|
|||
assertEquals(1, hits.length);
|
||||
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "t"), 3);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
// TODO: cannot really be supported given the legacy scoring
|
||||
// system which scores negative, if the distance > min term len,
|
||||
// so such matches were always impossible with lucene 3.x, etc
|
||||
//
|
||||
//q = new SlowFuzzyQuery(new Term("field", "t"), 3);
|
||||
//hits = searcher.search(q, 10).scoreDocs;
|
||||
//assertEquals(1, hits.length);
|
||||
//assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
// q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
|
||||
// hits = searcher.search(q, 10).scoreDocs;
|
||||
// assertEquals(1, hits.length);
|
||||
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(2, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
||||
// q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
||||
// hits = searcher.search(q, 10).scoreDocs;
|
||||
// assertEquals(2, hits.length);
|
||||
// assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
// assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
||||
|
||||
reader.close();
|
||||
index.close();
|
||||
|
|
Loading…
Reference in New Issue