mirror of https://github.com/apache/lucene.git
LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length is small compared to minSimilarity.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@735517 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8c58de6092
commit
8bf2fda5bd
|
@ -1,4 +1,4 @@
|
|||
Lucene Change Log
|
||||
Lucene Change Log
|
||||
$Id$
|
||||
|
||||
======================= Trunk (not yet released) =======================
|
||||
|
@ -141,6 +141,9 @@ Optimizations
|
|||
3. LUCENE-1484: Remove synchronization of IndexReader.document() by
|
||||
using CloseableThreadLocal internally. (Jason Rutherglen via Mike
|
||||
McCandless).
|
||||
|
||||
4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length
|
||||
is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
|
||||
|
||||
Documentation
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ public class FuzzyQuery extends MultiTermQuery {
|
|||
|
||||
private float minimumSimilarity;
|
||||
private int prefixLength;
|
||||
private boolean termLongEnough = false;
|
||||
|
||||
/**
|
||||
* Create a new FuzzyQuery that will match terms with a similarity
|
||||
|
@ -61,6 +62,10 @@ public class FuzzyQuery extends MultiTermQuery {
|
|||
if (prefixLength < 0)
|
||||
throw new IllegalArgumentException("prefixLength < 0");
|
||||
|
||||
if (term.text().length() > 1.0f / (1.0f - minimumSimilarity)) {
|
||||
this.termLongEnough = true;
|
||||
}
|
||||
|
||||
this.minimumSimilarity = minimumSimilarity;
|
||||
this.prefixLength = prefixLength;
|
||||
}
|
||||
|
@ -105,6 +110,10 @@ public class FuzzyQuery extends MultiTermQuery {
|
|||
}
|
||||
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
if(!termLongEnough) { // can't match
|
||||
return new BooleanQuery();
|
||||
}
|
||||
|
||||
FilteredTermEnum enumerator = getEnum(reader);
|
||||
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
|
||||
|
|
|
@ -249,6 +249,38 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
directory.close();
|
||||
}
|
||||
|
||||
public void testTokenLengthOpt() throws IOException {
|
||||
RAMDirectory directory = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||
true, IndexWriter.MaxFieldLength.LIMITED);
|
||||
addDoc("12345678911", writer);
|
||||
addDoc("segment", writer);
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
IndexSearcher searcher = new IndexSearcher(directory);
|
||||
|
||||
Query query;
|
||||
// term not over 10 chars, so optimization shortcuts
|
||||
query = new FuzzyQuery(new Term("field", "1234569"), 0.9f);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// 10 chars, so no optimization
|
||||
query = new FuzzyQuery(new Term("field", "1234567891"), 0.9f);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// over 10 chars, so no optimization
|
||||
query = new FuzzyQuery(new Term("field", "12345678911"), 0.9f);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
|
||||
// over 10 chars, no match
|
||||
query = new FuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
}
|
||||
|
||||
private void addDoc(String text, IndexWriter writer) throws IOException {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED));
|
||||
|
|
Loading…
Reference in New Issue