mirror of https://github.com/apache/lucene.git
LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length is small compared to minSimilarity.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@735517 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8c58de6092
commit
8bf2fda5bd
|
@@ -1,4 +1,4 @@
|
||||||
Lucene Change Log
|
Lucene Change Log
|
||||||
$Id$
|
$Id$
|
||||||
|
|
||||||
======================= Trunk (not yet released) =======================
|
======================= Trunk (not yet released) =======================
|
||||||
|
@@ -141,6 +141,9 @@ Optimizations
|
||||||
3. LUCENE-1484: Remove synchronization of IndexReader.document() by
|
3. LUCENE-1484: Remove synchronization of IndexReader.document() by
|
||||||
using CloseableThreadLocal internally. (Jason Rutherglen via Mike
|
using CloseableThreadLocal internally. (Jason Rutherglen via Mike
|
||||||
McCandless).
|
McCandless).
|
||||||
|
|
||||||
|
4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length
|
||||||
|
is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
|
|
||||||
|
|
|
@@ -34,6 +34,7 @@ public class FuzzyQuery extends MultiTermQuery {
|
||||||
|
|
||||||
private float minimumSimilarity;
|
private float minimumSimilarity;
|
||||||
private int prefixLength;
|
private int prefixLength;
|
||||||
|
private boolean termLongEnough = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new FuzzyQuery that will match terms with a similarity
|
* Create a new FuzzyQuery that will match terms with a similarity
|
||||||
|
@@ -61,6 +62,10 @@ public class FuzzyQuery extends MultiTermQuery {
|
||||||
if (prefixLength < 0)
|
if (prefixLength < 0)
|
||||||
throw new IllegalArgumentException("prefixLength < 0");
|
throw new IllegalArgumentException("prefixLength < 0");
|
||||||
|
|
||||||
|
if (term.text().length() > 1.0f / (1.0f - minimumSimilarity)) {
|
||||||
|
this.termLongEnough = true;
|
||||||
|
}
|
||||||
|
|
||||||
this.minimumSimilarity = minimumSimilarity;
|
this.minimumSimilarity = minimumSimilarity;
|
||||||
this.prefixLength = prefixLength;
|
this.prefixLength = prefixLength;
|
||||||
}
|
}
|
||||||
|
@@ -105,6 +110,10 @@ public class FuzzyQuery extends MultiTermQuery {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Query rewrite(IndexReader reader) throws IOException {
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
if(!termLongEnough) { // can't match
|
||||||
|
return new BooleanQuery();
|
||||||
|
}
|
||||||
|
|
||||||
FilteredTermEnum enumerator = getEnum(reader);
|
FilteredTermEnum enumerator = getEnum(reader);
|
||||||
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||||
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
|
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
|
||||||
|
|
|
@@ -249,6 +249,38 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
||||||
directory.close();
|
directory.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTokenLengthOpt() throws IOException {
|
||||||
|
RAMDirectory directory = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(),
|
||||||
|
true, IndexWriter.MaxFieldLength.LIMITED);
|
||||||
|
addDoc("12345678911", writer);
|
||||||
|
addDoc("segment", writer);
|
||||||
|
writer.optimize();
|
||||||
|
writer.close();
|
||||||
|
IndexSearcher searcher = new IndexSearcher(directory);
|
||||||
|
|
||||||
|
Query query;
|
||||||
|
// term not over 10 chars, so optimization shortcuts
|
||||||
|
query = new FuzzyQuery(new Term("field", "1234569"), 0.9f);
|
||||||
|
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||||
|
assertEquals(0, hits.length);
|
||||||
|
|
||||||
|
// 10 chars, so no optimization
|
||||||
|
query = new FuzzyQuery(new Term("field", "1234567891"), 0.9f);
|
||||||
|
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||||
|
assertEquals(0, hits.length);
|
||||||
|
|
||||||
|
// over 10 chars, so no optimization
|
||||||
|
query = new FuzzyQuery(new Term("field", "12345678911"), 0.9f);
|
||||||
|
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||||
|
assertEquals(1, hits.length);
|
||||||
|
|
||||||
|
// over 10 chars, no match
|
||||||
|
query = new FuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
|
||||||
|
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||||
|
assertEquals(0, hits.length);
|
||||||
|
}
|
||||||
|
|
||||||
private void addDoc(String text, IndexWriter writer) throws IOException {
|
private void addDoc(String text, IndexWriter writer) throws IOException {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED));
|
doc.add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED));
|
||||||
|
|
Loading…
Reference in New Issue