mirror of https://github.com/apache/lucene.git
FuzzyQuery is now using a default prefix length of 2 in
order to become usable for big indices. Furthermore, it no longer throws TooManyClausesException. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150572 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
435e471111
commit
a433e2c1de
|
@ -74,6 +74,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
String field;
|
||||
int phraseSlop = 0;
|
||||
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
|
||||
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
|
||||
Locale locale = Locale.getDefault();
|
||||
|
||||
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
|
||||
|
@ -131,18 +132,36 @@ public class QueryParser implements QueryParserConstants {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the default minimal similarity for fuzzy queries.
|
||||
* Get the minimal similarity for fuzzy queries.
|
||||
*/
|
||||
public float getFuzzyMinSim() {
|
||||
return fuzzyMinSim;
|
||||
}
|
||||
|
||||
/**
|
||||
*Set the default minimum similarity for fuzzy queries.
|
||||
* Set the minimum similarity for fuzzy queries.
|
||||
* Default is 0.5f.
|
||||
*/
|
||||
public void setFuzzyMinSim(float fuzzyMinSim) {
|
||||
this.fuzzyMinSim = fuzzyMinSim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the prefix length for fuzzy queries.
|
||||
* @return Returns the fuzzyPrefixLength.
|
||||
*/
|
||||
public int getFuzzyPrefixLength() {
|
||||
return fuzzyPrefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the prefix length for fuzzy queries. Default is 2.
|
||||
* @param fuzzyPrefixLength The fuzzyPrefixLength to set.
|
||||
*/
|
||||
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
|
||||
this.fuzzyPrefixLength = fuzzyPrefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default slop for phrases. If zero, then exact phrase matches
|
||||
* are required. Default value is zero.
|
||||
|
@ -419,7 +438,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
return new PrefixQuery(t);
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* Factory method for generating a query (similar to
|
||||
* ({@link #getWildcardQuery}). Called when parser parses
|
||||
* an input term token that has the fuzzy suffix (~) appended.
|
||||
|
@ -433,7 +452,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
|
||||
{
|
||||
Term t = new Term(field, termStr);
|
||||
return new FuzzyQuery(t, minSimilarity);
|
||||
return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -97,6 +97,7 @@ public class QueryParser {
|
|||
String field;
|
||||
int phraseSlop = 0;
|
||||
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
|
||||
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
|
||||
Locale locale = Locale.getDefault();
|
||||
|
||||
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
|
||||
|
@ -154,17 +155,35 @@ public class QueryParser {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the default minimal similarity for fuzzy queries.
|
||||
* Get the minimal similarity for fuzzy queries.
|
||||
*/
|
||||
public float getFuzzyMinSim() {
|
||||
return fuzzyMinSim;
|
||||
}
|
||||
|
||||
/**
|
||||
*Set the default minimum similarity for fuzzy queries.
|
||||
* Set the minimum similarity for fuzzy queries.
|
||||
* Default is 0.5f.
|
||||
*/
|
||||
public void setFuzzyMinSim(float fuzzyMinSim) {
|
||||
this.fuzzyMinSim = fuzzyMinSim;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the prefix length for fuzzy queries.
|
||||
* @return Returns the fuzzyPrefixLength.
|
||||
*/
|
||||
public int getFuzzyPrefixLength() {
|
||||
return fuzzyPrefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the prefix length for fuzzy queries. Default is 2.
|
||||
* @param fuzzyPrefixLength The fuzzyPrefixLength to set.
|
||||
*/
|
||||
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
|
||||
this.fuzzyPrefixLength = fuzzyPrefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default slop for phrases. If zero, then exact phrase matches
|
||||
|
@ -441,8 +460,8 @@ public class QueryParser {
|
|||
Term t = new Term(field, termStr);
|
||||
return new PrefixQuery(t);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
/**
|
||||
* Factory method for generating a query (similar to
|
||||
* ({@link #getWildcardQuery}). Called when parser parses
|
||||
* an input term token that has the fuzzy suffix (~) appended.
|
||||
|
@ -456,7 +475,7 @@ public class QueryParser {
|
|||
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
|
||||
{
|
||||
Term t = new Term(field, termStr);
|
||||
return new FuzzyQuery(t, minSimilarity);
|
||||
return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -34,7 +34,7 @@ public abstract class FilteredTermEnum extends TermEnum {
|
|||
protected abstract boolean termCompare(Term term);
|
||||
|
||||
/** Equality measure on the term */
|
||||
protected abstract float difference();
|
||||
public abstract float difference();
|
||||
|
||||
/** Indiciates the end of the enumeration has been reached */
|
||||
protected abstract boolean endEnum();
|
||||
|
|
|
@ -18,6 +18,8 @@ package org.apache.lucene.search;
|
|||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Implements the fuzzy search query. The similiarity measurement
|
||||
|
@ -26,6 +28,8 @@ import java.io.IOException;
|
|||
public final class FuzzyQuery extends MultiTermQuery {
|
||||
|
||||
public final static float defaultMinSimilarity = 0.5f;
|
||||
public final static int defaultPrefixLength = 2;
|
||||
|
||||
private float minimumSimilarity;
|
||||
private int prefixLength;
|
||||
|
||||
|
@ -48,16 +52,14 @@ public final class FuzzyQuery extends MultiTermQuery {
|
|||
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException {
|
||||
super(term);
|
||||
|
||||
if (minimumSimilarity > 1.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity > 1");
|
||||
if (minimumSimilarity >= 1.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity >= 1");
|
||||
else if (minimumSimilarity < 0.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity < 0");
|
||||
this.minimumSimilarity = minimumSimilarity;
|
||||
|
||||
this.minimumSimilarity = minimumSimilarity;
|
||||
if(prefixLength < 0)
|
||||
throw new IllegalArgumentException("prefixLength < 0");
|
||||
else if(prefixLength >= term.text().length())
|
||||
throw new IllegalArgumentException("prefixLength >= term.text().length()");
|
||||
this.prefixLength = prefixLength;
|
||||
}
|
||||
|
||||
|
@ -65,14 +67,14 @@ public final class FuzzyQuery extends MultiTermQuery {
|
|||
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}.
|
||||
*/
|
||||
public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException {
|
||||
this(term, minimumSimilarity, 0);
|
||||
this(term, minimumSimilarity, defaultPrefixLength);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}.
|
||||
*/
|
||||
public FuzzyQuery(Term term) {
|
||||
this(term, defaultMinSimilarity, 0);
|
||||
this(term, defaultMinSimilarity, defaultPrefixLength);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -95,8 +97,74 @@ public final class FuzzyQuery extends MultiTermQuery {
|
|||
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
|
||||
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
|
||||
}
|
||||
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
FilteredTermEnum enumerator = getEnum(reader);
|
||||
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
|
||||
|
||||
try {
|
||||
do {
|
||||
float minScore = 0.0f;
|
||||
float score = 0.0f;
|
||||
Term t = enumerator.term();
|
||||
if (t != null) {
|
||||
score = enumerator.difference();
|
||||
// terms come in alphabetical order, therefore if queue is full and score
|
||||
// not bigger than minScore, we can skip
|
||||
if(stQueue.size() < maxClauseCount || score > minScore){
|
||||
stQueue.insert(new ScoreTerm(t, score));
|
||||
minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore
|
||||
}
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
int size = stQueue.size();
|
||||
for(int i = 0; i < size; i++){
|
||||
ScoreTerm st = (ScoreTerm) stQueue.pop();
|
||||
TermQuery tq = new TermQuery(st.term); // found a match
|
||||
tq.setBoost(getBoost() * st.score); // set the boost
|
||||
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
public String toString(String field) {
|
||||
return super.toString(field) + '~' + Float.toString(minimumSimilarity);
|
||||
}
|
||||
|
||||
private static class ScoreTerm{
|
||||
public Term term;
|
||||
public float score;
|
||||
|
||||
public ScoreTerm(Term term, float score){
|
||||
this.term = term;
|
||||
this.score = score;
|
||||
}
|
||||
}
|
||||
|
||||
private static class ScoreTermQueue extends PriorityQueue {
|
||||
|
||||
public ScoreTermQueue(int size){
|
||||
initialize(size);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
protected boolean lessThan(Object a, Object b) {
|
||||
ScoreTerm termA = (ScoreTerm)a;
|
||||
ScoreTerm termB = (ScoreTerm)b;
|
||||
if (termA.score == termB.score)
|
||||
return termA.term.compareTo(termB.term) > 0;
|
||||
else
|
||||
return termA.score < termB.score;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.index.Term;
|
|||
<p>Term enumerations are always ordered by Term.compareTo(). Each term in
|
||||
the enumeration is greater than all that precede it. */
|
||||
public final class FuzzyTermEnum extends FilteredTermEnum {
|
||||
double distance;
|
||||
float similarity;
|
||||
boolean endEnum = false;
|
||||
|
||||
Term searchTerm = null;
|
||||
|
@ -35,7 +35,7 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
String prefix = "";
|
||||
int prefixLength = 0;
|
||||
float minimumSimilarity;
|
||||
double scale_factor;
|
||||
float scale_factor;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -47,7 +47,7 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
* @see #FuzzyTermEnum(IndexReader, Term, float, int)
|
||||
*/
|
||||
public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
|
||||
this(reader, term, FuzzyQuery.defaultMinSimilarity, 0);
|
||||
this(reader, term, FuzzyQuery.defaultMinSimilarity, FuzzyQuery.defaultPrefixLength);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -60,7 +60,7 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
* @see #FuzzyTermEnum(IndexReader, Term, float, int)
|
||||
*/
|
||||
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity) throws IOException {
|
||||
this(reader, term, minSimilarity, 0);
|
||||
this(reader, term, minSimilarity, FuzzyQuery.defaultPrefixLength);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -76,18 +76,30 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
*/
|
||||
public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength) throws IOException {
|
||||
super();
|
||||
|
||||
if (minimumSimilarity >= 1.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity >= 1");
|
||||
else if (minimumSimilarity < 0.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity < 0");
|
||||
|
||||
minimumSimilarity = minSimilarity;
|
||||
scale_factor = 1.0f / (1.0f - minimumSimilarity);
|
||||
searchTerm = term;
|
||||
field = searchTerm.field();
|
||||
text = searchTerm.text();
|
||||
textlen = text.length();
|
||||
if(prefixLength > 0 && prefixLength < textlen){
|
||||
this.prefixLength = prefixLength;
|
||||
prefix = text.substring(0, prefixLength);
|
||||
text = text.substring(prefixLength);
|
||||
textlen = text.length();
|
||||
}
|
||||
|
||||
if(prefixLength < 0)
|
||||
throw new IllegalArgumentException("prefixLength < 0");
|
||||
|
||||
if(prefixLength > textlen)
|
||||
prefixLength = textlen;
|
||||
|
||||
this.prefixLength = prefixLength;
|
||||
prefix = text.substring(0, prefixLength);
|
||||
text = text.substring(prefixLength);
|
||||
textlen = text.length();
|
||||
|
||||
setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
|
||||
}
|
||||
|
||||
|
@ -101,15 +113,15 @@ public final class FuzzyTermEnum extends FilteredTermEnum {
|
|||
String target = termText.substring(prefixLength);
|
||||
int targetlen = target.length();
|
||||
int dist = editDistance(text, target, textlen, targetlen);
|
||||
distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));
|
||||
return (distance > minimumSimilarity);
|
||||
similarity = 1 - ((float)dist / (float) (prefixLength + Math.min(textlen, targetlen)));
|
||||
return (similarity > minimumSimilarity);
|
||||
}
|
||||
endEnum = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
protected final float difference() {
|
||||
return (float)((distance - minimumSimilarity) * scale_factor);
|
||||
public final float difference() {
|
||||
return (float)((similarity - minimumSimilarity) * scale_factor);
|
||||
}
|
||||
|
||||
public final boolean endEnum() {
|
||||
|
|
|
@ -248,10 +248,10 @@ public class TestQueryParser extends TestCase {
|
|||
assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
|
||||
FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null);
|
||||
assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
|
||||
assertEquals(0, fq.getPrefixLength());
|
||||
assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
|
||||
fq = (FuzzyQuery)getQuery("term~", null);
|
||||
assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
|
||||
assertEquals(0, fq.getPrefixLength());
|
||||
assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
|
||||
try {
|
||||
getQuery("term~1.1", null); // value > 1, throws exception
|
||||
fail();
|
||||
|
|
|
@ -47,20 +47,40 @@ public class TestFuzzyQuery extends TestCase {
|
|||
writer.close();
|
||||
IndexSearcher searcher = new IndexSearcher(directory);
|
||||
|
||||
FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"));
|
||||
FuzzyQuery query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
Hits hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
|
||||
// same with prefix
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 3);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 4);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(2, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 5);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 6);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
|
||||
// not similar enough:
|
||||
query = new FuzzyQuery(new Term("field", "xxxxx"));
|
||||
query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "aaccc")); // edit distance to "aaaaa" = 3
|
||||
query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// query identical to a word in the index:
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaa"));
|
||||
|
@ -69,20 +89,71 @@ public class TestFuzzyQuery extends TestCase {
|
|||
assertEquals(hits.doc(2).get("field"), ("aaabb"));
|
||||
|
||||
// query similar to a word in the index:
|
||||
query = new FuzzyQuery(new Term("field", "aaaac"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaa"));
|
||||
assertEquals(hits.doc(1).get("field"), ("aaaab"));
|
||||
assertEquals(hits.doc(2).get("field"), ("aaabb"));
|
||||
|
||||
// now with prefix
|
||||
query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaa"));
|
||||
assertEquals(hits.doc(1).get("field"), ("aaaab"));
|
||||
assertEquals(hits.doc(2).get("field"), ("aaabb"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaa"));
|
||||
assertEquals(hits.doc(1).get("field"), ("aaaab"));
|
||||
assertEquals(hits.doc(2).get("field"), ("aaabb"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 3);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(3, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaa"));
|
||||
assertEquals(hits.doc(1).get("field"), ("aaaab"));
|
||||
assertEquals(hits.doc(2).get("field"), ("aaabb"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 4);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(2, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaa"));
|
||||
assertEquals(hits.doc(1).get("field"), ("aaaab"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaac"), FuzzyQuery.defaultMinSimilarity, 5);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
|
||||
query = new FuzzyQuery(new Term("field", "ddddX"));
|
||||
query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("ddddd"));
|
||||
|
||||
// now with prefix
|
||||
query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("ddddd"));
|
||||
query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("ddddd"));
|
||||
query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 3);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("ddddd"));
|
||||
query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 4);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("ddddd"));
|
||||
query = new FuzzyQuery(new Term("field", "ddddX"), FuzzyQuery.defaultMinSimilarity, 5);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
|
||||
// different field = no match:
|
||||
query = new FuzzyQuery(new Term("anotherfield", "ddddX"));
|
||||
query = new FuzzyQuery(new Term("anotherfield", "ddddX"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
|
@ -101,31 +172,63 @@ public class TestFuzzyQuery extends TestCase {
|
|||
|
||||
FuzzyQuery query;
|
||||
// not similar enough:
|
||||
query = new FuzzyQuery(new Term("field", "xxxxx"));
|
||||
query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
Hits hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
// edit distance to "aaaaaaa" = 3, this matches because the string is longer than
|
||||
// in testDefaultFuzziness so a bigger difference is allowed:
|
||||
query = new FuzzyQuery(new Term("field", "aaaaccc"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaaaa"));
|
||||
|
||||
// now with prefix
|
||||
query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaaaa"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 4);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
assertEquals(hits.doc(0).get("field"), ("aaaaaaa"));
|
||||
query = new FuzzyQuery(new Term("field", "aaaaccc"), FuzzyQuery.defaultMinSimilarity, 5);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// no match, more than half of the characters is wrong:
|
||||
query = new FuzzyQuery(new Term("field", "aaacccc"));
|
||||
query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// now with prefix
|
||||
query = new FuzzyQuery(new Term("field", "aaacccc"), FuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// "student" and "stellent" are indeed similar to "segment" by default:
|
||||
query = new FuzzyQuery(new Term("field", "student"));
|
||||
query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "stellent"));
|
||||
query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
|
||||
|
||||
// now with prefix
|
||||
query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(1, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "student"), FuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
query = new FuzzyQuery(new Term("field", "stellent"), FuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
// "student" doesn't match anymore thanks to increased minimum similarity:
|
||||
query = new FuzzyQuery(new Term("field", "student"), 0.6f);
|
||||
query = new FuzzyQuery(new Term("field", "student"), 0.6f, 0);
|
||||
hits = searcher.search(query);
|
||||
assertEquals(0, hits.length());
|
||||
|
||||
|
|
Loading…
Reference in New Issue