LUCENE-4809: FuzzyLikeThisQuery fails with NPE during rewrite if field does not exist or is not indexed

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451577 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2013-03-01 12:50:40 +00:00
parent abd85ff5a0
commit 9b6b4ec703
2 changed files with 25 additions and 1 deletions

View File

@ -30,6 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.search.similarities.DefaultSimilarity;
@ -196,6 +197,10 @@ public class FuzzyLikeThisQuery extends Query
int corpusNumDocs = reader.numDocs();
HashSet<String> processedTerms = new HashSet<String>();
ts.reset();
final Terms terms = MultiFields.getTerms(reader, f.fieldName);
if (terms == null) {
return;
}
while (ts.incrementToken()) {
String term = termAtt.toString();
if (!processedTerms.contains(term)) {
@ -206,7 +211,7 @@ public class FuzzyLikeThisQuery extends Query
AttributeSource atts = new AttributeSource();
MaxNonCompetitiveBoostAttribute maxBoostAtt =
atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(MultiFields.getTerms(reader, startTerm.field()), atts, startTerm, f.minSimilarity, f.prefixLength);
SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
//store the df so all variants use same idf
int df = reader.docFreq(startTerm);
int numVariants = 0;

View File

@ -108,6 +108,25 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
StoredDocument doc = searcher.doc(sd[0].doc);
assertEquals("Should match most similar when using 2 words", "2", doc.get("id"));
}
/**
 * Regression test for LUCENE-4809: rewriting a query that carries terms for a
 * field absent from the index must not fail. The terms registered on the
 * "name" field are still expected to expand to "jonathan" and "smith", and the
 * best hit is expected to be the document whose stored "id" is "2".
 */
public void testNonExistingField() throws Throwable {
  final String misspelled = "jonathin smoth";
  FuzzyLikeThisQuery fuzzyQuery = new FuzzyLikeThisQuery(10, analyzer);
  fuzzyQuery.addTerms(misspelled, "name", 0.3f, 1);
  fuzzyQuery.addTerms(misspelled, "this field does not exist", 0.3f, 1);

  // The rewrite must not fail merely because one of the fields does not exist.
  Query rewritten = fuzzyQuery.rewrite(searcher.getIndexReader());
  HashSet<Term> expandedTerms = new HashSet<Term>();
  rewritten.extractTerms(expandedTerms);
  assertTrue("Should have variant jonathan", expandedTerms.contains(new Term("name", "jonathan")));
  assertTrue("Should have variant smith", expandedTerms.contains(new Term("name", "smith")));

  // Run the un-rewritten query through the searcher and inspect the single best hit.
  ScoreDoc[] hits = searcher.search(fuzzyQuery, 1).scoreDocs;
  assertTrue("score docs must match 1 doc", hits != null && hits.length > 0);
  StoredDocument bestMatch = searcher.doc(hits[0].doc);
  assertEquals("Should match most similar when using 2 words", "2", bestMatch.get("id"));
}
//Test bug found when first query word does not match anything
public void testNoMatchFirstWordBug() throws Throwable {