LUCENE-4809: FuzzyLikeThisQuery fails with NPE during rewrite if field does not exist or is not indexed

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451577 13f79535-47bb-0310-9956-ffa450edef68
2013-03-01 12:50:40 +00:00 · 2013-03-01 12:50:40 +00:00 · 9b6b4ec703
parent abd85ff5a0
commit 9b6b4ec703
2 changed files with 25 additions and 1 deletions
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
@@ -196,6 +197,10 @@ public class FuzzyLikeThisQuery extends Query
    int corpusNumDocs = reader.numDocs();
    HashSet<String> processedTerms = new HashSet<String>();
    ts.reset();
+    final Terms terms = MultiFields.getTerms(reader, f.fieldName);
+    if (terms == null) {
+      return;
+    }
    while (ts.incrementToken()) {
      String term = termAtt.toString();
      if (!processedTerms.contains(term)) {
@@ -206,7 +211,7 @@ public class FuzzyLikeThisQuery extends Query
        AttributeSource atts = new AttributeSource();
        MaxNonCompetitiveBoostAttribute maxBoostAtt =
            atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
-        SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(MultiFields.getTerms(reader, startTerm.field()), atts, startTerm, f.minSimilarity, f.prefixLength);
+        SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
        //store the df so all variants use same idf
        int df = reader.docFreq(startTerm);
        int numVariants = 0;
--- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/FuzzyLikeThisQueryTest.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/queries/FuzzyLikeThisQueryTest.java
@@ -108,6 +108,25 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
    StoredDocument doc = searcher.doc(sd[0].doc);
    assertEquals("Should match most similar when using 2 words", "2", doc.get("id"));
  }
+  
+  // LUCENE-4809: rewrite must not throw an NPE when a field does not exist or is not indexed
+  public void testNonExistingField() throws Throwable {
+    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
+    flt.addTerms("jonathin smoth", "name", 0.3f, 1);
+    flt.addTerms("jonathin smoth", "this field does not exist", 0.3f, 1);
+    // rewrite must not fail here just because the field doesn't exist
+    Query q = flt.rewrite(searcher.getIndexReader());
+    HashSet<Term> queryTerms = new HashSet<Term>();
+    q.extractTerms(queryTerms);
+    assertTrue("Should have variant jonathan", queryTerms.contains(new Term("name", "jonathan")));
+    assertTrue("Should have variant smith", queryTerms.contains(new Term("name", "smith")));
+    TopDocs topDocs = searcher.search(flt, 1); // searches with the original query, not the rewritten q
+    ScoreDoc[] sd = topDocs.scoreDocs;
+    assertTrue("score docs must match 1 doc", (sd != null) && (sd.length > 0));
+    StoredDocument doc = searcher.doc(sd[0].doc);
+    assertEquals("Should match most similar when using 2 words", "2", doc.get("id"));
+  }
+

  //Test bug found when first query word does not match anything
  public void testNoMatchFirstWordBug() throws Throwable {