LUCENE-763: LuceneDictionary skips first word in enumeration

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@543220 13f79535-47bb-0310-9956-ffa450edef68
2007-05-31 19:13:36 +00:00 · 2007-05-31 19:13:36 +00:00 · 6e98fa61d4
parent 42c3d11f07
commit 6e98fa61d4
3 changed files with 231 additions and 16 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -150,6 +150,9 @@ Bug fixes
 19. LUCENE-698: FilteredQuery now takes the query boost into account for 
    scoring. (Michael Busch)

+20. LUCENE-763: Spellchecker: LuceneDictionary used to skip first word in 
+    enumeration. (Christian Mallwitz via Daniel Naber)
+
 New features

 1. LUCENE-759: Added two n-gram-producing TokenFilters.
--- a/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
+++ b/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
@ -30,7 +30,11 @@ import java.io.*;
 * Lucene Dictionary: terms taken from the given field
 * of a Lucene index.
 *
+ * When using IndexReader.terms(Term) the code must not call next() on TermEnum
+ * as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
+ *
 * @author Nicolas Maisonneuve
+ * @author Christian Mallwitz
 */
 public class LuceneDictionary implements Dictionary {
  private IndexReader reader;
@ -64,6 +68,13 @@ public class LuceneDictionary implements Dictionary {
        hasNext();
      }
      hasNextCalled = false;
+
+      try {
+        termEnum.next();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+
      return (actualTerm != null) ? actualTerm.text() : null;
    }

@ -72,23 +83,23 @@ public class LuceneDictionary implements Dictionary {
        return actualTerm != null;
      }
      hasNextCalled = true;
-      try {
-        // if there are no more words
-        if (!termEnum.next()) {
-          actualTerm = null;
-          return false;
-        }
-        // if the next word is in the field
-        actualTerm = termEnum.term();
-        String currentField = actualTerm.field();
-        if (currentField != field) {
-          actualTerm = null;
-          return false;
-        }
-        return true;
-      } catch (IOException e) {
-        throw new RuntimeException(e);
+
+      actualTerm = termEnum.term();
+
+      // if there are no words return false
+      if (actualTerm == null) {
+        return false;
      }
+
+      String currentField = actualTerm.field();
+
+      // if the next word doesn't have the same field return false
+      if (currentField != field) {
+        actualTerm = null;
+        return false;
+      }
+
+      return true;
    }

    public void remove() {
--- a/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
+++ b/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
@ -0,0 +1,201 @@
+package org.apache.lucene.search.spell;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Test case for LuceneDictionary.
+ * It first creates a simple index and then a couple of instances of LuceneDictionary
+ * on different fields and checks if all the right text comes back.
+ *
+ * @author Christian Mallwitz
+ */
+public class TestLuceneDictionary extends TestCase {
+
+  private Directory store = new RAMDirectory();
+
+  private IndexReader indexReader = null;
+
+  private LuceneDictionary ld;
+  private Iterator it;
+
+  public void setUp() throws Exception {
+
+    IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);
+
+    Document doc;
+
+    doc = new  Document();
+    doc.add(new Field("aaa", "foo", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new  Document();
+    doc.add(new Field("aaa", "foo", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new  Document();
+    doc.add(new  Field("contents", "Tom", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new  Document();
+    doc.add(new  Field("contents", "Jerry", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    doc = new Document();
+    doc.add(new Field("zzz", "bar", Field.Store.YES, Field.Index.TOKENIZED));
+    writer.addDocument(doc);
+
+    writer.optimize();
+    writer.close();
+  }
+
+  public void testFieldNonExistent() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+
+      ld = new LuceneDictionary(indexReader, "nonexistent_field");
+      it = ld.getWordsIterator();
+
+      assertFalse("More elements than expected", it.hasNext());
+      assertTrue("Nonexistent element is really null", it.next() == null);
+    } finally {
+      if  (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldAaa() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+
+      ld = new LuceneDictionary(indexReader, "aaa");
+      it = ld.getWordsIterator();
+
+      assertTrue("First element doesn't exist.", it.hasNext());
+      assertTrue("First element isn't correct", it.next().equals("foo"));
+      assertFalse("More elements than expected", it.hasNext());
+      assertTrue("Nonexistent element is really null", it.next() == null);
+    } finally {
+      if  (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldContents_1() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+
+      ld = new LuceneDictionary(indexReader, "contents");
+      it = ld.getWordsIterator();
+
+      assertTrue("First element doesn't exist.", it.hasNext());
+      assertTrue("First element isn't correct", it.next().equals("Jerry"));
+      assertTrue("Second element doesn't exist.", it.hasNext());
+      assertTrue("Second element isn't correct", it.next().equals("Tom"));
+      assertFalse("More elements than expected", it.hasNext());
+      assertTrue("Nonexistent element is really null", it.next() == null);
+
+      ld = new LuceneDictionary(indexReader, "contents");
+      it = ld.getWordsIterator();
+
+      int counter = 2;
+      while (it.hasNext()) {
+        it.next();
+        counter--;
+      }
+
+      assertTrue("Number of words incorrect", counter == 0);
+    }
+    finally {
+      if  (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldContents_2() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+
+      ld = new LuceneDictionary(indexReader, "contents");
+      it = ld.getWordsIterator();
+
+      // hasNext() should have no side effects
+      assertTrue("First element isn't were it should be.", it.hasNext());
+      assertTrue("First element isn't were it should be.", it.hasNext());
+      assertTrue("First element isn't were it should be.", it.hasNext());
+
+      // just iterate through words
+      assertTrue("First element isn't correct", it.next().equals("Jerry"));
+      assertTrue("Second element isn't correct", it.next().equals("Tom"));
+      assertTrue("Nonexistent element is really null", it.next() == null);
+
+      // hasNext() should still have no side effects ...
+      assertFalse("There should be any more elements", it.hasNext());
+      assertFalse("There should be any more elements", it.hasNext());
+      assertFalse("There should be any more elements", it.hasNext());
+
+      // .. and there are really no more words
+      assertTrue("Nonexistent element is really null", it.next() == null);
+      assertTrue("Nonexistent element is really null", it.next() == null);
+      assertTrue("Nonexistent element is really null", it.next() == null);
+    }
+    finally {
+      if  (indexReader != null) { indexReader.close(); }
+    }
+  }
+
+  public void testFieldZzz() throws IOException {
+    try {
+      indexReader = IndexReader.open(store);
+
+      ld = new LuceneDictionary(indexReader, "zzz");
+      it = ld.getWordsIterator();
+
+      assertTrue("First element doesn't exist.", it.hasNext());
+      assertTrue("First element isn't correct", it.next().equals("bar"));
+      assertFalse("More elements than expected", it.hasNext());
+      assertTrue("Nonexistent element is really null", it.next() == null);
+    }
+    finally {
+      if  (indexReader != null) { indexReader.close(); }
+    }
+  }
+  
+  public void testSpellchecker() throws IOException {
+    SpellChecker sc = new SpellChecker(new RAMDirectory());
+    indexReader = IndexReader.open(store);
+    sc.indexDictionary(new LuceneDictionary(indexReader, "contents"));
+    String[] suggestions = sc.suggestSimilar("Tam", 1);
+    assertEquals(1, suggestions.length);
+    assertEquals("Tom", suggestions[0]);
+    suggestions = sc.suggestSimilar("Jarry", 1);
+    assertEquals(1, suggestions.length);
+    assertEquals("Jerry", suggestions[0]);
+    indexReader.close();
+  }
+  
+}