LUCENE-763: LuceneDictionary skips first word in enumeration

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@543220 13f79535-47bb-0310-9956-ffa450edef68
2007-05-31 19:13:36 +00:00 · 2007-05-31 19:13:36 +00:00 · 6e98fa61d4
parent 42c3d11f07
commit 6e98fa61d4
3 changed files with 231 additions and 16 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -150,6 +150,9 @@ Bug fixes
 19. LUCENE-698: FilteredQuery now takes the query boost into account for 
    scoring. (Michael Busch)
 20. LUCENE-763: Spellchecker: LuceneDictionary used to skip first word in 
    enumeration. (Christian Mallwitz via Daniel Naber)
 New features
 1. LUCENE-759: Added two n-gram-producing TokenFilters.
--- a/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
+++ b/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
@ -30,7 +30,11 @@ import java.io.*;
 * Lucene Dictionary: terms taken from the given field
 * of a Lucene index.
 *
 * When using IndexReader.terms(Term) the code must not call next() on TermEnum
 * as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
 *
 * @author Nicolas Maisonneuve
 * @author Christian Mallwitz
 */
 public class LuceneDictionary implements Dictionary {
  private IndexReader reader;
@ -64,6 +68,13 @@ public class LuceneDictionary implements Dictionary {
        hasNext();
      }
      hasNextCalled = false;
      try {
        termEnum.next();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      return (actualTerm != null) ? actualTerm.text() : null;
    }
@ -72,23 +83,23 @@ public class LuceneDictionary implements Dictionary {
        return actualTerm != null;
      }
      hasNextCalled = true;
-      try {
+
-        // if there are no more words
+      actualTerm = termEnum.term();
-        if (!termEnum.next()) {
+
-          actualTerm = null;
+      // if there are no words return false
-          return false;
+      if (actualTerm == null) {
-        }
+        return false;
        // if the next word is in the field
        actualTerm = termEnum.term();
        String currentField = actualTerm.field();
        if (currentField != field) {
          actualTerm = null;
          return false;
        }
        return true;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
      String currentField = actualTerm.field();
      // if the next word doesn't have the same field return false
      if (currentField != field) {
        actualTerm = null;
        return false;
      }
      return true;
    }
    public void remove() {
--- a/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
+++ b/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
@ -0,0 +1,201 @@
 package org.apache.lucene.search.spell;
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.Iterator;
 import junit.framework.TestCase;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 /**
 * Test case for LuceneDictionary.
 * It first creates a simple index and then a couple of instances of LuceneDictionary
 * on different fields and checks if all the right text comes back.
 *
 * @author Christian Mallwitz
 */
 public class TestLuceneDictionary extends TestCase {
  private Directory store = new RAMDirectory();
  private IndexReader indexReader = null;
  private LuceneDictionary ld;
  private Iterator it;
  public void setUp() throws Exception {
    IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);
    Document doc;
    doc = new  Document();
    doc.add(new Field("aaa", "foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    doc = new  Document();
    doc.add(new Field("aaa", "foo", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    doc = new  Document();
    doc.add(new  Field("contents", "Tom", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    doc = new  Document();
    doc.add(new  Field("contents", "Jerry", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("zzz", "bar", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
  }
  public void testFieldNonExistent() throws IOException {
    try {
      indexReader = IndexReader.open(store);
      ld = new LuceneDictionary(indexReader, "nonexistent_field");
      it = ld.getWordsIterator();
      assertFalse("More elements than expected", it.hasNext());
      assertTrue("Nonexistent element is really null", it.next() == null);
    } finally {
      if  (indexReader != null) { indexReader.close(); }
    }
  }
  public void testFieldAaa() throws IOException {
    try {
      indexReader = IndexReader.open(store);
      ld = new LuceneDictionary(indexReader, "aaa");
      it = ld.getWordsIterator();
      assertTrue("First element doesn't exist.", it.hasNext());
      assertTrue("First element isn't correct", it.next().equals("foo"));
      assertFalse("More elements than expected", it.hasNext());
      assertTrue("Nonexistent element is really null", it.next() == null);
    } finally {
      if  (indexReader != null) { indexReader.close(); }
    }
  }
  public void testFieldContents_1() throws IOException {
    try {
      indexReader = IndexReader.open(store);
      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();
      assertTrue("First element doesn't exist.", it.hasNext());
      assertTrue("First element isn't correct", it.next().equals("Jerry"));
      assertTrue("Second element doesn't exist.", it.hasNext());
      assertTrue("Second element isn't correct", it.next().equals("Tom"));
      assertFalse("More elements than expected", it.hasNext());
      assertTrue("Nonexistent element is really null", it.next() == null);
      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();
      int counter = 2;
      while (it.hasNext()) {
        it.next();
        counter--;
      }
      assertTrue("Number of words incorrect", counter == 0);
    }
    finally {
      if  (indexReader != null) { indexReader.close(); }
    }
  }
  public void testFieldContents_2() throws IOException {
    try {
      indexReader = IndexReader.open(store);
      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();
      // hasNext() should have no side effects
      assertTrue("First element isn't were it should be.", it.hasNext());
      assertTrue("First element isn't were it should be.", it.hasNext());
      assertTrue("First element isn't were it should be.", it.hasNext());
      // just iterate through words
      assertTrue("First element isn't correct", it.next().equals("Jerry"));
      assertTrue("Second element isn't correct", it.next().equals("Tom"));
      assertTrue("Nonexistent element is really null", it.next() == null);
      // hasNext() should still have no side effects ...
      assertFalse("There should be any more elements", it.hasNext());
      assertFalse("There should be any more elements", it.hasNext());
      assertFalse("There should be any more elements", it.hasNext());
      // .. and there are really no more words
      assertTrue("Nonexistent element is really null", it.next() == null);
      assertTrue("Nonexistent element is really null", it.next() == null);
      assertTrue("Nonexistent element is really null", it.next() == null);
    }
    finally {
      if  (indexReader != null) { indexReader.close(); }
    }
  }
  public void testFieldZzz() throws IOException {
    try {
      indexReader = IndexReader.open(store);
      ld = new LuceneDictionary(indexReader, "zzz");
      it = ld.getWordsIterator();
      assertTrue("First element doesn't exist.", it.hasNext());
      assertTrue("First element isn't correct", it.next().equals("bar"));
      assertFalse("More elements than expected", it.hasNext());
      assertTrue("Nonexistent element is really null", it.next() == null);
    }
    finally {
      if  (indexReader != null) { indexReader.close(); }
    }
  }
  public void testSpellchecker() throws IOException {
    SpellChecker sc = new SpellChecker(new RAMDirectory());
    indexReader = IndexReader.open(store);
    sc.indexDictionary(new LuceneDictionary(indexReader, "contents"));
    String[] suggestions = sc.suggestSimilar("Tam", 1);
    assertEquals(1, suggestions.length);
    assertEquals("Tom", suggestions[0]);
    suggestions = sc.suggestSimilar("Jarry", 1);
    assertEquals(1, suggestions.length);
    assertEquals("Jerry", suggestions[0]);
    indexReader.close();
  }
 }