LUCENE-763: LuceneDictionary skips first word in enumeration

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@543220 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Daniel Naber 2007-05-31 19:13:36 +00:00
parent 42c3d11f07
commit 6e98fa61d4
3 changed files with 231 additions and 16 deletions

View File

@ -150,6 +150,9 @@ Bug fixes
19. LUCENE-698: FilteredQuery now takes the query boost into account for 19. LUCENE-698: FilteredQuery now takes the query boost into account for
scoring. (Michael Busch) scoring. (Michael Busch)
20. LUCENE-763: Spellchecker: LuceneDictionary used to skip first word in
enumeration. (Christian Mallwitz via Daniel Naber)
New features New features
1. LUCENE-759: Added two n-gram-producing TokenFilters. 1. LUCENE-759: Added two n-gram-producing TokenFilters.

View File

@ -30,7 +30,11 @@ import java.io.*;
* Lucene Dictionary: terms taken from the given field * Lucene Dictionary: terms taken from the given field
* of a Lucene index. * of a Lucene index.
* *
* When using IndexReader.terms(Term), the returned TermEnum is already positioned
* on its first term, so the code must not call next() before reading term(); see
* http://issues.apache.org/jira/browse/LUCENE-6
*
* @author Nicolas Maisonneuve * @author Nicolas Maisonneuve
* @author Christian Mallwitz
*/ */
public class LuceneDictionary implements Dictionary { public class LuceneDictionary implements Dictionary {
private IndexReader reader; private IndexReader reader;
@ -64,6 +68,13 @@ public class LuceneDictionary implements Dictionary {
hasNext(); hasNext();
} }
hasNextCalled = false; hasNextCalled = false;
try {
termEnum.next();
} catch (IOException e) {
throw new RuntimeException(e);
}
return (actualTerm != null) ? actualTerm.text() : null; return (actualTerm != null) ? actualTerm.text() : null;
} }
@ -72,23 +83,23 @@ public class LuceneDictionary implements Dictionary {
return actualTerm != null; return actualTerm != null;
} }
hasNextCalled = true; hasNextCalled = true;
try {
// if there are no more words actualTerm = termEnum.term();
if (!termEnum.next()) {
actualTerm = null; // if there are no words return false
return false; if (actualTerm == null) {
} return false;
// if the next word is in the field
actualTerm = termEnum.term();
String currentField = actualTerm.field();
if (currentField != field) {
actualTerm = null;
return false;
}
return true;
} catch (IOException e) {
throw new RuntimeException(e);
} }
String currentField = actualTerm.field();
// if the next word doesn't have the same field return false
if (currentField != field) {
actualTerm = null;
return false;
}
return true;
} }
public void remove() { public void remove() {

View File

@ -0,0 +1,201 @@
package org.apache.lucene.search.spell;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Iterator;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
/**
 * Test case for LuceneDictionary.
 * It first creates a simple index and then a couple of instances of LuceneDictionary
 * on different fields and checks if all the right text comes back.
 *
 * The index built in setUp() holds three fields: "aaa" (the word "foo", indexed
 * twice), "contents" (the words "Tom" and "Jerry") and "zzz" (the word "bar").
 * The tests cover a field that does not exist, the fields on either side of
 * "contents", and the regression from LUCENE-763 where the first word of a
 * field was skipped by the iterator.
 *
 * @author Christian Mallwitz
 */
public class TestLuceneDictionary extends TestCase {

  /** In-memory index populated by setUp() and read by every test. */
  private Directory store = new RAMDirectory();

  /** Reader opened by each test; always released through closeReader(). */
  private IndexReader indexReader = null;

  private LuceneDictionary ld;
  private Iterator it;

  /**
   * Builds the index used by all tests.
   *
   * @throws Exception if the index cannot be written
   */
  public void setUp() throws Exception {
    IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);
    try {
      // "foo" is indexed twice on purpose: the dictionary must return it once.
      addDocument(writer, "aaa", "foo");
      addDocument(writer, "aaa", "foo");
      addDocument(writer, "contents", "Tom");
      addDocument(writer, "contents", "Jerry");
      addDocument(writer, "zzz", "bar");
      writer.optimize();
    } finally {
      // Close the writer even when indexing fails.
      writer.close();
    }
  }

  /** Adds a document with a single stored, tokenized field to the writer. */
  private static void addDocument(IndexWriter writer, String fieldName, String text)
      throws IOException {
    Document doc = new Document();
    doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
  }

  /** Closes the reader opened by the current test, if any. */
  private void closeReader() throws IOException {
    if (indexReader != null) {
      indexReader.close();
    }
  }

  /** A field that is not in the index must yield an empty iterator. */
  public void testFieldNonExistent() throws IOException {
    try {
      indexReader = IndexReader.open(store);

      ld = new LuceneDictionary(indexReader, "nonexistent_field");
      it = ld.getWordsIterator();

      assertFalse("More elements than expected", it.hasNext());
      assertNull("Nonexistent element is really null", it.next());
    } finally {
      closeReader();
    }
  }

  /** Field "aaa": a word indexed in two documents is returned exactly once. */
  public void testFieldAaa() throws IOException {
    try {
      indexReader = IndexReader.open(store);

      ld = new LuceneDictionary(indexReader, "aaa");
      it = ld.getWordsIterator();

      assertTrue("First element doesn't exist.", it.hasNext());
      assertEquals("First element isn't correct", "foo", it.next());
      assertFalse("More elements than expected", it.hasNext());
      assertNull("Nonexistent element is really null", it.next());
    } finally {
      closeReader();
    }
  }

  /**
   * Field "contents": alternating hasNext()/next() returns both words, and a
   * plain while-loop over a fresh iterator sees exactly two of them.
   */
  public void testFieldContents_1() throws IOException {
    try {
      indexReader = IndexReader.open(store);

      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();

      assertTrue("First element doesn't exist.", it.hasNext());
      assertEquals("First element isn't correct", "Jerry", it.next());
      assertTrue("Second element doesn't exist.", it.hasNext());
      assertEquals("Second element isn't correct", "Tom", it.next());
      assertFalse("More elements than expected", it.hasNext());
      assertNull("Nonexistent element is really null", it.next());

      // Second pass: count the words with the usual iteration idiom.
      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();

      int wordCount = 0;
      while (it.hasNext()) {
        it.next();
        wordCount++;
      }

      assertEquals("Number of words incorrect", 2, wordCount);
    } finally {
      closeReader();
    }
  }

  /**
   * Field "contents": hasNext() must be free of side effects, and next() must
   * work without a preceding hasNext() call.
   */
  public void testFieldContents_2() throws IOException {
    try {
      indexReader = IndexReader.open(store);

      ld = new LuceneDictionary(indexReader, "contents");
      it = ld.getWordsIterator();

      // hasNext() should have no side effects
      assertTrue("First element isn't where it should be.", it.hasNext());
      assertTrue("First element isn't where it should be.", it.hasNext());
      assertTrue("First element isn't where it should be.", it.hasNext());

      // just iterate through words
      assertEquals("First element isn't correct", "Jerry", it.next());
      assertEquals("Second element isn't correct", "Tom", it.next());
      assertNull("Nonexistent element is really null", it.next());

      // hasNext() should still have no side effects ...
      assertFalse("There should not be any more elements", it.hasNext());
      assertFalse("There should not be any more elements", it.hasNext());
      assertFalse("There should not be any more elements", it.hasNext());

      // .. and there are really no more words
      assertNull("Nonexistent element is really null", it.next());
      assertNull("Nonexistent element is really null", it.next());
      assertNull("Nonexistent element is really null", it.next());
    } finally {
      closeReader();
    }
  }

  /** Field "zzz": the single word of the field is returned. */
  public void testFieldZzz() throws IOException {
    try {
      indexReader = IndexReader.open(store);

      ld = new LuceneDictionary(indexReader, "zzz");
      it = ld.getWordsIterator();

      assertTrue("First element doesn't exist.", it.hasNext());
      assertEquals("First element isn't correct", "bar", it.next());
      assertFalse("More elements than expected", it.hasNext());
      assertNull("Nonexistent element is really null", it.next());
    } finally {
      closeReader();
    }
  }

  /**
   * Feeds the "contents" dictionary into a SpellChecker and checks that both
   * words, including the first one, can be suggested.
   */
  public void testSpellchecker() throws IOException {
    SpellChecker sc = new SpellChecker(new RAMDirectory());
    try {
      indexReader = IndexReader.open(store);
      sc.indexDictionary(new LuceneDictionary(indexReader, "contents"));

      String[] suggestions = sc.suggestSimilar("Tam", 1);
      assertEquals(1, suggestions.length);
      assertEquals("Tom", suggestions[0]);

      suggestions = sc.suggestSimilar("Jarry", 1);
      assertEquals(1, suggestions.length);
      assertEquals("Jerry", suggestions[0]);
    } finally {
      // Release the reader even when an assertion above fails.
      closeReader();
    }
  }
}