mirror of https://github.com/apache/lucene.git
LUCENE-2108: Enable safe concurrent spell-index modifications in Spellchecker
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@887532 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fa65d42e94
commit
ab447b6af0
|
@ -17,6 +17,12 @@ API Changes
|
|||
|
||||
New features
|
||||
|
||||
* LUCENE-2108: Spellchecker now safely supports concurrent modifications to
|
||||
the spell-index. Threads can safely obtain term suggestions while the spell-
|
||||
index is rebuild, cleared or reset. Internal IndexSearcher instances remain
|
||||
open until the last thread accessing them releases the reference.
|
||||
(Simon Willnauer)
|
||||
|
||||
* LUCENE-2067: Add a Czech light stemmer. CzechAnalyzer will now stem words
|
||||
when Version is set to 3.1 or higher. (Robert Muir)
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
/**
|
||||
|
@ -60,10 +61,14 @@ public class SpellChecker {
|
|||
* Field name for each word in the ngram index.
|
||||
*/
|
||||
public static final String F_WORD = "word";
|
||||
|
||||
private static final Term F_WORD_TERM = new Term(F_WORD);
|
||||
|
||||
/**
|
||||
* the spell index
|
||||
*/
|
||||
// don't modify the directory directly - see #swapSearcher()
|
||||
// TODO: why is this package private?
|
||||
Directory spellIndex;
|
||||
|
||||
/**
|
||||
|
@ -72,7 +77,22 @@ public class SpellChecker {
|
|||
private float bStart = 2.0f;
|
||||
private float bEnd = 1.0f;
|
||||
|
||||
// don't use this searcher directly - see #swapSearcher()
|
||||
private IndexSearcher searcher;
|
||||
|
||||
/*
|
||||
* this locks all modifications to the current searcher.
|
||||
*/
|
||||
private final Object searcherLock = new Object();
|
||||
|
||||
/*
|
||||
* this lock synchronizes all possible modifications to the
|
||||
* current index directory. It should not be possible to try modifying
|
||||
* the same index concurrently. Note: Do not acquire the searcher lock
|
||||
* before acquiring this lock!
|
||||
*/
|
||||
private final Object modifyCurrentIndexLock = new Object();
|
||||
private volatile boolean closed = false;
|
||||
|
||||
// minimum score for hits generated by the spell checker query
|
||||
private float minScore = 0.5f;
|
||||
|
@ -82,15 +102,24 @@ public class SpellChecker {
|
|||
/**
|
||||
* Use the given directory as a spell checker index. The directory
|
||||
* is created if it doesn't exist yet.
|
||||
* @param spellIndex the spell index directory
|
||||
* @param sd the {@link StringDistance} measurement to use
|
||||
* @throws IOException if Spellchecker can not open the directory
|
||||
*/
|
||||
public SpellChecker(Directory spellIndex, StringDistance sd) throws IOException {
|
||||
setSpellIndex(spellIndex);
|
||||
setStringDistance(sd);
|
||||
}
|
||||
/**
|
||||
* Use the given directory as a spell checker index with a
|
||||
* {@link LevensteinDistance} as the default {@link StringDistance}. The
|
||||
* directory is created if it doesn't exist yet.
|
||||
*
|
||||
* @param spellIndex
|
||||
* the spell index directory
|
||||
* @throws IOException
|
||||
* if spellchecker can not open the directory
|
||||
*/
|
||||
public SpellChecker(Directory spellIndex,StringDistance sd) throws IOException {
|
||||
this.setSpellIndex(spellIndex);
|
||||
this.setStringDistance(sd);
|
||||
}
|
||||
|
||||
public SpellChecker(Directory spellIndex) throws IOException {
|
||||
this(spellIndex, new LevensteinDistance());
|
||||
}
|
||||
|
@ -99,27 +128,41 @@ public class SpellChecker {
|
|||
* Use a different index as the spell checker index or re-open
|
||||
* the existing index if <code>spellIndex</code> is the same value
|
||||
* as given in the constructor.
|
||||
*
|
||||
* @param spellIndex
|
||||
* @throws IOException
|
||||
* @param spellIndexDir the spell directory to use
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @throws IOException if spellchecker can not open the directory
|
||||
*/
|
||||
public void setSpellIndex(Directory spellIndex) throws IOException {
|
||||
this.spellIndex = spellIndex;
|
||||
if (!IndexReader.indexExists(spellIndex)) {
|
||||
IndexWriter writer = new IndexWriter(spellIndex, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
writer.close();
|
||||
// TODO: we should make this final as it is called in the constructor
|
||||
public void setSpellIndex(Directory spellIndexDir) throws IOException {
|
||||
// this could be the same directory as the current spellIndex
|
||||
// modifications to the directory should be synchronized
|
||||
synchronized (modifyCurrentIndexLock) {
|
||||
ensureOpen();
|
||||
if (!IndexReader.indexExists(spellIndexDir)) {
|
||||
IndexWriter writer = new IndexWriter(spellIndexDir, null, true,
|
||||
IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
writer.close();
|
||||
}
|
||||
swapSearcher(spellIndexDir);
|
||||
}
|
||||
// close the old searcher, if there was one
|
||||
if (searcher != null) {
|
||||
searcher.close();
|
||||
}
|
||||
searcher = new IndexSearcher(this.spellIndex, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the {@link StringDistance} implementation for this
|
||||
* {@link SpellChecker} instance.
|
||||
*
|
||||
* @param sd the {@link StringDistance} implementation for this
|
||||
* {@link SpellChecker} instance
|
||||
*/
|
||||
public void setStringDistance(StringDistance sd) {
|
||||
this.sd = sd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link StringDistance} instance used by this
|
||||
* {@link SpellChecker} instance.
|
||||
*
|
||||
* @return the {@link StringDistance} instance used by this
|
||||
* {@link SpellChecker} instance.
|
||||
*/
|
||||
public StringDistance getStringDistance() {
|
||||
return sd;
|
||||
}
|
||||
|
@ -144,7 +187,8 @@ public class SpellChecker {
|
|||
*
|
||||
* @param word the word you want a spell check done on
|
||||
* @param numSug the number of suggested words
|
||||
* @throws IOException
|
||||
* @throws IOException if the underlying index throws an {@link IOException}
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return String[]
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug) throws IOException {
|
||||
|
@ -169,98 +213,104 @@ public class SpellChecker {
|
|||
* words are restricted to the words present in this field.
|
||||
* @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
|
||||
* (only if restricted mode = (indexReader!=null and field!=null)
|
||||
* @throws IOException
|
||||
* @throws IOException if the underlying index throws an {@link IOException}
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return String[] the sorted list of the suggest words with these 2 criteria:
|
||||
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
|
||||
* of the suggest words in the field of the user index
|
||||
*/
|
||||
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
|
||||
String field, boolean morePopular) throws IOException {
|
||||
|
||||
float min = this.minScore;
|
||||
final int lengthWord = word.length();
|
||||
|
||||
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
||||
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
|
||||
// if the word exists in the real index and we don't care for word frequency, return the word itself
|
||||
if (!morePopular && freq > 0) {
|
||||
return new String[] { word };
|
||||
}
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
String[] grams;
|
||||
String key;
|
||||
|
||||
for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
|
||||
|
||||
key = "gram" + ng; // form key
|
||||
|
||||
grams = formGrams(word, ng); // form word into ngrams (allow dups too)
|
||||
|
||||
if (grams.length == 0) {
|
||||
continue; // hmm
|
||||
// obtainSearcher calls ensureOpen
|
||||
final IndexSearcher indexSearcher = obtainSearcher();
|
||||
try{
|
||||
float min = this.minScore;
|
||||
final int lengthWord = word.length();
|
||||
|
||||
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
||||
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
|
||||
// if the word exists in the real index and we don't care for word frequency, return the word itself
|
||||
if (!morePopular && freq > 0) {
|
||||
return new String[] { word };
|
||||
}
|
||||
|
||||
if (bStart > 0) { // should we boost prefixes?
|
||||
add(query, "start" + ng, grams[0], bStart); // matches start of word
|
||||
|
||||
}
|
||||
if (bEnd > 0) { // should we boost suffixes
|
||||
add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
|
||||
|
||||
}
|
||||
for (int i = 0; i < grams.length; i++) {
|
||||
add(query, key, grams[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int maxHits = 10 * numSug;
|
||||
|
||||
// System.out.println("Q: " + query);
|
||||
ScoreDoc[] hits = searcher.search(query, null, maxHits).scoreDocs;
|
||||
// System.out.println("HITS: " + hits.length());
|
||||
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
|
||||
|
||||
// go thru more than 'maxr' matches in case the distance filter triggers
|
||||
int stop = Math.min(hits.length, maxHits);
|
||||
SuggestWord sugWord = new SuggestWord();
|
||||
for (int i = 0; i < stop; i++) {
|
||||
|
||||
sugWord.string = searcher.doc(hits[i].doc).get(F_WORD); // get orig word
|
||||
|
||||
// don't suggest a word for itself, that would be silly
|
||||
if (sugWord.string.equals(word)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// edit distance
|
||||
sugWord.score = sd.getDistance(word,sugWord.string);
|
||||
if (sugWord.score < min) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ir != null && field != null) { // use the user index
|
||||
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
|
||||
// don't suggest a word that is not present in the field
|
||||
if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) {
|
||||
continue;
|
||||
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
String[] grams;
|
||||
String key;
|
||||
|
||||
for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
|
||||
|
||||
key = "gram" + ng; // form key
|
||||
|
||||
grams = formGrams(word, ng); // form word into ngrams (allow dups too)
|
||||
|
||||
if (grams.length == 0) {
|
||||
continue; // hmm
|
||||
}
|
||||
|
||||
if (bStart > 0) { // should we boost prefixes?
|
||||
add(query, "start" + ng, grams[0], bStart); // matches start of word
|
||||
|
||||
}
|
||||
if (bEnd > 0) { // should we boost suffixes
|
||||
add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
|
||||
|
||||
}
|
||||
for (int i = 0; i < grams.length; i++) {
|
||||
add(query, key, grams[i]);
|
||||
}
|
||||
}
|
||||
sugQueue.insertWithOverflow(sugWord);
|
||||
if (sugQueue.size() == numSug) {
|
||||
// if queue full, maintain the minScore score
|
||||
min = sugQueue.top().score;
|
||||
|
||||
int maxHits = 10 * numSug;
|
||||
|
||||
// System.out.println("Q: " + query);
|
||||
ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
|
||||
// System.out.println("HITS: " + hits.length());
|
||||
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
|
||||
|
||||
// go thru more than 'maxr' matches in case the distance filter triggers
|
||||
int stop = Math.min(hits.length, maxHits);
|
||||
SuggestWord sugWord = new SuggestWord();
|
||||
for (int i = 0; i < stop; i++) {
|
||||
|
||||
sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
|
||||
|
||||
// don't suggest a word for itself, that would be silly
|
||||
if (sugWord.string.equals(word)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// edit distance
|
||||
sugWord.score = sd.getDistance(word,sugWord.string);
|
||||
if (sugWord.score < min) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ir != null && field != null) { // use the user index
|
||||
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
|
||||
// don't suggest a word that is not present in the field
|
||||
if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
sugQueue.insertWithOverflow(sugWord);
|
||||
if (sugQueue.size() == numSug) {
|
||||
// if queue full, maintain the minScore score
|
||||
min = sugQueue.top().score;
|
||||
}
|
||||
sugWord = new SuggestWord();
|
||||
}
|
||||
sugWord = new SuggestWord();
|
||||
|
||||
// convert to array string
|
||||
String[] list = new String[sugQueue.size()];
|
||||
for (int i = sugQueue.size() - 1; i >= 0; i--) {
|
||||
list[i] = sugQueue.pop().string;
|
||||
}
|
||||
|
||||
return list;
|
||||
} finally {
|
||||
releaseSearcher(indexSearcher);
|
||||
}
|
||||
|
||||
// convert to array string
|
||||
String[] list = new String[sugQueue.size()];
|
||||
for (int i = sugQueue.size() - 1; i >= 0; i--) {
|
||||
list[i] = sugQueue.pop().string;
|
||||
}
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -297,24 +347,33 @@ public class SpellChecker {
|
|||
/**
|
||||
* Removes all terms from the spell check index.
|
||||
* @throws IOException
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
*/
|
||||
public void clearIndex() throws IOException {
|
||||
IndexWriter writer = new IndexWriter(spellIndex, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
writer.close();
|
||||
|
||||
//close the old searcher
|
||||
searcher.close();
|
||||
searcher = new IndexSearcher(this.spellIndex, true);
|
||||
synchronized (modifyCurrentIndexLock) {
|
||||
ensureOpen();
|
||||
final Directory dir = this.spellIndex;
|
||||
final IndexWriter writer = new IndexWriter(dir, null, true, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
writer.close();
|
||||
swapSearcher(dir);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the word exists in the index.
|
||||
* @param word
|
||||
* @throws IOException
|
||||
* @return true iff the word exists in the index
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @return true if the word exists in the index
|
||||
*/
|
||||
public boolean exist(String word) throws IOException {
|
||||
return searcher.docFreq(new Term(F_WORD, word)) > 0;
|
||||
// obtainSearcher calls ensureOpen
|
||||
final IndexSearcher indexSearcher = obtainSearcher();
|
||||
try{
|
||||
return indexSearcher.docFreq(F_WORD_TERM.createTerm(word)) > 0;
|
||||
} finally {
|
||||
releaseSearcher(indexSearcher);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -322,37 +381,42 @@ public class SpellChecker {
|
|||
* @param dict Dictionary to index
|
||||
* @param mergeFactor mergeFactor to use when indexing
|
||||
* @param ramMB the max amount or memory in MB to use
|
||||
* @throws AlreadyClosedException if the Spellchecker is already closed
|
||||
* @throws IOException
|
||||
*/
|
||||
public void indexDictionary(Dictionary dict, int mergeFactor, int ramMB) throws IOException {
|
||||
IndexWriter writer = new IndexWriter(spellIndex, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
writer.setMergeFactor(mergeFactor);
|
||||
writer.setRAMBufferSizeMB(ramMB);
|
||||
|
||||
Iterator<String> iter = dict.getWordsIterator();
|
||||
while (iter.hasNext()) {
|
||||
String word = iter.next();
|
||||
|
||||
int len = word.length();
|
||||
if (len < 3) {
|
||||
continue; // too short we bail but "too long" is fine...
|
||||
synchronized (modifyCurrentIndexLock) {
|
||||
ensureOpen();
|
||||
final Directory dir = this.spellIndex;
|
||||
final IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(),
|
||||
IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
writer.setMergeFactor(mergeFactor);
|
||||
writer.setRAMBufferSizeMB(ramMB);
|
||||
|
||||
Iterator<String> iter = dict.getWordsIterator();
|
||||
while (iter.hasNext()) {
|
||||
String word = iter.next();
|
||||
|
||||
int len = word.length();
|
||||
if (len < 3) {
|
||||
continue; // too short we bail but "too long" is fine...
|
||||
}
|
||||
|
||||
if (this.exist(word)) { // if the word already exist in the gramindex
|
||||
continue;
|
||||
}
|
||||
|
||||
// ok index the word
|
||||
Document doc = createDocument(word, getMin(len), getMax(len));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
if (this.exist(word)) { // if the word already exist in the gramindex
|
||||
continue;
|
||||
}
|
||||
|
||||
// ok index the word
|
||||
Document doc = createDocument(word, getMin(len), getMax(len));
|
||||
writer.addDocument(doc);
|
||||
// close writer
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
// also re-open the spell index to see our own changes when the next suggestion
|
||||
// is fetched:
|
||||
swapSearcher(dir);
|
||||
}
|
||||
// close writer
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
// also re-open the spell index to see our own changes when the next suggestion
|
||||
// is fetched:
|
||||
searcher.close();
|
||||
searcher = new IndexSearcher(this.spellIndex, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -364,7 +428,7 @@ public class SpellChecker {
|
|||
indexDictionary(dict, 300, 10);
|
||||
}
|
||||
|
||||
private int getMin(int l) {
|
||||
private static int getMin(int l) {
|
||||
if (l > 5) {
|
||||
return 3;
|
||||
}
|
||||
|
@ -374,7 +438,7 @@ public class SpellChecker {
|
|||
return 1;
|
||||
}
|
||||
|
||||
private int getMax(int l) {
|
||||
private static int getMax(int l) {
|
||||
if (l > 5) {
|
||||
return 4;
|
||||
}
|
||||
|
@ -409,12 +473,84 @@ public class SpellChecker {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private IndexSearcher obtainSearcher() {
|
||||
synchronized (searcherLock) {
|
||||
ensureOpen();
|
||||
searcher.getIndexReader().incRef();
|
||||
return searcher;
|
||||
}
|
||||
}
|
||||
|
||||
private void releaseSearcher(final IndexSearcher aSearcher) throws IOException{
|
||||
// don't check if open - always decRef
|
||||
// don't decrement the private searcher - could have been swapped
|
||||
aSearcher.getIndexReader().decRef();
|
||||
}
|
||||
|
||||
private void ensureOpen() {
|
||||
if (closed) {
|
||||
throw new AlreadyClosedException("Spellchecker has been closed");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Close the IndexSearcher used by this SpellChecker.
|
||||
* Close the IndexSearcher used by this SpellChecker
|
||||
* @throws IOException if the close operation causes an {@link IOException}
|
||||
* @throws AlreadyClosedException if the {@link SpellChecker} is already closed
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
searcher.close();
|
||||
searcher = null;
|
||||
synchronized (searcherLock) {
|
||||
ensureOpen();
|
||||
closed = true;
|
||||
if (searcher != null) {
|
||||
searcher.close();
|
||||
}
|
||||
searcher = null;
|
||||
}
|
||||
}
|
||||
|
||||
private void swapSearcher(final Directory dir) throws IOException {
|
||||
/*
|
||||
* opening a searcher is possibly very expensive.
|
||||
* We rather close it again if the Spellchecker was closed during
|
||||
* this operation than block access to the current searcher while opening.
|
||||
*/
|
||||
final IndexSearcher indexSearcher = createSearcher(dir);
|
||||
synchronized (searcherLock) {
|
||||
if(closed){
|
||||
indexSearcher.close();
|
||||
throw new AlreadyClosedException("Spellchecker has been closed");
|
||||
}
|
||||
if (searcher != null) {
|
||||
searcher.close();
|
||||
}
|
||||
// set the spellindex in the sync block - ensure consistency.
|
||||
searcher = indexSearcher;
|
||||
this.spellIndex = dir;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new read-only IndexSearcher
|
||||
* @param dir the directory used to open the searcher
|
||||
* @return a new read-only IndexSearcher
|
||||
* @throws IOException f there is a low-level IO error
|
||||
*/
|
||||
// for testing purposes
|
||||
IndexSearcher createSearcher(final Directory dir) throws IOException{
|
||||
return new IndexSearcher(dir, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> if and only if the {@link SpellChecker} is
|
||||
* closed, otherwise <code>false</code>.
|
||||
*
|
||||
* @return <code>true</code> if and only if the {@link SpellChecker} is
|
||||
* closed, otherwise <code>false</code>.
|
||||
*/
|
||||
boolean isClosed(){
|
||||
return closed;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -18,8 +18,13 @@ package org.apache.lucene.search.spell;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -27,9 +32,12 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.English;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -37,9 +45,11 @@ import org.apache.lucene.util.English;
|
|||
*
|
||||
*
|
||||
*/
|
||||
public class TestSpellChecker extends TestCase {
|
||||
private SpellChecker spellChecker;
|
||||
public class TestSpellChecker extends LuceneTestCase {
|
||||
private SpellCheckerMock spellChecker;
|
||||
private Directory userindex, spellindex;
|
||||
private final Random random = newRandom();
|
||||
private List<IndexSearcher> searchers;
|
||||
|
||||
@Override
|
||||
protected void setUp() throws Exception {
|
||||
|
@ -56,10 +66,10 @@ public class TestSpellChecker extends TestCase {
|
|||
writer.addDocument(doc);
|
||||
}
|
||||
writer.close();
|
||||
|
||||
searchers = Collections.synchronizedList(new ArrayList<IndexSearcher>());
|
||||
// create the spellChecker
|
||||
spellindex = new RAMDirectory();
|
||||
spellChecker = new SpellChecker(spellindex);
|
||||
spellChecker = new SpellCheckerMock(spellindex);
|
||||
}
|
||||
|
||||
|
||||
|
@ -75,7 +85,9 @@ public class TestSpellChecker extends TestCase {
|
|||
int num_field2 = this.numdoc();
|
||||
|
||||
assertEquals(num_field2, num_field1 + 1);
|
||||
|
||||
|
||||
assertLastSearcherOpen(4);
|
||||
|
||||
checkCommonSuggestions(r);
|
||||
checkLevenshteinSuggestions(r);
|
||||
|
||||
|
@ -201,4 +213,186 @@ public class TestSpellChecker extends TestCase {
|
|||
return num;
|
||||
}
|
||||
|
||||
public void testClose() throws IOException {
|
||||
IndexReader r = IndexReader.open(userindex, true);
|
||||
spellChecker.clearIndex();
|
||||
String field = "field1";
|
||||
addwords(r, "field1");
|
||||
int num_field1 = this.numdoc();
|
||||
addwords(r, "field2");
|
||||
int num_field2 = this.numdoc();
|
||||
assertEquals(num_field2, num_field1 + 1);
|
||||
checkCommonSuggestions(r);
|
||||
assertLastSearcherOpen(4);
|
||||
spellChecker.close();
|
||||
assertSearchersClosed();
|
||||
try {
|
||||
spellChecker.close();
|
||||
fail("spellchecker was already closed");
|
||||
} catch (AlreadyClosedException e) {
|
||||
// expected
|
||||
}
|
||||
try {
|
||||
checkCommonSuggestions(r);
|
||||
fail("spellchecker was already closed");
|
||||
} catch (AlreadyClosedException e) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
spellChecker.clearIndex();
|
||||
fail("spellchecker was already closed");
|
||||
} catch (AlreadyClosedException e) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
spellChecker.indexDictionary(new LuceneDictionary(r, field));
|
||||
fail("spellchecker was already closed");
|
||||
} catch (AlreadyClosedException e) {
|
||||
// expected
|
||||
}
|
||||
|
||||
try {
|
||||
spellChecker.setSpellIndex(spellindex);
|
||||
fail("spellchecker was already closed");
|
||||
} catch (AlreadyClosedException e) {
|
||||
// expected
|
||||
}
|
||||
assertEquals(4, searchers.size());
|
||||
assertSearchersClosed();
|
||||
}
|
||||
|
||||
/*
|
||||
* tests if the internally shared indexsearcher is correctly closed
|
||||
* when the spellchecker is concurrently accessed and closed.
|
||||
*/
|
||||
public void testConcurrentAccess() throws IOException, InterruptedException {
|
||||
assertEquals(1, searchers.size());
|
||||
final IndexReader r = IndexReader.open(userindex, true);
|
||||
spellChecker.clearIndex();
|
||||
assertEquals(2, searchers.size());
|
||||
addwords(r, "field1");
|
||||
assertEquals(3, searchers.size());
|
||||
int num_field1 = this.numdoc();
|
||||
addwords(r, "field2");
|
||||
assertEquals(4, searchers.size());
|
||||
int num_field2 = this.numdoc();
|
||||
assertEquals(num_field2, num_field1 + 1);
|
||||
int numThreads = 5 + this.random.nextInt(5);
|
||||
ExecutorService executor = Executors.newFixedThreadPool(numThreads);
|
||||
SpellCheckWorker[] workers = new SpellCheckWorker[numThreads];
|
||||
for (int i = 0; i < numThreads; i++) {
|
||||
SpellCheckWorker spellCheckWorker = new SpellCheckWorker(r);
|
||||
executor.execute(spellCheckWorker);
|
||||
workers[i] = spellCheckWorker;
|
||||
|
||||
}
|
||||
int iterations = 5 + random.nextInt(5);
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
Thread.sleep(100);
|
||||
// concurrently reset the spell index
|
||||
spellChecker.setSpellIndex(this.spellindex);
|
||||
// for debug - prints the internal open searchers
|
||||
// showSearchersOpen();
|
||||
}
|
||||
|
||||
spellChecker.close();
|
||||
executor.shutdown();
|
||||
executor.awaitTermination(5, TimeUnit.SECONDS);
|
||||
|
||||
|
||||
for (int i = 0; i < workers.length; i++) {
|
||||
assertFalse(workers[i].failed);
|
||||
assertTrue(workers[i].terminated);
|
||||
}
|
||||
// 4 searchers more than iterations
|
||||
// 1. at creation
|
||||
// 2. clearIndex()
|
||||
// 2. and 3. during addwords
|
||||
assertEquals(iterations + 4, searchers.size());
|
||||
assertSearchersClosed();
|
||||
|
||||
}
|
||||
|
||||
private void assertLastSearcherOpen(int numSearchers) {
|
||||
assertEquals(numSearchers, searchers.size());
|
||||
IndexSearcher[] searcherArray = searchers.toArray(new IndexSearcher[0]);
|
||||
for (int i = 0; i < searcherArray.length; i++) {
|
||||
if (i == searcherArray.length - 1) {
|
||||
assertTrue("expected last searcher open but was closed",
|
||||
searcherArray[i].getIndexReader().getRefCount() > 0);
|
||||
} else {
|
||||
assertFalse("expected closed searcher but was open - Index: " + i,
|
||||
searcherArray[i].getIndexReader().getRefCount() > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSearchersClosed() {
|
||||
for (IndexSearcher searcher : searchers) {
|
||||
assertEquals(0, searcher.getIndexReader().getRefCount());
|
||||
}
|
||||
}
|
||||
|
||||
private void showSearchersOpen() {
|
||||
int count = 0;
|
||||
for (IndexSearcher searcher : searchers) {
|
||||
if(searcher.getIndexReader().getRefCount() > 0)
|
||||
++count;
|
||||
}
|
||||
System.out.println(count);
|
||||
}
|
||||
|
||||
|
||||
private class SpellCheckWorker implements Runnable {
|
||||
private final IndexReader reader;
|
||||
boolean terminated = false;
|
||||
boolean failed = false;
|
||||
|
||||
SpellCheckWorker(IndexReader reader) {
|
||||
super();
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
while (true) {
|
||||
try {
|
||||
checkCommonSuggestions(reader);
|
||||
} catch (AlreadyClosedException e) {
|
||||
|
||||
return;
|
||||
} catch (Throwable e) {
|
||||
|
||||
e.printStackTrace();
|
||||
failed = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
terminated = true;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class SpellCheckerMock extends SpellChecker {
|
||||
public SpellCheckerMock(Directory spellIndex) throws IOException {
|
||||
super(spellIndex);
|
||||
}
|
||||
|
||||
public SpellCheckerMock(Directory spellIndex, StringDistance sd)
|
||||
throws IOException {
|
||||
super(spellIndex, sd);
|
||||
}
|
||||
|
||||
@Override
|
||||
IndexSearcher createSearcher(Directory dir) throws IOException {
|
||||
IndexSearcher searcher = super.createSearcher(dir);
|
||||
TestSpellChecker.this.searchers.add(searcher);
|
||||
return searcher;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue