LUCENE-7564: AnalyzingInfixSuggester should close its IndexWriter by default at the end of build()

This commit is contained in:
Steve Rowe 2016-11-16 18:03:51 -05:00
parent e402a304bf
commit f9a0693bf9
3 changed files with 181 additions and 15 deletions

View File

@ -87,6 +87,9 @@ Improvements
* LUCENE-7524: Added more detailed explanation of how IDF is computed in * LUCENE-7524: Added more detailed explanation of how IDF is computed in
ClassicSimilarity and BM25Similarity. (Adrien Grand) ClassicSimilarity and BM25Similarity. (Adrien Grand)
* LUCENE-7564: AnalyzingInfixSuggester should close its IndexWriter by default
at the end of build(). (Steve Rowe)
* LUCENE-7526: Enhanced UnifiedHighlighter's passage relevancy for queries with * LUCENE-7526: Enhanced UnifiedHighlighter's passage relevancy for queries with
wildcards and sometimes just terms. Added shouldPreferPassageRelevancyOverSpeed() wildcards and sometimes just terms. Added shouldPreferPassageRelevancyOverSpeed()

View File

@ -129,9 +129,10 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
private final boolean highlight; private final boolean highlight;
private final boolean commitOnBuild; private final boolean commitOnBuild;
private final boolean closeIndexWriterOnBuild;
/** Used for ongoing NRT additions/updates. */ /** Used for ongoing NRT additions/updates. */
private IndexWriter writer; protected IndexWriter writer;
/** {@link IndexSearcher} used for lookups. */ /** {@link IndexSearcher} used for lookups. */
protected SearcherManager searcherMgr; protected SearcherManager searcherMgr;
@ -146,6 +147,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
/** Default higlighting option. */ /** Default higlighting option. */
public static final boolean DEFAULT_HIGHLIGHT = true; public static final boolean DEFAULT_HIGHLIGHT = true;
/** Default option to close the IndexWriter once the index has been built. */
protected final static boolean DEFAULT_CLOSE_INDEXWRITER_ON_BUILD = true;
/** How we sort the postings and search results. */ /** How we sort the postings and search results. */
private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true)); private static final Sort SORT = new Sort(new SortField("weight", SortField.Type.LONG, true));
@ -198,8 +202,34 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
* *
*/ */
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars, public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
boolean commitOnBuild, boolean commitOnBuild,
boolean allTermsRequired, boolean highlight) throws IOException { boolean allTermsRequired, boolean highlight) throws IOException {
this(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild, allTermsRequired, highlight,
DEFAULT_CLOSE_INDEXWRITER_ON_BUILD);
}
/** Create a new instance, loading from a previously built
* AnalyzingInfixSuggester directory, if it exists. This directory must be
* private to the infix suggester (i.e., not an external
* Lucene index). Note that {@link #close}
* will also close the provided directory.
*
* @param minPrefixChars Minimum number of leading characters
* before PrefixQuery is used (default 4).
* Prefixes shorter than this are indexed as character
* ngrams (increasing index size but making lookups
* faster).
*
* @param commitOnBuild Call commit after the index has finished building. This would persist the
* suggester index to disk and future instances of this suggester can use this pre-built dictionary.
*
* @param allTermsRequired All terms in the suggest query must be matched.
* @param highlight Highlight suggest query in suggestions.
* @param closeIndexWriterOnBuild If true, the IndexWriter will be closed after the index has finished building.
*/
public AnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer, int minPrefixChars,
boolean commitOnBuild, boolean allTermsRequired,
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
if (minPrefixChars < 0) { if (minPrefixChars < 0) {
throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars); throw new IllegalArgumentException("minPrefixChars must be >= 0; got: " + minPrefixChars);
@ -212,6 +242,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
this.commitOnBuild = commitOnBuild; this.commitOnBuild = commitOnBuild;
this.allTermsRequired = allTermsRequired; this.allTermsRequired = allTermsRequired;
this.highlight = highlight; this.highlight = highlight;
this.closeIndexWriterOnBuild = closeIndexWriterOnBuild;
if (DirectoryReader.indexExists(dir)) { if (DirectoryReader.indexExists(dir)) {
// Already built; open it: // Already built; open it:
@ -276,15 +307,22 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
} }
//System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec"); //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
if (commitOnBuild) { if (commitOnBuild || closeIndexWriterOnBuild) {
commit(); commit();
} }
searcherMgr = new SearcherManager(writer, null); searcherMgr = new SearcherManager(writer, null);
success = true; success = true;
} finally { } finally {
if (success == false && writer != null) { if (success) {
writer.rollback(); if (closeIndexWriterOnBuild) {
writer = null; writer.close();
writer = null;
}
} else { // failure
if (writer != null) {
writer.rollback();
writer = null;
}
} }
} }
} }
@ -294,9 +332,13 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
* @see IndexWriter#commit */ * @see IndexWriter#commit */
public void commit() throws IOException { public void commit() throws IOException {
if (writer == null) { if (writer == null) {
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first"); if (searcherMgr == null || closeIndexWriterOnBuild == false) {
throw new IllegalStateException("Cannot commit on an closed writer. Add documents first");
}
// else no-op: writer was committed and closed after the index was built, so commit is unnecessary
} else {
writer.commit();
} }
writer.commit();
} }
private Analyzer getGramAnalyzer() { private Analyzer getGramAnalyzer() {
@ -321,13 +363,17 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
private synchronized void ensureOpen() throws IOException { private synchronized void ensureOpen() throws IOException {
if (writer == null) { if (writer == null) {
if (searcherMgr != null) { if (DirectoryReader.indexExists(dir)) {
searcherMgr.close(); // Already built; open it:
searcherMgr = null; writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.APPEND));
} else {
writer = new IndexWriter(dir, getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
} }
writer = new IndexWriter(dir, SearcherManager oldSearcherMgr = searcherMgr;
getIndexWriterConfig(getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
searcherMgr = new SearcherManager(writer, null); searcherMgr = new SearcherManager(writer, null);
if (oldSearcherMgr != null) {
oldSearcherMgr.close();
}
} }
} }
@ -382,7 +428,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
if (searcherMgr == null) { if (searcherMgr == null) {
throw new IllegalStateException("suggester was not built"); throw new IllegalStateException("suggester was not built");
} }
searcherMgr.maybeRefreshBlocking(); if (writer != null) {
searcherMgr.maybeRefreshBlocking();
}
// else no-op: writer was committed and closed after the index was built
// and before searchMgr was constructed, so refresh is unnecessary
} }
/** /**
@ -791,9 +841,11 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
} }
if (writer != null) { if (writer != null) {
writer.close(); writer.close();
dir.close();
writer = null; writer = null;
} }
if (dir != null) {
dir.close();
}
} }
@Override @Override

View File

@ -35,11 +35,14 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.suggest.Input; import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
@ -1334,4 +1337,112 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
suggester.close(); suggester.close();
} }
public void testCloseIndexWriterOnBuild() throws Exception {
class MyAnalyzingInfixSuggester extends AnalyzingInfixSuggester {
public MyAnalyzingInfixSuggester(Directory dir, Analyzer indexAnalyzer, Analyzer queryAnalyzer,
int minPrefixChars, boolean commitOnBuild, boolean allTermsRequired,
boolean highlight, boolean closeIndexWriterOnBuild) throws IOException {
super(dir, indexAnalyzer, queryAnalyzer, minPrefixChars, commitOnBuild,
allTermsRequired, highlight, closeIndexWriterOnBuild);
}
public IndexWriter getIndexWriter() {
return writer;
}
public SearcherManager getSearcherManager() {
return searcherMgr;
}
}
// After build(), when closeIndexWriterOnBuild = true:
// * The IndexWriter should be null
// * The SearcherManager should be non-null
// * SearcherManager's IndexWriter reference should be closed
// (as evidenced by maybeRefreshBlocking() throwing AlreadyClosedException)
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
MyAnalyzingInfixSuggester suggester = new MyAnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
suggester.build(new InputArrayIterator(sharedInputs));
assertNull(suggester.getIndexWriter());
assertNotNull(suggester.getSearcherManager());
expectThrows(AlreadyClosedException.class, () -> suggester.getSearcherManager().maybeRefreshBlocking());
suggester.close();
a.close();
}
public void testCommitAfterBuild() throws Exception {
performOperationWithAllOptionCombinations(suggester -> {
suggester.build(new InputArrayIterator(sharedInputs));
suggester.commit();
});
}
public void testRefreshAfterBuild() throws Exception {
performOperationWithAllOptionCombinations(suggester -> {
suggester.build(new InputArrayIterator(sharedInputs));
suggester.refresh();
});
}
public void testDisallowCommitBeforeBuild() throws Exception {
performOperationWithAllOptionCombinations
(suggester -> expectThrows(IllegalStateException.class, suggester::commit));
}
public void testDisallowRefreshBeforeBuild() throws Exception {
performOperationWithAllOptionCombinations
(suggester -> expectThrows(IllegalStateException.class, suggester::refresh));
}
private Input sharedInputs[] = new Input[] {
new Input("lend me your ear", 8, new BytesRef("foobar")),
new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
private interface SuggesterOperation {
void operate(AnalyzingInfixSuggester suggester) throws Exception;
}
/**
* Perform the given operation on suggesters constructed with all combinations of options
* commitOnBuild and closeIndexWriterOnBuild, including defaults.
*/
private void performOperationWithAllOptionCombinations(SuggesterOperation operation) throws Exception {
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a);
operation.operate(suggester);
suggester.close();
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
operation.operate(suggester);
suggester.close();
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true);
operation.operate(suggester);
suggester.close();
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
operation.operate(suggester);
suggester.close();
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, true,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
operation.operate(suggester);
suggester.close();
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, true);
operation.operate(suggester);
suggester.close();
suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false,
AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED, AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT, false);
operation.operate(suggester);
suggester.close();
a.close();
}
} }