mirror of https://github.com/apache/lucene.git
new IndexModifier class that simplifies access to IndexReader and IndexWriter
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@185069 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0532c41143
commit
e2a79a145f
|
@ -113,6 +113,11 @@ New features
|
||||||
fields in arbitrary formats can be cached as ints and floats.
|
fields in arbitrary formats can be cached as ints and floats.
|
||||||
(Doug Cutting)
|
(Doug Cutting)
|
||||||
|
|
||||||
|
18. Added class org.apache.lucene.index.IndexModifier which combines
|
||||||
|
IndexWriter and IndexReader, so you can add and delete documents without
|
||||||
|
worrying about synchronisation/locking issues.
|
||||||
|
(Daniel Naber)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
1. Several methods and fields have been deprecated. The API documentation
|
1. Several methods and fields have been deprecated. The API documentation
|
||||||
|
|
|
@ -0,0 +1,508 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class to modify an index, i.e. to delete and add documents. This
|
||||||
|
* class hides {@link IndexReader} and {@link IndexWriter} so that you
|
||||||
|
* do not need to care about implementation details such as that adding
|
||||||
|
* documents is done via IndexWriter and deletion is done via IndexReader.
|
||||||
|
*
|
||||||
|
* <p>Note that you cannot create more than one <code>IndexModifier</code> object
|
||||||
|
* on the same directory at the same time.
|
||||||
|
*
|
||||||
|
* <p>Example usage:
|
||||||
|
*
|
||||||
|
<!-- ======================================================== -->
|
||||||
|
<!-- = Java Sourcecode to HTML automatically converted code = -->
|
||||||
|
<!-- = Java2Html Converter V4.1 2004 by Markus Gebhard markus@jave.de = -->
|
||||||
|
<!-- = Further information: http://www.java2html.de = -->
|
||||||
|
<div align="left" class="java">
|
||||||
|
<table border="0" cellpadding="3" cellspacing="0" bgcolor="#ffffff">
|
||||||
|
<tr>
|
||||||
|
<!-- start source code -->
|
||||||
|
<td nowrap="nowrap" valign="top" align="left">
|
||||||
|
<code>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">Analyzer analyzer = </font><font color="#7f0055"><b>new </b></font><font color="#000000">StandardAnalyzer</font><font color="#000000">()</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#3f7f5f">// create an index in /tmp/index, overwriting an existing one:</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">IndexModifier indexModifier = </font><font color="#7f0055"><b>new </b></font><font color="#000000">IndexModifier</font><font color="#000000">(</font><font color="#2a00ff">"/tmp/index"</font><font color="#000000">, analyzer, </font><font color="#7f0055"><b>true</b></font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">Document doc = </font><font color="#7f0055"><b>new </b></font><font color="#000000">Document</font><font color="#000000">()</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">, Field.Store.YES, Field.Index.UN_TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">doc.add</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Field</font><font color="#000000">(</font><font color="#2a00ff">"body"</font><font color="#000000">, </font><font color="#2a00ff">"a simple test"</font><font color="#000000">, Field.Store.YES, Field.Index.TOKENIZED</font><font color="#000000">))</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">indexModifier.addDocument</font><font color="#000000">(</font><font color="#000000">doc</font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#7f0055"><b>int </b></font><font color="#000000">deleted = indexModifier.delete</font><font color="#000000">(</font><font color="#7f0055"><b>new </b></font><font color="#000000">Term</font><font color="#000000">(</font><font color="#2a00ff">"id"</font><font color="#000000">, </font><font color="#2a00ff">"1"</font><font color="#000000">))</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#2a00ff">"Deleted " </font><font color="#000000">+ deleted + </font><font color="#2a00ff">" document"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">indexModifier.flush</font><font color="#000000">()</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">System.out.println</font><font color="#000000">(</font><font color="#000000">indexModifier.docCount</font><font color="#000000">() </font><font color="#000000">+ </font><font color="#2a00ff">" docs in index"</font><font color="#000000">)</font><font color="#000000">;</font><br/>
|
||||||
|
<font color="#ffffff"> </font><font color="#000000">indexModifier.close</font><font color="#000000">()</font><font color="#000000">;</font></code>
|
||||||
|
|
||||||
|
</td>
|
||||||
|
<!-- end source code -->
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
<!-- = END of automatically generated HTML code = -->
|
||||||
|
<!-- ======================================================== -->
|
||||||
|
*
|
||||||
|
* <p>Not all methods of IndexReader and IndexWriter are offered by this
|
||||||
|
* class. If you need access to additional methods, either use those classes
|
||||||
|
* directly or implement your own class that extends <code>IndexModifier</code>.
|
||||||
|
*
|
||||||
|
* <p>Although an instance of this class can be used from more than one
|
||||||
|
* thread, you will not get the best performance. You might want to use
|
||||||
|
* IndexReader and IndexWriter directly for that (but you will need to
|
||||||
|
* care about synchronization yourself then).
|
||||||
|
*
|
||||||
|
* <p>While you can freely mix calls to add() and delete() using this class,
|
||||||
|
* you should batch your calls for best performance. For example, if you
|
||||||
|
* want to update 20 documents, you should first delete all those documents,
|
||||||
|
* then add all the new documents.
|
||||||
|
*
|
||||||
|
* @author Daniel Naber
|
||||||
|
*/
|
||||||
|
public class IndexModifier {
|
||||||
|
|
||||||
|
protected IndexWriter indexWriter = null;
|
||||||
|
protected IndexReader indexReader = null;
|
||||||
|
|
||||||
|
protected Directory directory = null;
|
||||||
|
protected Analyzer analyzer = null;
|
||||||
|
protected boolean open = false;
|
||||||
|
|
||||||
|
// Lucene defaults:
|
||||||
|
protected PrintStream infoStream = null;
|
||||||
|
protected boolean useCompoundFile = true;
|
||||||
|
protected int maxBufferedDocs = IndexWriter.DEFAULT_MIN_MERGE_DOCS;
|
||||||
|
protected int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
|
||||||
|
protected int mergeFactor = IndexWriter.DEFAULT_MERGE_FACTOR;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open an index with write access.
|
||||||
|
*
|
||||||
|
* @param directory the index directory
|
||||||
|
* @param analyzer the analyzer to use for adding new documents
|
||||||
|
* @param create <code>true</code> to create the index or overwrite the existing one;
|
||||||
|
* <code>false</code> to append to the existing index
|
||||||
|
*/
|
||||||
|
public IndexModifier(Directory directory, Analyzer analyzer, boolean create) throws IOException {
|
||||||
|
init(directory, analyzer, create);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open an index with write access.
|
||||||
|
*
|
||||||
|
* @param dirName the index directory
|
||||||
|
* @param analyzer the analyzer to use for adding new documents
|
||||||
|
* @param create <code>true</code> to create the index or overwrite the existing one;
|
||||||
|
* <code>false</code> to append to the existing index
|
||||||
|
*/
|
||||||
|
public IndexModifier(String dirName, Analyzer analyzer, boolean create) throws IOException {
|
||||||
|
Directory dir = FSDirectory.getDirectory(dirName, create);
|
||||||
|
init(dir, analyzer, create);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open an index with write access.
|
||||||
|
*
|
||||||
|
* @param file the index directory
|
||||||
|
* @param analyzer the analyzer to use for adding new documents
|
||||||
|
* @param create <code>true</code> to create the index or overwrite the existing one;
|
||||||
|
* <code>false</code> to append to the existing index
|
||||||
|
*/
|
||||||
|
public IndexModifier(File file, Analyzer analyzer, boolean create) throws IOException {
|
||||||
|
Directory dir = FSDirectory.getDirectory(file, create);
|
||||||
|
init(dir, analyzer, create);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize an IndexWriter.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
protected void init(Directory directory, Analyzer analyzer, boolean create) throws IOException {
|
||||||
|
this.directory = directory;
|
||||||
|
synchronized(this.directory) {
|
||||||
|
this.analyzer = analyzer;
|
||||||
|
indexWriter = new IndexWriter(directory, analyzer, create);
|
||||||
|
open = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throw an IllegalStateException if the index is closed.
|
||||||
|
* @throws IllegalStateException
|
||||||
|
*/
|
||||||
|
protected void assureOpen() {
|
||||||
|
if (!open) {
|
||||||
|
throw new IllegalStateException("Index is closed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the IndexReader and open an IndexWriter.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
protected void createIndexWriter() throws IOException {
|
||||||
|
if (indexWriter == null) {
|
||||||
|
if (indexReader != null) {
|
||||||
|
indexReader.close();
|
||||||
|
indexReader = null;
|
||||||
|
}
|
||||||
|
indexWriter = new IndexWriter(directory, analyzer, false);
|
||||||
|
indexWriter.setInfoStream(infoStream);
|
||||||
|
indexWriter.setUseCompoundFile(useCompoundFile);
|
||||||
|
indexWriter.setMaxBufferedDocs(maxBufferedDocs);
|
||||||
|
indexWriter.setMaxFieldLength(maxFieldLength);
|
||||||
|
indexWriter.setMergeFactor(mergeFactor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the IndexWriter and open an IndexReader.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
protected void createIndexReader() throws IOException {
|
||||||
|
if (indexReader == null) {
|
||||||
|
if (indexWriter != null) {
|
||||||
|
indexWriter.close();
|
||||||
|
indexWriter = null;
|
||||||
|
}
|
||||||
|
indexReader = IndexReader.open(directory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make sure all changes are written to disk.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void flush() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
if (indexWriter != null) {
|
||||||
|
indexWriter.close();
|
||||||
|
indexWriter = null;
|
||||||
|
createIndexWriter();
|
||||||
|
} else {
|
||||||
|
indexReader.close();
|
||||||
|
indexReader = null;
|
||||||
|
createIndexReader();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a document to this index, using the provided analyzer instead of the
|
||||||
|
* one specific in the constructor. If the document contains more than
|
||||||
|
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
||||||
|
* discarded.
|
||||||
|
* @see IndexWriter#addDocument(Document, Analyzer)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void addDocument(Document doc, Analyzer docAnalyzer) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
if (docAnalyzer != null)
|
||||||
|
indexWriter.addDocument(doc, docAnalyzer);
|
||||||
|
else
|
||||||
|
indexWriter.addDocument(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a document to this index. If the document contains more than
|
||||||
|
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
|
||||||
|
* discarded.
|
||||||
|
* @see IndexWriter#addDocument(Document)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void addDocument(Document doc) throws IOException {
|
||||||
|
addDocument(doc, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deletes all documents containing <code>term</code>.
|
||||||
|
* This is useful if one uses a document field to hold a unique ID string for
|
||||||
|
* the document. Then to delete such a document, one merely constructs a
|
||||||
|
* term with the appropriate field and the unique ID string as its text and
|
||||||
|
* passes it to this method. Returns the number of documents deleted.
|
||||||
|
* @return the number of documents deleted
|
||||||
|
* @see IndexReader#delete(Term)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public int delete(Term term) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexReader();
|
||||||
|
return indexReader.delete(term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deletes the document numbered <code>docNum</code>.
|
||||||
|
* @see IndexReader#delete(int)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void delete(int docNum) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexReader();
|
||||||
|
indexReader.delete(docNum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of documents currently in this index.
|
||||||
|
* @see IndexWriter#docCount()
|
||||||
|
* @see IndexReader#numDocs()
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public int docCount() {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
if (indexWriter != null) {
|
||||||
|
return indexWriter.docCount();
|
||||||
|
} else {
|
||||||
|
return indexReader.numDocs();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges all segments together into a single segment, optimizing an index
|
||||||
|
* for search.
|
||||||
|
* @see IndexWriter#optimize()
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void optimize() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
indexWriter.optimize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If non-null, information about merges and a message when
|
||||||
|
* {@link #getMaxFieldLength()} is reached will be printed to this.
|
||||||
|
* <p>Example: <tt>index.setInfoStream(System.err);</tt>
|
||||||
|
* @see IndexWriter#setInfoStream(PrintStream)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void setInfoStream(PrintStream infoStream) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
indexWriter.setInfoStream(infoStream);
|
||||||
|
this.infoStream = infoStream;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws IOException
|
||||||
|
* @see IndexModifier#setInfoStream(PrintStream)
|
||||||
|
*/
|
||||||
|
public PrintStream getInfoStream() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
return indexWriter.getInfoStream();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setting to turn on usage of a compound file. When on, multiple files
|
||||||
|
* for each segment are merged into a single file once the segment creation
|
||||||
|
* is finished. This is done regardless of what directory is in use.
|
||||||
|
* @see IndexWriter#setUseCompoundFile(boolean)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void setUseCompoundFile(boolean useCompoundFile) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
indexWriter.setUseCompoundFile(useCompoundFile);
|
||||||
|
this.useCompoundFile = useCompoundFile;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws IOException
|
||||||
|
* @see IndexModifier#setUseCompoundFile(boolean)
|
||||||
|
*/
|
||||||
|
public boolean getUseCompoundFile() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
return indexWriter.getUseCompoundFile();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum number of terms that will be indexed for a single field in a
|
||||||
|
* document. This limits the amount of memory required for indexing, so that
|
||||||
|
* collections with very large files will not crash the indexing process by
|
||||||
|
* running out of memory.<p/>
|
||||||
|
* Note that this effectively truncates large documents, excluding from the
|
||||||
|
* index terms that occur further in the document. If you know your source
|
||||||
|
* documents are large, be sure to set this value high enough to accomodate
|
||||||
|
* the expected size. If you set it to Integer.MAX_VALUE, then the only limit
|
||||||
|
* is your memory, but you should anticipate an OutOfMemoryError.<p/>
|
||||||
|
* By default, no more than 10,000 terms will be indexed for a field.
|
||||||
|
* @see IndexWriter#setMaxFieldLength(int)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void setMaxFieldLength(int maxFieldLength) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
indexWriter.setMaxFieldLength(maxFieldLength);
|
||||||
|
this.maxFieldLength = maxFieldLength;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws IOException
|
||||||
|
* @see IndexModifier#setMaxFieldLength(int)
|
||||||
|
*/
|
||||||
|
public int getMaxFieldLength() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
return indexWriter.getMaxFieldLength();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum number of terms that will be indexed for a single field in a
|
||||||
|
* document. This limits the amount of memory required for indexing, so that
|
||||||
|
* collections with very large files will not crash the indexing process by
|
||||||
|
* running out of memory.<p/>
|
||||||
|
* Note that this effectively truncates large documents, excluding from the
|
||||||
|
* index terms that occur further in the document. If you know your source
|
||||||
|
* documents are large, be sure to set this value high enough to accomodate
|
||||||
|
* the expected size. If you set it to Integer.MAX_VALUE, then the only limit
|
||||||
|
* is your memory, but you should anticipate an OutOfMemoryError.<p/>
|
||||||
|
* By default, no more than 10,000 terms will be indexed for a field.
|
||||||
|
* @see IndexWriter#setMaxBufferedDocs(int)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void setMaxBufferedDocs(int maxBufferedDocs) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
indexWriter.setMaxBufferedDocs(maxBufferedDocs);
|
||||||
|
this.maxBufferedDocs = maxBufferedDocs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws IOException
|
||||||
|
* @see IndexModifier#setMaxBufferedDocs(int)
|
||||||
|
*/
|
||||||
|
public int getMaxBufferedDocs() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
return indexWriter.getMaxBufferedDocs();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines how often segment indices are merged by addDocument(). With
|
||||||
|
* smaller values, less RAM is used while indexing, and searches on
|
||||||
|
* unoptimized indices are faster, but indexing speed is slower. With larger
|
||||||
|
* values, more RAM is used during indexing, and while searches on unoptimized
|
||||||
|
* indices are slower, indexing is faster. Thus larger values (> 10) are best
|
||||||
|
* for batch index creation, and smaller values (< 10) for indices that are
|
||||||
|
* interactively maintained.
|
||||||
|
* <p>This must never be less than 2. The default value is 10.
|
||||||
|
*
|
||||||
|
* @see IndexWriter#setMergeFactor(int)
|
||||||
|
* @throws IllegalStateException if the index is closed
|
||||||
|
*/
|
||||||
|
public void setMergeFactor(int mergeFactor) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
indexWriter.setMergeFactor(mergeFactor);
|
||||||
|
this.mergeFactor = mergeFactor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @throws IOException
|
||||||
|
* @see IndexModifier#setMergeFactor(int)
|
||||||
|
*/
|
||||||
|
public int getMergeFactor() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexWriter();
|
||||||
|
return indexWriter.getMergeFactor();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close this index, writing all pending changes to disk.
|
||||||
|
*
|
||||||
|
* @throws IllegalStateException if the index has been closed before already
|
||||||
|
*/
|
||||||
|
public void close() throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
if (!open)
|
||||||
|
throw new IllegalStateException("Index is closed already");
|
||||||
|
if (indexWriter != null) {
|
||||||
|
indexWriter.close();
|
||||||
|
indexWriter = null;
|
||||||
|
} else {
|
||||||
|
indexReader.close();
|
||||||
|
indexReader = null;
|
||||||
|
}
|
||||||
|
open = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "Index@" + directory;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
// used as an example in the javadoc:
|
||||||
|
public static void main(String[] args) throws IOException {
|
||||||
|
Analyzer analyzer = new StandardAnalyzer();
|
||||||
|
// create an index in /tmp/index, overwriting an existing one:
|
||||||
|
IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
|
||||||
|
indexModifier.addDocument(doc);
|
||||||
|
int deleted = indexModifier.delete(new Term("id", "1"));
|
||||||
|
System.out.println("Deleted " + deleted + " document");
|
||||||
|
indexModifier.flush();
|
||||||
|
System.out.println(indexModifier.docCount() + " docs in index");
|
||||||
|
indexModifier.close();
|
||||||
|
}*/
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,256 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Stack;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for the {@link IndexModifier} class, including accesses from two threads at the
|
||||||
|
* same time.
|
||||||
|
*
|
||||||
|
* @author Daniel Naber
|
||||||
|
*/
|
||||||
|
public class TestIndex extends TestCase {
|
||||||
|
|
||||||
|
private final int ITERATIONS = 500; // iterations of thread test
|
||||||
|
|
||||||
|
private int docCount = 0;
|
||||||
|
|
||||||
|
private final Term allDocTerm = new Term("all", "x");
|
||||||
|
|
||||||
|
public void testIndex() throws IOException {
|
||||||
|
Directory ramDir = new RAMDirectory();
|
||||||
|
IndexModifier i = new IndexModifier(ramDir, new StandardAnalyzer(), true);
|
||||||
|
i.addDocument(getDoc());
|
||||||
|
assertEquals(1, i.docCount());
|
||||||
|
i.flush();
|
||||||
|
i.addDocument(getDoc(), new SimpleAnalyzer());
|
||||||
|
assertEquals(2, i.docCount());
|
||||||
|
i.optimize();
|
||||||
|
assertEquals(2, i.docCount());
|
||||||
|
i.flush();
|
||||||
|
i.delete(0);
|
||||||
|
assertEquals(1, i.docCount());
|
||||||
|
i.flush();
|
||||||
|
assertEquals(1, i.docCount());
|
||||||
|
i.addDocument(getDoc());
|
||||||
|
i.addDocument(getDoc());
|
||||||
|
i.flush();
|
||||||
|
assertEquals(3, i.docCount());
|
||||||
|
i.delete(allDocTerm);
|
||||||
|
assertEquals(0, i.docCount());
|
||||||
|
i.optimize();
|
||||||
|
assertEquals(0, i.docCount());
|
||||||
|
|
||||||
|
// Lucene defaults:
|
||||||
|
assertNull(i.getInfoStream());
|
||||||
|
assertTrue(i.getUseCompoundFile());
|
||||||
|
assertEquals(10, i.getMaxBufferedDocs());
|
||||||
|
assertEquals(10000, i.getMaxFieldLength());
|
||||||
|
assertEquals(10, i.getMergeFactor());
|
||||||
|
i.setMaxBufferedDocs(100);
|
||||||
|
i.setMergeFactor(25);
|
||||||
|
i.setMaxFieldLength(250000);
|
||||||
|
i.addDocument(getDoc());
|
||||||
|
i.setUseCompoundFile(false);
|
||||||
|
i.flush();
|
||||||
|
assertEquals(100, i.getMaxBufferedDocs());
|
||||||
|
assertEquals(25, i.getMergeFactor());
|
||||||
|
assertEquals(250000, i.getMaxFieldLength());
|
||||||
|
assertFalse(i.getUseCompoundFile());
|
||||||
|
|
||||||
|
i.close();
|
||||||
|
try {
|
||||||
|
i.docCount();
|
||||||
|
fail();
|
||||||
|
} catch (IllegalStateException e) {
|
||||||
|
// expected exception
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExtendedIndex() throws IOException {
|
||||||
|
Directory ramDir = new RAMDirectory();
|
||||||
|
PowerIndex powerIndex = new PowerIndex(ramDir, new StandardAnalyzer(), true);
|
||||||
|
powerIndex.addDocument(getDoc());
|
||||||
|
powerIndex.addDocument(getDoc());
|
||||||
|
powerIndex.addDocument(getDoc());
|
||||||
|
powerIndex.addDocument(getDoc());
|
||||||
|
powerIndex.addDocument(getDoc());
|
||||||
|
powerIndex.flush();
|
||||||
|
assertEquals(5, powerIndex.docFreq(allDocTerm));
|
||||||
|
powerIndex.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document getDoc() {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("body", new Integer(docCount).toString(), Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
doc.add(new Field("all", "x", Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
|
docCount++;
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIndexWithThreads() throws IOException {
|
||||||
|
testIndexInternal(0);
|
||||||
|
testIndexInternal(10);
|
||||||
|
testIndexInternal(50);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testIndexInternal(int maxWait) throws IOException {
|
||||||
|
boolean create = true;
|
||||||
|
//Directory rd = new RAMDirectory();
|
||||||
|
// work on disk to make sure potential lock problems are tested:
|
||||||
|
String tempDir = System.getProperty("java.io.tmpdir");
|
||||||
|
if (tempDir == null)
|
||||||
|
throw new IOException("java.io.tmpdir undefined, cannot run test");
|
||||||
|
File indexDir = new File(tempDir, "lucenetestindex");
|
||||||
|
Directory rd = FSDirectory.getDirectory(indexDir, create);
|
||||||
|
IndexModifier index = new IndexModifier(rd, new StandardAnalyzer(), create);
|
||||||
|
IndexThread thread1 = new IndexThread(index, maxWait);
|
||||||
|
thread1.start();
|
||||||
|
IndexThread thread2 = new IndexThread(index, maxWait);
|
||||||
|
thread2.start();
|
||||||
|
while(thread1.isAlive() || thread2.isAlive()) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(100);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
index.optimize();
|
||||||
|
int added = thread1.added + thread2.added;
|
||||||
|
int deleted = thread1.deleted + thread2.deleted;
|
||||||
|
assertEquals(added-deleted, index.docCount());
|
||||||
|
index.close();
|
||||||
|
|
||||||
|
try {
|
||||||
|
index.close();
|
||||||
|
fail();
|
||||||
|
} catch(IllegalStateException e) {
|
||||||
|
// expected exception
|
||||||
|
}
|
||||||
|
rmDir(indexDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void rmDir(File dir) {
|
||||||
|
File[] files = dir.listFiles();
|
||||||
|
for (int i = 0; i < files.length; i++) {
|
||||||
|
files[i].delete();
|
||||||
|
}
|
||||||
|
dir.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int id = 0;
|
||||||
|
private Stack idStack = new Stack();
|
||||||
|
// TODO: test case is not reproducible despite pseudo-random numbers
|
||||||
|
// used for anything:
|
||||||
|
private Random random = new Random(101); // constant seed for reproducability
|
||||||
|
|
||||||
|
private class PowerIndex extends IndexModifier {
|
||||||
|
public PowerIndex(Directory dir, Analyzer analyzer, boolean create) throws IOException {
|
||||||
|
super(dir, analyzer, create);
|
||||||
|
}
|
||||||
|
public int docFreq(Term term) throws IOException {
|
||||||
|
synchronized(directory) {
|
||||||
|
assureOpen();
|
||||||
|
createIndexReader();
|
||||||
|
return indexReader.docFreq(term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class IndexThread extends Thread {
|
||||||
|
|
||||||
|
private int maxWait = 10;
|
||||||
|
private IndexModifier index;
|
||||||
|
private int added = 0;
|
||||||
|
private int deleted = 0;
|
||||||
|
|
||||||
|
IndexThread(IndexModifier index, int maxWait) {
|
||||||
|
this.index = index;
|
||||||
|
this.maxWait = maxWait;
|
||||||
|
id = 0;
|
||||||
|
idStack.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
for(int i = 0; i < ITERATIONS; i++) {
|
||||||
|
int rand = random.nextInt(101);
|
||||||
|
if (rand < 5) {
|
||||||
|
index.optimize();
|
||||||
|
} else if (rand < 60) {
|
||||||
|
Document doc = getDocument();
|
||||||
|
//System.out.println("add doc id=" + doc.get("id"));
|
||||||
|
index.addDocument(doc);
|
||||||
|
idStack.push(doc.get("id"));
|
||||||
|
added++;
|
||||||
|
} else {
|
||||||
|
if (idStack.size() == 0) {
|
||||||
|
// not enough docs in index, let's wait for next chance
|
||||||
|
} else {
|
||||||
|
// we just delete the last document added and remove it
|
||||||
|
// from the id stack so that it won't be removed twice:
|
||||||
|
String delId = (String)idStack.pop();
|
||||||
|
//System.out.println("delete doc id = " + delId);
|
||||||
|
index.delete(new Term("id", new Integer(delId).toString()));
|
||||||
|
deleted++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (maxWait > 0) {
|
||||||
|
try {
|
||||||
|
rand = random.nextInt(maxWait);
|
||||||
|
//System.out.println("waiting " + rand + "ms");
|
||||||
|
Thread.sleep(rand);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document getDocument() {
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("id", new Integer(id++).toString(), Field.Store.YES,
|
||||||
|
Field.Index.UN_TOKENIZED));
|
||||||
|
// add random stuff:
|
||||||
|
doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
|
||||||
|
Field.Index.TOKENIZED));
|
||||||
|
doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
|
||||||
|
Field.Index.TOKENIZED));
|
||||||
|
doc.add(new Field("all", "x", Field.Store.YES, Field.Index.TOKENIZED));
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue