mirror of https://github.com/apache/lucene.git
Added IndexWriter.setTermIndexInterval().
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@156669 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4a4221e0a9
commit
85cc5c9fc3
|
@ -78,6 +78,10 @@ New features
|
|||
compound index file.
|
||||
(adapted from code contributed by Garrett Rooney; committed by Bernhard)
|
||||
|
||||
13. Add IndexWriter.setTermIndexInterval() method. See javadocs.
|
||||
(Doug Cutting)
|
||||
|
||||
|
||||
API Changes
|
||||
|
||||
1. Several methods and fields have been deprecated. The API documentation
|
||||
|
|
|
@ -39,10 +39,11 @@ final class DocumentWriter {
|
|||
private Similarity similarity;
|
||||
private FieldInfos fieldInfos;
|
||||
private int maxFieldLength;
|
||||
private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
|
||||
private PrintStream infoStream;
|
||||
|
||||
/**
|
||||
*
|
||||
/** This ctor used by test code only.
|
||||
*
|
||||
* @param directory The directory to write the document information to
|
||||
* @param analyzer The analyzer to use for the document
|
||||
* @param similarity The Similarity function
|
||||
|
@ -56,6 +57,14 @@ final class DocumentWriter {
|
|||
this.maxFieldLength = maxFieldLength;
|
||||
}
|
||||
|
||||
DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) {
|
||||
this.directory = directory;
|
||||
this.analyzer = analyzer;
|
||||
this.similarity = writer.getSimilarity();
|
||||
this.maxFieldLength = writer.getMaxFieldLength();
|
||||
this.termIndexInterval = writer.getTermIndexInterval();
|
||||
}
|
||||
|
||||
final void addDocument(String segment, Document doc)
|
||||
throws IOException {
|
||||
// write field names
|
||||
|
@ -295,7 +304,8 @@ final class DocumentWriter {
|
|||
//open files for inverse index storage
|
||||
freq = directory.createOutput(segment + ".frq");
|
||||
prox = directory.createOutput(segment + ".prx");
|
||||
tis = new TermInfosWriter(directory, segment, fieldInfos);
|
||||
tis = new TermInfosWriter(directory, segment, fieldInfos,
|
||||
termIndexInterval);
|
||||
TermInfo ti = new TermInfo();
|
||||
String currentField = null;
|
||||
|
||||
|
|
|
@ -103,6 +103,16 @@ public class IndexWriter {
|
|||
"10000"));
|
||||
|
||||
|
||||
/** The default value for {@link #getTermIndexInterval()}. This is
|
||||
* determined by the <code>org.apache.lucene.termIndexInterval</code> system
|
||||
* property. The default is 128.
|
||||
*/
|
||||
public static final int DEFAULT_TERM_INDEX_INTERVAL =
|
||||
Integer.parseInt(System.getProperty("org.apache.lucene.termIndexInterval",
|
||||
"128"));
|
||||
|
||||
|
||||
|
||||
private Directory directory; // where this index resides
|
||||
private Analyzer analyzer; // how to analyze text
|
||||
|
||||
|
@ -113,6 +123,8 @@ public class IndexWriter {
|
|||
|
||||
private Lock writeLock;
|
||||
|
||||
private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
|
||||
|
||||
/** Use compound file setting. Defaults to true, minimizing the number of
|
||||
* files used. Setting this to false may improve indexing performance, but
|
||||
* may also cause file handle problems.
|
||||
|
@ -154,6 +166,26 @@ public class IndexWriter {
|
|||
return this.similarity;
|
||||
}
|
||||
|
||||
/** Expert: Set the interval between indexed terms. Large values cause less
|
||||
* memory to be used by IndexReader, but slow random-access to terms. Small
|
||||
* values cause more memory to be used by an IndexReader, and speed
|
||||
* random-access to terms. In particular,
|
||||
* <code>numUniqueTerms/interval</code> terms are read into memory by an
|
||||
* IndexReader, and, on average, <code>interval/2</code> terms must be
|
||||
* scanned for each random term access.
|
||||
*
|
||||
* @see #DEFAULT_TERM_INDEX_INTERVAL
|
||||
*/
|
||||
public void setTermIndexInterval(int interval) {
|
||||
this.termIndexInterval = interval;
|
||||
}
|
||||
|
||||
/** Expert: Return the interval between indexed terms.
|
||||
*
|
||||
* @see #setTermIndexInterval(int)
|
||||
*/
|
||||
public int getTermIndexInterval() { return termIndexInterval; }
|
||||
|
||||
/**
|
||||
* Constructs an IndexWriter for the index in <code>path</code>.
|
||||
* Text will be analyzed with <code>a</code>. If <code>create</code>
|
||||
|
@ -359,6 +391,11 @@ public class IndexWriter {
|
|||
}
|
||||
}
|
||||
|
||||
/** Returns the Directory used by this index. */
|
||||
public Directory getDirectory() {
|
||||
return directory;
|
||||
}
|
||||
|
||||
/** Returns the analyzer used by this index. */
|
||||
public Analyzer getAnalyzer() {
|
||||
return analyzer;
|
||||
|
@ -408,7 +445,7 @@ public class IndexWriter {
|
|||
*/
|
||||
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
|
||||
DocumentWriter dw =
|
||||
new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
|
||||
new DocumentWriter(ramDirectory, analyzer, this);
|
||||
dw.setInfoStream(infoStream);
|
||||
String segmentName = newSegmentName();
|
||||
dw.addDocument(segmentName, doc);
|
||||
|
@ -514,7 +551,7 @@ public class IndexWriter {
|
|||
optimize(); // start with zero or 1 seg
|
||||
|
||||
final String mergedName = newSegmentName();
|
||||
SegmentMerger merger = new SegmentMerger(directory, mergedName);
|
||||
SegmentMerger merger = new SegmentMerger(this, mergedName);
|
||||
|
||||
final Vector segmentsToDelete = new Vector();
|
||||
IndexReader sReader = null;
|
||||
|
@ -609,7 +646,7 @@ public class IndexWriter {
|
|||
final String mergedName = newSegmentName();
|
||||
if (infoStream != null) infoStream.print("merging segments");
|
||||
SegmentMerger merger =
|
||||
new SegmentMerger(directory, mergedName);
|
||||
new SegmentMerger(this, mergedName);
|
||||
|
||||
final Vector segmentsToDelete = new Vector();
|
||||
for (int i = minSegment; i < segmentInfos.size(); i++) {
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.lucene.store.RAMOutputStream;
|
|||
final class SegmentMerger {
|
||||
private Directory directory;
|
||||
private String segment;
|
||||
private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
|
||||
|
||||
private Vector readers = new Vector();
|
||||
private FieldInfos fieldInfos;
|
||||
|
@ -51,7 +52,7 @@ final class SegmentMerger {
|
|||
"tvx", "tvd", "tvf"
|
||||
};
|
||||
|
||||
/**
|
||||
/** This ctor used only by test code.
|
||||
*
|
||||
* @param dir The Directory to merge the other segments into
|
||||
* @param name The name of the new segment
|
||||
|
@ -61,6 +62,12 @@ final class SegmentMerger {
|
|||
segment = name;
|
||||
}
|
||||
|
||||
SegmentMerger(IndexWriter writer, String name) {
|
||||
directory = writer.getDirectory();
|
||||
segment = name;
|
||||
termIndexInterval = writer.getTermIndexInterval();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an IndexReader to the collection of readers that are to be merged
|
||||
* @param reader
|
||||
|
@ -220,7 +227,8 @@ final class SegmentMerger {
|
|||
freqOutput = directory.createOutput(segment + ".frq");
|
||||
proxOutput = directory.createOutput(segment + ".prx");
|
||||
termInfosWriter =
|
||||
new TermInfosWriter(directory, segment, fieldInfos);
|
||||
new TermInfosWriter(directory, segment, fieldInfos,
|
||||
termIndexInterval);
|
||||
skipInterval = termInfosWriter.skipInterval;
|
||||
queue = new SegmentMergeQueue(readers.size());
|
||||
|
||||
|
|
|
@ -61,20 +61,22 @@ final class TermInfosWriter {
|
|||
|
||||
private TermInfosWriter other = null;
|
||||
|
||||
TermInfosWriter(Directory directory, String segment, FieldInfos fis)
|
||||
TermInfosWriter(Directory directory, String segment, FieldInfos fis,
|
||||
int interval)
|
||||
throws IOException {
|
||||
initialize(directory, segment, fis, false);
|
||||
other = new TermInfosWriter(directory, segment, fis, true);
|
||||
initialize(directory, segment, fis, interval, false);
|
||||
other = new TermInfosWriter(directory, segment, fis, interval, true);
|
||||
other.other = this;
|
||||
}
|
||||
|
||||
private TermInfosWriter(Directory directory, String segment, FieldInfos fis,
|
||||
boolean isIndex) throws IOException {
|
||||
initialize(directory, segment, fis, isIndex);
|
||||
int interval, boolean isIndex) throws IOException {
|
||||
initialize(directory, segment, fis, interval, isIndex);
|
||||
}
|
||||
|
||||
private void initialize(Directory directory, String segment, FieldInfos fis,
|
||||
boolean isi) throws IOException {
|
||||
int interval, boolean isi) throws IOException {
|
||||
indexInterval = interval;
|
||||
fieldInfos = fis;
|
||||
isIndex = isi;
|
||||
output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis"));
|
||||
|
|
Loading…
Reference in New Issue