Added IndexWriter.setTermIndexInterval().

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@156669 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2005-03-09 18:58:26 +00:00
parent 4a4221e0a9
commit 85cc5c9fc3
5 changed files with 75 additions and 14 deletions

View File

@ -78,6 +78,10 @@ New features
compound index file.
(adapted from code contributed by Garrett Rooney; committed by Bernhard)
13. Add IndexWriter.setTermIndexInterval() method. See javadocs.
(Doug Cutting)
API Changes
1. Several methods and fields have been deprecated. The API documentation

View File

@ -39,10 +39,11 @@ final class DocumentWriter {
private Similarity similarity;
private FieldInfos fieldInfos;
private int maxFieldLength;
private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
private PrintStream infoStream;
/**
*
/** This ctor used by test code only.
*
* @param directory The directory to write the document information to
* @param analyzer The analyzer to use for the document
* @param similarity The Similarity function
@ -56,6 +57,14 @@ final class DocumentWriter {
this.maxFieldLength = maxFieldLength;
}
DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) {
this.directory = directory;
this.analyzer = analyzer;
this.similarity = writer.getSimilarity();
this.maxFieldLength = writer.getMaxFieldLength();
this.termIndexInterval = writer.getTermIndexInterval();
}
final void addDocument(String segment, Document doc)
throws IOException {
// write field names
@ -295,7 +304,8 @@ final class DocumentWriter {
//open files for inverse index storage
freq = directory.createOutput(segment + ".frq");
prox = directory.createOutput(segment + ".prx");
tis = new TermInfosWriter(directory, segment, fieldInfos);
tis = new TermInfosWriter(directory, segment, fieldInfos,
termIndexInterval);
TermInfo ti = new TermInfo();
String currentField = null;

View File

@ -103,6 +103,16 @@ public class IndexWriter {
"10000"));
/** The default value for {@link #getTermIndexInterval()}. This is
* determined by the <code>org.apache.lucene.termIndexInterval</code> system
* property. The default is 128.
*/
public static final int DEFAULT_TERM_INDEX_INTERVAL =
Integer.parseInt(System.getProperty("org.apache.lucene.termIndexInterval",
"128"));
private Directory directory; // where this index resides
private Analyzer analyzer; // how to analyze text
@ -113,6 +123,8 @@ public class IndexWriter {
private Lock writeLock;
private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
/** Use compound file setting. Defaults to true, minimizing the number of
* files used. Setting this to false may improve indexing performance, but
* may also cause file handle problems.
@ -154,6 +166,26 @@ public class IndexWriter {
return this.similarity;
}
/** Expert: Set the interval between indexed terms. Large values cause less
* memory to be used by IndexReader, but slow random-access to terms. Small
* values cause more memory to be used by an IndexReader, and speed
* random-access to terms. In particular,
* <code>numUniqueTerms/interval</code> terms are read into memory by an
* IndexReader, and, on average, <code>interval/2</code> terms must be
* scanned for each random term access.
*
* @see #DEFAULT_TERM_INDEX_INTERVAL
*/
public void setTermIndexInterval(int interval) {
this.termIndexInterval = interval;
}
/** Expert: Return the interval between indexed terms.
*
* @see #setTermIndexInterval(int)
*/
public int getTermIndexInterval() { return termIndexInterval; }
/**
* Constructs an IndexWriter for the index in <code>path</code>.
* Text will be analyzed with <code>a</code>. If <code>create</code>
@ -359,6 +391,11 @@ public class IndexWriter {
}
}
/** Returns the Directory used by this index. */
public Directory getDirectory() {
return directory;
}
/** Returns the analyzer used by this index. */
public Analyzer getAnalyzer() {
return analyzer;
@ -408,7 +445,7 @@ public class IndexWriter {
*/
public void addDocument(Document doc, Analyzer analyzer) throws IOException {
DocumentWriter dw =
new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
new DocumentWriter(ramDirectory, analyzer, this);
dw.setInfoStream(infoStream);
String segmentName = newSegmentName();
dw.addDocument(segmentName, doc);
@ -514,7 +551,7 @@ public class IndexWriter {
optimize(); // start with zero or 1 seg
final String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(directory, mergedName);
SegmentMerger merger = new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector();
IndexReader sReader = null;
@ -609,7 +646,7 @@ public class IndexWriter {
final String mergedName = newSegmentName();
if (infoStream != null) infoStream.print("merging segments");
SegmentMerger merger =
new SegmentMerger(directory, mergedName);
new SegmentMerger(this, mergedName);
final Vector segmentsToDelete = new Vector();
for (int i = minSegment; i < segmentInfos.size(); i++) {

View File

@ -39,6 +39,7 @@ import org.apache.lucene.store.RAMOutputStream;
final class SegmentMerger {
private Directory directory;
private String segment;
private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
private Vector readers = new Vector();
private FieldInfos fieldInfos;
@ -51,7 +52,7 @@ final class SegmentMerger {
"tvx", "tvd", "tvf"
};
/**
/** This ctor used only by test code.
*
* @param dir The Directory to merge the other segments into
* @param name The name of the new segment
@ -61,6 +62,12 @@ final class SegmentMerger {
segment = name;
}
SegmentMerger(IndexWriter writer, String name) {
directory = writer.getDirectory();
segment = name;
termIndexInterval = writer.getTermIndexInterval();
}
/**
* Add an IndexReader to the collection of readers that are to be merged
* @param reader
@ -220,7 +227,8 @@ final class SegmentMerger {
freqOutput = directory.createOutput(segment + ".frq");
proxOutput = directory.createOutput(segment + ".prx");
termInfosWriter =
new TermInfosWriter(directory, segment, fieldInfos);
new TermInfosWriter(directory, segment, fieldInfos,
termIndexInterval);
skipInterval = termInfosWriter.skipInterval;
queue = new SegmentMergeQueue(readers.size());

View File

@ -61,20 +61,22 @@ final class TermInfosWriter {
private TermInfosWriter other = null;
TermInfosWriter(Directory directory, String segment, FieldInfos fis)
TermInfosWriter(Directory directory, String segment, FieldInfos fis,
int interval)
throws IOException {
initialize(directory, segment, fis, false);
other = new TermInfosWriter(directory, segment, fis, true);
initialize(directory, segment, fis, interval, false);
other = new TermInfosWriter(directory, segment, fis, interval, true);
other.other = this;
}
private TermInfosWriter(Directory directory, String segment, FieldInfos fis,
boolean isIndex) throws IOException {
initialize(directory, segment, fis, isIndex);
int interval, boolean isIndex) throws IOException {
initialize(directory, segment, fis, interval, isIndex);
}
private void initialize(Directory directory, String segment, FieldInfos fis,
boolean isi) throws IOException {
int interval, boolean isi) throws IOException {
indexInterval = interval;
fieldInfos = fis;
isIndex = isi;
output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis"));