LUCENE-2324: Committing second version of the patch to the real-time branch. It's not done yet, but easier to track progress using the branch.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@966168 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Busch 2010-07-21 10:27:20 +00:00
parent f18502ed41
commit fc970ef01c
3 changed files with 742 additions and 2239 deletions

File diff suppressed because it is too large Load Diff

View File

@ -240,7 +240,7 @@ public class IndexWriter implements Closeable {
* printed to infoStream, if set (see {@link * printed to infoStream, if set (see {@link
* #setInfoStream}). * #setInfoStream}).
*/ */
public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH_UTF8; public final static int MAX_TERM_LENGTH = DocumentsWriterRAMAllocator.MAX_TERM_LENGTH_UTF8;
// The normal read buffer size defaults to 1024, but // The normal read buffer size defaults to 1024, but
// increasing this during merging seems to yield // increasing this during merging seems to yield
@ -271,10 +271,12 @@ public class IndexWriter implements Closeable {
volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit())
volatile long pendingCommitChangeCount; volatile long pendingCommitChangeCount;
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments // nocommit - private
SegmentInfos segmentInfos = new SegmentInfos(); // the segments
private DocumentsWriter docWriter; private DocumentsWriter docWriter;
private IndexFileDeleter deleter; //nocommit - private
IndexFileDeleter deleter;
private Set<SegmentInfo> segmentsToOptimize = new HashSet<SegmentInfo>(); // used by optimize to note those needing optimization private Set<SegmentInfo> segmentsToOptimize = new HashSet<SegmentInfo>(); // used by optimize to note those needing optimization
@ -289,8 +291,8 @@ public class IndexWriter implements Closeable {
// Holds all SegmentInfo instances currently involved in // Holds all SegmentInfo instances currently involved in
// merges // merges
private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>(); private HashSet<SegmentInfo> mergingSegments = new HashSet<SegmentInfo>();
// nocommit - private
private MergePolicy mergePolicy; MergePolicy mergePolicy;
// TODO 4.0: this should be made final once the setter is removed // TODO 4.0: this should be made final once the setter is removed
private /*final*/MergeScheduler mergeScheduler; private /*final*/MergeScheduler mergeScheduler;
private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>(); private LinkedList<MergePolicy.OneMerge> pendingMerges = new LinkedList<MergePolicy.OneMerge>();
@ -733,113 +735,6 @@ public class IndexWriter implements Closeable {
throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy"); throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
} }
/** <p>Get the current setting of whether newly flushed
* segments will use the compound file format. Note that
* this just returns the value previously set with
* setUseCompoundFile(boolean), or the default value
* (true). You cannot use this to query the status of
* previously flushed segments.</p>
*
* <p>Note that this method is a convenience method: it
* just calls mergePolicy.getUseCompoundFile as long as
* mergePolicy is an instance of {@link LogMergePolicy}.
* Otherwise an IllegalArgumentException is thrown.</p>
*
* @see #setUseCompoundFile(boolean)
* @deprecated use {@link LogMergePolicy#getUseCompoundDocStore()} and
* {@link LogMergePolicy#getUseCompoundFile()} directly.
*/
public boolean getUseCompoundFile() {
return getLogMergePolicy().getUseCompoundFile();
}
/**
* <p>
* Setting to turn on usage of a compound file. When on, multiple files for
* each segment are merged into a single file when a new segment is flushed.
* </p>
*
* <p>
* Note that this method is a convenience method: it just calls
* mergePolicy.setUseCompoundFile as long as mergePolicy is an instance of
* {@link LogMergePolicy}. Otherwise an IllegalArgumentException is thrown.
* </p>
*
* @deprecated use {@link LogMergePolicy#setUseCompoundDocStore(boolean)} and
* {@link LogMergePolicy#setUseCompoundFile(boolean)} directly.
* Note that this method set the given value on both, therefore
* you should consider doing the same.
*/
public void setUseCompoundFile(boolean value) {
getLogMergePolicy().setUseCompoundFile(value);
getLogMergePolicy().setUseCompoundDocStore(value);
}
/** Expert: Set the Similarity implementation used by this IndexWriter.
*
* @see Similarity#setDefault(Similarity)
* @deprecated use {@link IndexWriterConfig#setSimilarity(Similarity)} instead
*/
public void setSimilarity(Similarity similarity) {
ensureOpen();
this.similarity = similarity;
docWriter.setSimilarity(similarity);
// Required so config.getSimilarity returns the right value. But this will
// go away together with the method in 4.0.
config.setSimilarity(similarity);
}
/** Expert: Return the Similarity implementation used by this IndexWriter.
*
* <p>This defaults to the current value of {@link Similarity#getDefault()}.
* @deprecated use {@link IndexWriterConfig#getSimilarity()} instead
*/
public Similarity getSimilarity() {
ensureOpen();
return similarity;
}
/** Expert: Set the interval between indexed terms. Large values cause less
* memory to be used by IndexReader, but slow random-access to terms. Small
* values cause more memory to be used by an IndexReader, and speed
* random-access to terms.
*
* This parameter determines the amount of computation required per query
* term, regardless of the number of documents that contain that term. In
* particular, it is the maximum number of other terms that must be
* scanned before a term is located and its frequency and position information
* may be processed. In a large index with user-entered query terms, query
* processing time is likely to be dominated not by term lookup but rather
* by the processing of frequency and positional data. In a small index
* or when many uncommon query terms are generated (e.g., by wildcard
* queries) term lookup may become a dominant cost.
*
* In particular, <code>numUniqueTerms/interval</code> terms are read into
* memory by an IndexReader, and, on average, <code>interval/2</code> terms
* must be scanned for each random term access.
*
* @see #DEFAULT_TERM_INDEX_INTERVAL
* @deprecated use {@link IndexWriterConfig#setTermIndexInterval(int)}
*/
public void setTermIndexInterval(int interval) {
ensureOpen();
this.termIndexInterval = interval;
// Required so config.getTermIndexInterval returns the right value. But this
// will go away together with the method in 4.0.
config.setTermIndexInterval(interval);
}
/** Expert: Return the interval between indexed terms.
*
* @see #setTermIndexInterval(int)
* @deprecated use {@link IndexWriterConfig#getTermIndexInterval()}
*/
public int getTermIndexInterval() {
// We pass false because this method is called by SegmentMerger while we are in the process of closing
ensureOpen(false);
return termIndexInterval;
}
/** /**
* Constructs an IndexWriter for the index in <code>d</code>. * Constructs an IndexWriter for the index in <code>d</code>.
* Text will be analyzed with <code>a</code>. If <code>create</code> * Text will be analyzed with <code>a</code>. If <code>create</code>
@ -1028,7 +923,6 @@ public class IndexWriter implements Closeable {
directory = d; directory = d;
analyzer = conf.getAnalyzer(); analyzer = conf.getAnalyzer();
setMessageID(defaultInfoStream); setMessageID(defaultInfoStream);
maxFieldLength = conf.getMaxFieldLength();
termIndexInterval = conf.getTermIndexInterval(); termIndexInterval = conf.getTermIndexInterval();
writeLockTimeout = conf.getWriteLockTimeout(); writeLockTimeout = conf.getWriteLockTimeout();
similarity = conf.getSimilarity(); similarity = conf.getSimilarity();
@ -1102,9 +996,10 @@ public class IndexWriter implements Closeable {
setRollbackSegmentInfos(segmentInfos); setRollbackSegmentInfos(segmentInfos);
docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates()); docWriter = new DocumentsWriter(directory, this, conf);
docWriter.setInfoStream(infoStream); // nocommit
docWriter.setMaxFieldLength(maxFieldLength); //docWriter.setInfoStream(infoStream);
//docWriter.setMaxFieldLength(maxFieldLength);
// Default deleter (for backwards compatibility) is // Default deleter (for backwards compatibility) is
// KeepOnlyLastCommitDeleter: // KeepOnlyLastCommitDeleter:
@ -1167,149 +1062,6 @@ public class IndexWriter implements Closeable {
return config; return config;
} }
/**
* Expert: set the merge policy used by this writer.
*
* @deprecated use {@link IndexWriterConfig#setMergePolicy(MergePolicy)} instead.
*/
public void setMergePolicy(MergePolicy mp) {
ensureOpen();
if (mp == null)
throw new NullPointerException("MergePolicy must be non-null");
if (mergePolicy != mp)
mergePolicy.close();
mergePolicy = mp;
mergePolicy.setIndexWriter(this);
pushMaxBufferedDocs();
if (infoStream != null)
message("setMergePolicy " + mp);
// Required so config.getMergePolicy returns the right value. But this will
// go away together with the method in 4.0.
config.setMergePolicy(mp);
}
/**
* Expert: returns the current MergePolicy in use by this writer.
* @see #setMergePolicy
*
* @deprecated use {@link IndexWriterConfig#getMergePolicy()} instead
*/
public MergePolicy getMergePolicy() {
ensureOpen();
return mergePolicy;
}
/**
* Expert: set the merge scheduler used by this writer.
* @deprecated use {@link IndexWriterConfig#setMergeScheduler(MergeScheduler)} instead
*/
synchronized public void setMergeScheduler(MergeScheduler mergeScheduler) throws CorruptIndexException, IOException {
ensureOpen();
if (mergeScheduler == null)
throw new NullPointerException("MergeScheduler must be non-null");
if (this.mergeScheduler != mergeScheduler) {
finishMerges(true);
this.mergeScheduler.close();
}
this.mergeScheduler = mergeScheduler;
if (infoStream != null)
message("setMergeScheduler " + mergeScheduler);
// Required so config.getMergeScheduler returns the right value. But this will
// go away together with the method in 4.0.
config.setMergeScheduler(mergeScheduler);
}
/**
* Expert: returns the current MergeScheduler in use by this
* writer.
* @see #setMergeScheduler(MergeScheduler)
* @deprecated use {@link IndexWriterConfig#getMergeScheduler()} instead
*/
public MergeScheduler getMergeScheduler() {
ensureOpen();
return mergeScheduler;
}
/** <p>Determines the largest segment (measured by
* document count) that may be merged with other segments.
* Small values (e.g., less than 10,000) are best for
* interactive indexing, as this limits the length of
* pauses while indexing to a few seconds. Larger values
* are best for batched indexing and speedier
* searches.</p>
*
* <p>The default value is {@link Integer#MAX_VALUE}.</p>
*
* <p>Note that this method is a convenience method: it
* just calls mergePolicy.setMaxMergeDocs as long as
* mergePolicy is an instance of {@link LogMergePolicy}.
* Otherwise an IllegalArgumentException is thrown.</p>
*
* <p>The default merge policy ({@link
* LogByteSizeMergePolicy}) also allows you to set this
* limit by net size (in MB) of the segment, using {@link
* LogByteSizeMergePolicy#setMaxMergeMB}.</p>
* @deprecated use {@link LogMergePolicy#setMaxMergeDocs(int)} directly.
*/
public void setMaxMergeDocs(int maxMergeDocs) {
getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
}
/**
* <p>Returns the largest segment (measured by document
* count) that may be merged with other segments.</p>
*
* <p>Note that this method is a convenience method: it
* just calls mergePolicy.getMaxMergeDocs as long as
* mergePolicy is an instance of {@link LogMergePolicy}.
* Otherwise an IllegalArgumentException is thrown.</p>
*
* @see #setMaxMergeDocs
* @deprecated use {@link LogMergePolicy#getMaxMergeDocs()} directly.
*/
public int getMaxMergeDocs() {
return getLogMergePolicy().getMaxMergeDocs();
}
/**
* The maximum number of terms that will be indexed for a single field in a
* document. This limits the amount of memory required for indexing, so that
* collections with very large files will not crash the indexing process by
* running out of memory. This setting refers to the number of running terms,
* not to the number of different terms.<p/>
* <strong>Note:</strong> this silently truncates large documents, excluding from the
* index all terms that occur further in the document. If you know your source
* documents are large, be sure to set this value high enough to accomodate
* the expected size. If you set it to Integer.MAX_VALUE, then the only limit
* is your memory, but you should anticipate an OutOfMemoryError.<p/>
* By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms
* will be indexed for a field.
* @deprecated use {@link IndexWriterConfig#setMaxFieldLength(int)} instead
*/
public void setMaxFieldLength(int maxFieldLength) {
ensureOpen();
this.maxFieldLength = maxFieldLength;
docWriter.setMaxFieldLength(maxFieldLength);
if (infoStream != null)
message("setMaxFieldLength " + maxFieldLength);
// Required so config.getMaxFieldLength returns the right value. But this
// will go away together with the method in 4.0.
config.setMaxFieldLength(maxFieldLength);
}
/**
* Returns the maximum number of terms that will be
* indexed for a single field in a document.
* @see #setMaxFieldLength
* @deprecated use {@link IndexWriterConfig#getMaxFieldLength()} instead
*/
public int getMaxFieldLength() {
ensureOpen();
return maxFieldLength;
}
/** Determines the minimal number of documents required /** Determines the minimal number of documents required
* before the buffered in-memory documents are flushed as * before the buffered in-memory documents are flushed as
* a new Segment. Large values generally gives faster * a new Segment. Large values generally gives faster
@ -1543,7 +1295,8 @@ public class IndexWriter implements Closeable {
public void setInfoStream(PrintStream infoStream) { public void setInfoStream(PrintStream infoStream) {
ensureOpen(); ensureOpen();
setMessageID(infoStream); setMessageID(infoStream);
docWriter.setInfoStream(infoStream); // nocommit
//docWriter.setInfoStream(infoStream);
deleter.setInfoStream(infoStream); deleter.setInfoStream(infoStream);
if (infoStream != null) if (infoStream != null)
messageState(); messageState();
@ -1571,48 +1324,6 @@ public class IndexWriter implements Closeable {
return infoStream != null; return infoStream != null;
} }
/**
* Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter. @see
* @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
* @deprecated use {@link IndexWriterConfig#setWriteLockTimeout(long)} instead
*/
public void setWriteLockTimeout(long writeLockTimeout) {
ensureOpen();
this.writeLockTimeout = writeLockTimeout;
// Required so config.getWriteLockTimeout returns the right value. But this
// will go away together with the method in 4.0.
config.setWriteLockTimeout(writeLockTimeout);
}
/**
* Returns allowed timeout when acquiring the write lock.
* @see #setWriteLockTimeout
* @deprecated use {@link IndexWriterConfig#getWriteLockTimeout()}
*/
public long getWriteLockTimeout() {
ensureOpen();
return writeLockTimeout;
}
/**
* Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in
* milliseconds).
* @deprecated use {@link IndexWriterConfig#setDefaultWriteLockTimeout(long)} instead
*/
public static void setDefaultWriteLockTimeout(long writeLockTimeout) {
IndexWriterConfig.setDefaultWriteLockTimeout(writeLockTimeout);
}
/**
* Returns default write lock timeout for newly
* instantiated IndexWriters.
* @see #setDefaultWriteLockTimeout
* @deprecated use {@link IndexWriterConfig#getDefaultWriteLockTimeout()} instead
*/
public static long getDefaultWriteLockTimeout() {
return IndexWriterConfig.getDefaultWriteLockTimeout();
}
/** /**
* Commits all changes to an index and closes all * Commits all changes to an index and closes all
* associated files. Note that this may be a costly * associated files. Note that this may be a costly
@ -1774,8 +1485,9 @@ public class IndexWriter implements Closeable {
closing = false; closing = false;
notifyAll(); notifyAll();
if (!closed) { if (!closed) {
if (docWriter != null) if (docWriter != null) {
docWriter.resumeAllThreads(); docWriter.resumeAllThreads();
}
if (infoStream != null) if (infoStream != null)
message("hit exception while closing"); message("hit exception while closing");
} }
@ -1783,76 +1495,6 @@ public class IndexWriter implements Closeable {
} }
} }
/** Tells the docWriter to close its currently open shared
* doc stores (stored fields & vectors files).
* Return value specifices whether new doc store files are compound or not.
*/
private synchronized boolean flushDocStores() throws IOException {
boolean useCompoundDocStore = false;
String docStoreSegment;
boolean success = false;
try {
docStoreSegment = docWriter.closeDocStore();
success = true;
} finally {
if (!success && infoStream != null) {
message("hit exception closing doc store segment");
}
}
useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos);
if (useCompoundDocStore && docStoreSegment != null && docWriter.closedFiles().size() != 0) {
// Now build compound doc store file
if (infoStream != null) {
message("create compound file " + IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION));
}
success = false;
final int numSegments = segmentInfos.size();
final String compoundFileName = IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
try {
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
for (final String file : docWriter.closedFiles() ) {
cfsWriter.addFile(file);
}
// Perform the merge
cfsWriter.close();
success = true;
} finally {
if (!success) {
if (infoStream != null)
message("hit exception building compound file doc store for segment " + docStoreSegment);
deleter.deleteFile(compoundFileName);
docWriter.abort();
}
}
for(int i=0;i<numSegments;i++) {
SegmentInfo si = segmentInfos.info(i);
if (si.getDocStoreOffset() != -1 &&
si.getDocStoreSegment().equals(docStoreSegment))
si.setDocStoreIsCompoundFile(true);
}
checkpoint();
// In case the files we just merged into a CFS were
// not previously checkpointed:
deleter.deleteNewFiles(docWriter.closedFiles());
}
return useCompoundDocStore;
}
/** Returns the Directory used by this index. */ /** Returns the Directory used by this index. */
public Directory getDirectory() { public Directory getDirectory() {
// Pass false because the flush during closing calls getDirectory // Pass false because the flush during closing calls getDirectory
@ -1912,22 +1554,6 @@ public class IndexWriter implements Closeable {
return false; return false;
} }
/**
* The maximum number of terms that will be indexed for a single field in a
* document. This limits the amount of memory required for indexing, so that
* collections with very large files will not crash the indexing process by
* running out of memory.<p/>
* Note that this effectively truncates large documents, excluding from the
* index terms that occur further in the document. If you know your source
* documents are large, be sure to set this value high enough to accommodate
* the expected size. If you set it to Integer.MAX_VALUE, then the only limit
* is your memory, but you should anticipate an OutOfMemoryError.<p/>
* By default, no more than 10,000 terms will be indexed for a field.
*
* @see MaxFieldLength
*/
private int maxFieldLength;
/** /**
* Adds a document to this index. If the document contains more than * Adds a document to this index. If the document contains more than
* {@link #setMaxFieldLength(int)} terms for a given field, the remainder are * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are
@ -1972,8 +1598,8 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
public void addDocument(Document doc) throws CorruptIndexException, IOException { public long addDocument(Document doc) throws CorruptIndexException, IOException {
addDocument(doc, analyzer); return addDocument(doc, analyzer);
} }
/** /**
@ -1993,36 +1619,36 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { public long addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
boolean doFlush = false;
boolean success = false; boolean success = false;
try { try {
try { try {
doFlush = docWriter.addDocument(doc, analyzer); long sequenceID = docWriter.addDocument(doc, analyzer);
success = true; success = true;
return sequenceID;
} finally { } finally {
if (!success) { if (!success) {
if (infoStream != null) {
if (infoStream != null)
message("hit exception adding document"); message("hit exception adding document");
}
synchronized (this) { synchronized (this) {
// If docWriter has some aborted files that were // If docWriter has some aborted files that were
// never incref'd, then we clean them up here // never incref'd, then we clean them up here
if (docWriter != null) { if (docWriter != null) {
final Collection<String> files = docWriter.abortedFiles(); final Collection<String> files = docWriter.abortedFiles();
if (files != null) if (files != null) {
deleter.deleteNewFiles(files); deleter.deleteNewFiles(files);
}
} }
} }
} }
} }
if (doFlush)
flush(true, false, false);
} catch (OutOfMemoryError oom) { } catch (OutOfMemoryError oom) {
handleOOM(oom, "addDocument"); handleOOM(oom, "addDocument");
} }
return -1;
} }
/** /**
@ -2036,15 +1662,14 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
public void deleteDocuments(Term term) throws CorruptIndexException, IOException { public long deleteDocuments(Term term) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
try { try {
boolean doFlush = docWriter.bufferDeleteTerm(term); return docWriter.bufferDeleteTerm(term);
if (doFlush)
flush(true, false, false);
} catch (OutOfMemoryError oom) { } catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term)"); handleOOM(oom, "deleteDocuments(Term)");
} }
return -1;
} }
/** /**
@ -2060,15 +1685,14 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException { public long deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
try { try {
boolean doFlush = docWriter.bufferDeleteTerms(terms); return docWriter.bufferDeleteTerms(terms);
if (doFlush)
flush(true, false, false);
} catch (OutOfMemoryError oom) { } catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term..)"); handleOOM(oom, "deleteDocuments(Term..)");
} }
return -1;
} }
/** /**
@ -2082,11 +1706,9 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
public void deleteDocuments(Query query) throws CorruptIndexException, IOException { public long deleteDocuments(Query query) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
boolean doFlush = docWriter.bufferDeleteQuery(query); return docWriter.bufferDeleteQuery(query);
if (doFlush)
flush(true, false, false);
} }
/** /**
@ -2102,11 +1724,9 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException { public long deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
boolean doFlush = docWriter.bufferDeleteQueries(queries); return docWriter.bufferDeleteQueries(queries);
if (doFlush)
flush(true, false, false);
} }
/** /**
@ -2149,35 +1769,37 @@ public class IndexWriter implements Closeable {
* @throws CorruptIndexException if the index is corrupt * @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error * @throws IOException if there is a low-level IO error
*/ */
public void updateDocument(Term term, Document doc, Analyzer analyzer) public long updateDocument(Term term, Document doc, Analyzer analyzer)
throws CorruptIndexException, IOException { throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
try { try {
boolean doFlush = false;
boolean success = false; boolean success = false;
try { try {
doFlush = docWriter.updateDocument(term, doc, analyzer); long sequenceID = docWriter.updateDocument(term, doc, analyzer);
success = true; success = true;
return sequenceID;
} finally { } finally {
if (!success) { if (!success) {
if (infoStream != null) if (infoStream != null) {
message("hit exception updating document"); message("hit exception updating document");
}
synchronized (this) { synchronized (this) {
// If docWriter has some aborted files that were // If docWriter has some aborted files that were
// never incref'd, then we clean them up here // never incref'd, then we clean them up here
final Collection<String> files = docWriter.abortedFiles(); final Collection<String> files = docWriter.abortedFiles();
if (files != null) if (files != null) {
deleter.deleteNewFiles(files); deleter.deleteNewFiles(files);
}
} }
} }
} }
if (doFlush)
flush(true, false, false);
} catch (OutOfMemoryError oom) { } catch (OutOfMemoryError oom) {
handleOOM(oom, "updateDocument"); handleOOM(oom, "updateDocument");
} }
return -1;
} }
// for test purpose // for test purpose
@ -2697,7 +2319,8 @@ public class IndexWriter implements Closeable {
// Remove any buffered docs // Remove any buffered docs
docWriter.abort(); docWriter.abort();
docWriter.setFlushedDocCount(0); // nocommit
//docWriter.setFlushedDocCount(0);
// Remove all segments // Remove all segments
segmentInfos.clear(); segmentInfos.clear();
@ -2790,7 +2413,8 @@ public class IndexWriter implements Closeable {
* the index files referenced exist (correctly) in the * the index files referenced exist (correctly) in the
* index directory. * index directory.
*/ */
private synchronized void checkpoint() throws IOException { // nocommit - private
synchronized void checkpoint() throws IOException {
changeCount++; changeCount++;
deleter.checkpoint(segmentInfos, false); deleter.checkpoint(segmentInfos, false);
} }
@ -2925,7 +2549,8 @@ public class IndexWriter implements Closeable {
ensureOpen(); ensureOpen();
segmentInfos.addAll(infos); segmentInfos.addAll(infos);
// Notify DocumentsWriter that the flushed count just increased // Notify DocumentsWriter that the flushed count just increased
docWriter.updateFlushedDocCount(docCount); // nocommit
//docWriter.updateFlushedDocCount(docCount);
checkpoint(); checkpoint();
} }
@ -2977,11 +2602,12 @@ public class IndexWriter implements Closeable {
checkpoint(); checkpoint();
// Notify DocumentsWriter that the flushed count just increased // Notify DocumentsWriter that the flushed count just increased
docWriter.updateFlushedDocCount(docCount); // nocommit
//docWriter.updateFlushedDocCount(docCount);
} }
// Now create the compound file if needed // Now create the compound file if needed
if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) { if (mergePolicy instanceof LogMergePolicy && getLogMergePolicy().getUseCompoundFile()) {
List<String> files = null; List<String> files = null;
@ -3211,183 +2837,17 @@ public class IndexWriter implements Closeable {
// synchronized, ie, merges should be allowed to commit // synchronized, ie, merges should be allowed to commit
// even while a flush is happening // even while a flush is happening
private synchronized final boolean doFlush(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException { private synchronized final boolean doFlush(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
try { return docWriter.flushAllThreads(flushDocStores, flushDeletes);
try { // nocommit
return doFlushInternal(flushDocStores, flushDeletes); // try {
} finally { // try {
docWriter.balanceRAM(); // return doFlushInternal(flushDocStores, flushDeletes);
} // } finally {
} finally { // docWriter.balanceRAM();
docWriter.clearFlushPending(); // }
} // } finally {
} // docWriter.clearFlushPending();
// }
// TODO: this method should not have to be entirely
// synchronized, ie, merges should be allowed to commit
// even while a flush is happening
private synchronized final boolean doFlushInternal(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException {
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
}
ensureOpen(false);
assert testPoint("startDoFlush");
doBeforeFlush();
flushCount++;
// If we are flushing because too many deletes
// accumulated, then we should apply the deletes to free
// RAM:
flushDeletes |= docWriter.doApplyDeletes();
// Make sure no threads are actively adding a document.
// Returns true if docWriter is currently aborting, in
// which case we skip flushing this segment
if (infoStream != null) {
message("flush: now pause all indexing threads");
}
if (docWriter.pauseAllThreads()) {
docWriter.resumeAllThreads();
return false;
}
try {
SegmentInfo newSegment = null;
final int numDocs = docWriter.getNumDocsInRAM();
// Always flush docs if there are any
boolean flushDocs = numDocs > 0;
String docStoreSegment = docWriter.getDocStoreSegment();
assert docStoreSegment != null || numDocs == 0: "dss=" + docStoreSegment + " numDocs=" + numDocs;
if (docStoreSegment == null)
flushDocStores = false;
int docStoreOffset = docWriter.getDocStoreOffset();
boolean docStoreIsCompoundFile = false;
if (infoStream != null) {
message(" flush: segment=" + docWriter.getSegment() +
" docStoreSegment=" + docWriter.getDocStoreSegment() +
" docStoreOffset=" + docStoreOffset +
" flushDocs=" + flushDocs +
" flushDeletes=" + flushDeletes +
" flushDocStores=" + flushDocStores +
" numDocs=" + numDocs +
" numBufDelTerms=" + docWriter.getNumBufferedDeleteTerms());
message(" index before flush " + segString());
}
// Check if the doc stores must be separately flushed
// because other segments, besides the one we are about
// to flush, reference it
if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) {
// We must separately flush the doc store
if (infoStream != null)
message(" flush shared docStore segment " + docStoreSegment);
docStoreIsCompoundFile = flushDocStores();
flushDocStores = false;
}
String segment = docWriter.getSegment();
// If we are flushing docs, segment must not be null:
assert segment != null || !flushDocs;
if (flushDocs) {
boolean success = false;
final int flushedDocCount;
try {
flushedDocCount = docWriter.flush(flushDocStores);
success = true;
} finally {
if (!success) {
if (infoStream != null)
message("hit exception flushing segment " + segment);
deleter.refresh(segment);
}
}
if (0 == docStoreOffset && flushDocStores) {
// This means we are flushing private doc stores
// with this segment, so it will not be shared
// with other segments
assert docStoreSegment != null;
assert docStoreSegment.equals(segment);
docStoreOffset = -1;
docStoreIsCompoundFile = false;
docStoreSegment = null;
}
// Create new SegmentInfo, but do not add to our
// segmentInfos until deletes are flushed
// successfully.
newSegment = new SegmentInfo(segment,
flushedDocCount,
directory, false, docStoreOffset,
docStoreSegment, docStoreIsCompoundFile,
docWriter.hasProx(),
docWriter.getCodec());
setDiagnostics(newSegment, "flush");
}
docWriter.pushDeletes();
if (flushDocs) {
segmentInfos.add(newSegment);
checkpoint();
}
if (flushDocs && mergePolicy.useCompoundFile(segmentInfos, newSegment)) {
// Now build compound file
boolean success = false;
try {
docWriter.createCompoundFile(segment);
success = true;
} finally {
if (!success) {
if (infoStream != null)
message("hit exception creating compound file for newly flushed segment " + segment);
deleter.deleteFile(IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
}
}
newSegment.setUseCompoundFile(true);
checkpoint();
}
if (flushDeletes) {
applyDeletes();
}
if (flushDocs)
checkpoint();
doAfterFlush();
return flushDocs;
} catch (OutOfMemoryError oom) {
handleOOM(oom, "doFlush");
// never hit
return false;
} finally {
docWriter.clearFlushPending();
docWriter.resumeAllThreads();
}
} }
/** Expert: Return the total size of all index files currently cached in memory. /** Expert: Return the total size of all index files currently cached in memory.
@ -3535,7 +2995,8 @@ public class IndexWriter implements Closeable {
final int start = ensureContiguousMerge(merge); final int start = ensureContiguousMerge(merge);
commitMergedDeletes(merge, mergedReader); commitMergedDeletes(merge, mergedReader);
docWriter.remapDeletes(segmentInfos, merger.getDocMaps(), merger.getDelCounts(), merge, mergedDocCount); // nocommit
//docWriter.remapDeletes(segmentInfos, merger.getDocMaps(), merger.getDelCounts(), merge, mergedDocCount);
setMergeDocStoreIsCompoundFile(merge); setMergeDocStoreIsCompoundFile(merge);
merge.info.setHasProx(merger.hasProx()); merge.info.setHasProx(merger.hasProx());
@ -3749,7 +3210,8 @@ public class IndexWriter implements Closeable {
boolean mergeDocStores = false; boolean mergeDocStores = false;
boolean doFlushDocStore = false; boolean doFlushDocStore = false;
final String currentDocStoreSegment = docWriter.getDocStoreSegment(); // nocommit
//final String currentDocStoreSegment = docWriter.getDocStoreSegment();
// Test each segment to be merged: check if we need to // Test each segment to be merged: check if we need to
// flush/merge doc stores // flush/merge doc stores
@ -3793,9 +3255,10 @@ public class IndexWriter implements Closeable {
// If the segment is referencing the current "live" // If the segment is referencing the current "live"
// doc store outputs then we must merge // doc store outputs then we must merge
if (si.getDocStoreOffset() != -1 && currentDocStoreSegment != null && si.getDocStoreSegment().equals(currentDocStoreSegment)) { // nocommit
doFlushDocStore = true; // if (si.getDocStoreOffset() != -1 && currentDocStoreSegment != null && si.getDocStoreSegment().equals(currentDocStoreSegment)) {
} // doFlushDocStore = true;
// }
} }
final int docStoreOffset; final int docStoreOffset;
@ -3854,12 +3317,13 @@ public class IndexWriter implements Closeable {
// CFS: // CFS:
mergingSegments.add(merge.info); mergingSegments.add(merge.info);
} }
private void setDiagnostics(SegmentInfo info, String source) { // nocommit - private
static void setDiagnostics(SegmentInfo info, String source) {
setDiagnostics(info, source, null); setDiagnostics(info, source, null);
} }
private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) { private static void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
Map<String,String> diagnostics = new HashMap<String,String>(); Map<String,String> diagnostics = new HashMap<String,String>();
diagnostics.put("source", source); diagnostics.put("source", source);
diagnostics.put("lucene.version", Constants.LUCENE_VERSION); diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
@ -4003,11 +3467,12 @@ public class IndexWriter implements Closeable {
// readers will attempt to open an IndexInput // readers will attempt to open an IndexInput
// on files that have still-open IndexOutputs // on files that have still-open IndexOutputs
// against them: // against them:
if (dss.contains(docWriter.getDocStoreSegment())) { // nocommit
if (infoStream != null) // if (dss.contains(docWriter.getDocStoreSegment())) {
message("now flush at mergeMiddle"); // if (infoStream != null)
doFlush(true, false); // message("now flush at mergeMiddle");
} // doFlush(true, false);
// }
} }
for(int i=0;i<numSegments;i++) { for(int i=0;i<numSegments;i++) {
@ -4227,12 +3692,16 @@ public class IndexWriter implements Closeable {
// For test purposes. // For test purposes.
final synchronized int getBufferedDeleteTermsSize() { final synchronized int getBufferedDeleteTermsSize() {
return docWriter.getBufferedDeleteTerms().size(); // nocommit
return 0;
//return docWriter.getBufferedDeleteTerms().size();
} }
// For test purposes. // For test purposes.
final synchronized int getNumBufferedDeleteTerms() { final synchronized int getNumBufferedDeleteTerms() {
return docWriter.getNumBufferedDeleteTerms(); // nocommit
return 0;
//return docWriter.getNumBufferedDeleteTerms();
} }
// utility routines for tests // utility routines for tests

View File

@ -1,114 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import java.io.IOException;
final class TermsHashPerThread extends InvertedDocConsumerPerThread {
final TermsHash termsHash;
final TermsHashConsumerPerThread consumer;
final TermsHashPerThread nextPerThread;
final IntBlockPool intPool;
final ByteBlockPool bytePool;
final ByteBlockPool termBytePool;
final boolean primary;
final DocumentsWriter.DocState docState;
// Used when comparing postings via termRefComp, in TermsHashPerField
final BytesRef tr1 = new BytesRef();
final BytesRef tr2 = new BytesRef();
// Used by perField:
final BytesRef utf8 = new BytesRef(10);
public TermsHashPerThread(DocInverterPerThread docInverterPerThread, final TermsHash termsHash, final TermsHash nextTermsHash, final TermsHashPerThread primaryPerThread) {
docState = docInverterPerThread.docState;
this.termsHash = termsHash;
this.consumer = termsHash.consumer.addThread(this);
intPool = new IntBlockPool(termsHash.docWriter);
bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator);
if (nextTermsHash != null) {
// We are primary
primary = true;
termBytePool = bytePool;
} else {
primary = false;
termBytePool = primaryPerThread.bytePool;
}
if (nextTermsHash != null)
nextPerThread = nextTermsHash.addThread(docInverterPerThread, this);
else
nextPerThread = null;
}
@Override
InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
return new TermsHashPerField(docInverterPerField, this, nextPerThread, fieldInfo);
}
@Override
synchronized public void abort() {
reset(true);
consumer.abort();
if (nextPerThread != null)
nextPerThread.abort();
}
@Override
public void startDocument() throws IOException {
consumer.startDocument();
if (nextPerThread != null)
nextPerThread.consumer.startDocument();
}
@Override
public DocumentsWriter.DocWriter finishDocument() throws IOException {
final DocumentsWriter.DocWriter doc = consumer.finishDocument();
final DocumentsWriter.DocWriter doc2;
if (nextPerThread != null)
doc2 = nextPerThread.consumer.finishDocument();
else
doc2 = null;
if (doc == null)
return doc2;
else {
doc.setNext(doc2);
return doc;
}
}
// Clear all state
void reset(boolean recyclePostings) {
intPool.reset();
bytePool.reset();
if (primary) {
bytePool.reset();
}
}
}