From 2d93f7e288327bc6a4ee77bd4f0d1edbf6f913a1 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 23 Jun 2009 20:32:36 +0000 Subject: [PATCH] LUCENE-1708: optimize deletes X matching reader when merging stored fields & vectors git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@787827 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 + common-build.xml | 2 +- .../org/apache/lucene/index/IndexReader.java | 46 ++- .../apache/lucene/index/SegmentMerger.java | 388 +++++++++++------- .../apache/lucene/index/SegmentReader.java | 7 - .../lucene/index/TestSegmentReader.java | 6 - 6 files changed, 276 insertions(+), 176 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 083548ec326..57677416c64 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -112,6 +112,9 @@ Changes in runtime behavior rely on this behavior by the 3.0 release of Lucene. (Jonathan Mamou, Mark Miller via Mike McCandless) + 7. LUCENE-1708 - IndexReader.document() no longer checks if the document is + deleted. You can call IndexReader.isDeleted(n) prior to calling document(n). + (Shai Erera via Mike McCandless) API Changes diff --git a/common-build.xml b/common-build.xml index f4ca41faba3..1d27e3be304 100644 --- a/common-build.xml +++ b/common-build.xml @@ -42,7 +42,7 @@ - + diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java index 8dd89d8be49..4bb7f5a1558 100644 --- a/src/java/org/apache/lucene/index/IndexReader.java +++ b/src/java/org/apache/lucene/index/IndexReader.java @@ -819,8 +819,16 @@ public abstract class IndexReader implements Cloneable { return maxDoc() - numDocs(); } - /** Returns the stored fields of the nth - Document in this index. + /** + * Returns the stored fields of the nth + * Document in this index. + *
+ * NOTE: for performance reasons, this method does not check if the + * requested document is deleted, and therefore asking for a deleted document + * may yield unspecified results. Usually this is not required, however you + * can call {@link #isDeleted(int)} with the requested document ID to verify + * the document is not deleted. + * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ @@ -830,30 +838,38 @@ public abstract class IndexReader implements Cloneable { } /** - * Get the {@link org.apache.lucene.document.Document} at the nth position. The {@link org.apache.lucene.document.FieldSelector} - * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded. + * Get the {@link org.apache.lucene.document.Document} at the n + * th position. The {@link FieldSelector} may be used to determine + * what {@link org.apache.lucene.document.Field}s to load and how they should + * be loaded. NOTE: If this Reader (more specifically, the underlying + * FieldsReader) is closed before the lazy + * {@link org.apache.lucene.document.Field} is loaded an exception may be + * thrown. If you want the value of a lazy + * {@link org.apache.lucene.document.Field} to be available after closing you + * must explicitly load it or fetch the Document again with a new loader. + *
+ * NOTE: for performance reasons, this method does not check if the + * requested document is deleted, and therefore asking for a deleted document + * may yield unspecified results. Usually this is not required, however you + * can call {@link #isDeleted(int)} with the requested document ID to verify + * the document is not deleted. * - * NOTE: If this Reader (more specifically, the underlying FieldsReader) is closed before the lazy {@link org.apache.lucene.document.Field} is - * loaded an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must - * explicitly load it or fetch the Document again with a new loader. - * - * * @param n Get the document at the nth position - * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. - * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position + * @param fieldSelector The {@link FieldSelector} to use to determine what + * Fields should be loaded on the Document. May be null, in which case + * all Fields will be loaded. + * @return The stored fields of the + * {@link org.apache.lucene.document.Document} at the nth position * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error - * * @see org.apache.lucene.document.Fieldable * @see org.apache.lucene.document.FieldSelector * @see org.apache.lucene.document.SetBasedFieldSelector * @see org.apache.lucene.document.LoadFirstFieldSelector */ - //When we convert to JDK 1.5 make this Set + // TODO (1.5): When we convert to JDK 1.5 make this Set public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException; - - /** Returns true if document n has been deleted */ public abstract boolean isDeleted(int n); diff --git a/src/java/org/apache/lucene/index/SegmentMerger.java b/src/java/org/apache/lucene/index/SegmentMerger.java index 3d30bfe0c65..4bc809f6f5e 100644 --- a/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/src/java/org/apache/lucene/index/SegmentMerger.java @@ -26,6 +26,8 @@ import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.FieldSelectorResult; +import org.apache.lucene.index.IndexReader.FieldOption; +import org.apache.lucene.index.MergePolicy.MergeAbortedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -55,7 +57,7 @@ final class SegmentMerger { private int mergedDocs; - private CheckAbort checkAbort; + private final CheckAbort checkAbort; // Whether we should merge doc stores (stored fields and // vectors files). 
When all segments we are merging @@ -75,13 +77,25 @@ final class SegmentMerger { SegmentMerger(Directory dir, String name) { directory = dir; segment = name; + checkAbort = new CheckAbort(null, null) { + public void work(double units) throws MergeAbortedException { + // do nothing + } + }; } SegmentMerger(IndexWriter writer, String name, MergePolicy.OneMerge merge) { directory = writer.getDirectory(); segment = name; - if (merge != null) + if (merge != null) { checkAbort = new CheckAbort(merge, directory); + } else { + checkAbort = new CheckAbort(null, null) { + public void work(double units) throws MergeAbortedException { + // do nothing + } + }; + } termIndexInterval = writer.getTermIndexInterval(); } @@ -152,9 +166,8 @@ final class SegmentMerger { * @throws IOException */ final void closeReaders() throws IOException { - for (int i = 0; i < readers.size(); i++) { // close readers - IndexReader reader = (IndexReader) readers.get(i); - reader.close(); + for (Iterator iter = readers.iterator(); iter.hasNext();) { + ((IndexReader) iter.next()).close(); } } @@ -206,12 +219,17 @@ final class SegmentMerger { return files; } - private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean storePayloads, boolean omitTermFreqAndPositions) throws IOException { + private void addIndexed(IndexReader reader, FieldInfos fInfos, + Collection names, boolean storeTermVectors, + boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, + boolean storePayloads, boolean omitTFAndPositions) + throws IOException { Iterator i = names.iterator(); while (i.hasNext()) { - String field = (String)i.next(); - fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads, omitTermFreqAndPositions); + String field = (String) i.next(); + fInfos.add(field, true, storeTermVectors, + storePositionWithTermVector, storeOffsetWithTermVector, !reader + .hasNorms(field), storePayloads, omitTFAndPositions); } } @@ -223,22 +241,26 @@ final class SegmentMerger { // If the i'th reader is a SegmentReader and has // identical fieldName -> number mapping, then this // array will be non-null at position i: - matchingSegmentReaders = new SegmentReader[readers.size()]; + int numReaders = readers.size(); + matchingSegmentReaders = new SegmentReader[numReaders]; // If this reader is a SegmentReader, and all of its // field name -> number mappings match the "merged" // FieldInfos, then we can do a bulk copy of the // stored fields: - for (int i = 0; i < readers.size(); i++) { + for (int i = 0; i < numReaders; i++) { IndexReader reader = (IndexReader) readers.get(i); if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; boolean same = true; FieldInfos segmentFieldInfos = segmentReader.getFieldInfos(); - for (int j = 0; same && j < segmentFieldInfos.size(); j++) + int numFieldInfos = segmentFieldInfos.size(); + for (int j = 0; same && j < numFieldInfos; j++) { same = fieldInfos.fieldName(j).equals(segmentFieldInfos.fieldName(j)); - if (same) + } + if (same) { matchingSegmentReaders[i] = segmentReader; + } } } @@ -268,23 +290,28 @@ final class SegmentMerger { fieldInfos = new FieldInfos(); // merge field names } - for (int i = 0; i < readers.size(); i++) { - IndexReader reader = (IndexReader) readers.get(i); + for (Iterator iter = readers.iterator(); iter.hasNext();) { + IndexReader 
reader = (IndexReader) iter.next(); if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; - for (int j = 0; j < segmentReader.getFieldInfos().size(); j++) { - FieldInfo fi = segmentReader.getFieldInfos().fieldInfo(j); - fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads, fi.omitTermFreqAndPositions); + FieldInfos readerFieldInfos = segmentReader.getFieldInfos(); + int numReaderFieldInfos = readerFieldInfos.size(); + for (int j = 0; j < numReaderFieldInfos; j++) { + FieldInfo fi = readerFieldInfos.fieldInfo(j); + fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, + fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, + !reader.hasNorms(fi.name), fi.storePayloads, + fi.omitTermFreqAndPositions); } } else { - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false); - fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false); + addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.TERMVECTOR), true, false, false, false, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); + addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false); + addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false); + fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false); } } fieldInfos.write(directory, segment + ".fnm"); @@ -307,64 +334,23 @@ final class SegmentMerger { final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); try { - for (int i = 0; i < readers.size(); i++) { - final IndexReader reader = (IndexReader) readers.get(i); - final SegmentReader matchingSegmentReader = matchingSegmentReaders[i]; - final FieldsReader matchingFieldsReader; - final boolean hasMatchingReader; + int idx = 0; + for (Iterator iter = readers.iterator(); iter.hasNext();) { + final IndexReader reader = (IndexReader) iter.next(); + final SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; + FieldsReader matchingFieldsReader = null; if (matchingSegmentReader != 
null) { final FieldsReader fieldsReader = matchingSegmentReader.getFieldsReader(); - if (fieldsReader != null && !fieldsReader.canReadRawDocs()) { - matchingFieldsReader = null; - hasMatchingReader = false; - } else { + if (fieldsReader != null && fieldsReader.canReadRawDocs()) { matchingFieldsReader = fieldsReader; - hasMatchingReader = true; } - } else { - hasMatchingReader = false; - matchingFieldsReader = null; } - final int maxDoc = reader.maxDoc(); - final boolean hasDeletions = reader.hasDeletions(); - for (int j = 0; j < maxDoc;) { - if (!hasDeletions || !reader.isDeleted(j)) { // skip deleted docs - if (hasMatchingReader) { - // We can optimize this case (doing a bulk - // byte copy) since the field numbers are - // identical - int start = j; - int numDocs = 0; - do { - j++; - numDocs++; - if (j >= maxDoc) - break; - if (hasDeletions && matchingSegmentReader.isDeleted(j)) { - j++; - break; - } - } while(numDocs < MAX_RAW_MERGE_DOCS); - - IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs); - fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs); - docCount += numDocs; - if (checkAbort != null) - checkAbort.work(300*numDocs); - } else { - // NOTE: it's very important to first assign - // to doc then pass it to - // termVectorsWriter.addAllDocVectors; see - // LUCENE-1282 - Document doc = reader.document(j, fieldSelectorMerge); - fieldsWriter.addDocument(doc); - j++; - docCount++; - if (checkAbort != null) - checkAbort.work(300); - } - } else - j++; + if (reader.hasDeletions()) { + docCount += copyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter, + reader, matchingFieldsReader); + } else { + docCount += copyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter, + reader, matchingFieldsReader); } } } finally { @@ -385,12 +371,89 @@ final class SegmentMerger { // If we are skipping the doc stores, that means there // are no deletions in any of these segments, so we // just sum numDocs() of each segment to get total docCount - for (int i = 0; i < readers.size(); i++) - docCount += ((IndexReader) readers.get(i)).numDocs(); + for (Iterator iter = readers.iterator(); iter.hasNext();) { + docCount += ((IndexReader) iter.next()).numDocs(); + } return docCount; } + private int copyFieldsWithDeletions(final FieldSelector fieldSelectorMerge, + final FieldsWriter fieldsWriter, final IndexReader reader, + final FieldsReader matchingFieldsReader) + throws IOException, MergeAbortedException, CorruptIndexException { + int docCount = 0; + final int maxDoc = reader.maxDoc(); + if (matchingFieldsReader != null) { + // We can bulk-copy because the fieldInfos are "congruent" + for (int j = 0; j < maxDoc;) { + if (reader.isDeleted(j)) { + // skip deleted docs + ++j; + continue; + } + // We can optimize this case (doing a bulk byte copy) since the field + // numbers are identical + int start = j, numDocs = 0; + do { + j++; + numDocs++; + if (j >= maxDoc) break; + if (reader.isDeleted(j)) { + j++; + break; + } + } while(numDocs < MAX_RAW_MERGE_DOCS); + + IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs); + fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs); + docCount += numDocs; + checkAbort.work(300 * numDocs); + } + } else { + for (int j = 0; j < maxDoc; j++) { + if (reader.isDeleted(j)) { + // skip deleted docs + continue; + } + // NOTE: it's very important to first assign to doc then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + Document doc = reader.document(j, fieldSelectorMerge); + 
fieldsWriter.addDocument(doc); + docCount++; + checkAbort.work(300); + } + } + return docCount; + } + + private int copyFieldsNoDeletions(FieldSelector fieldSelectorMerge, + final FieldsWriter fieldsWriter, final IndexReader reader, + final FieldsReader matchingFieldsReader) + throws IOException, MergeAbortedException, CorruptIndexException { + final int maxDoc = reader.maxDoc(); + int docCount = 0; + if (matchingFieldsReader != null) { + // We can bulk-copy because the fieldInfos are "congruent" + while (docCount < maxDoc) { + int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); + IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, docCount, len); + fieldsWriter.addRawDocuments(stream, rawDocLengths, len); + docCount += len; + checkAbort.work(300 * len); + } + } else { + for (; docCount < maxDoc; docCount++) { + // NOTE: it's very important to first assign to doc then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + Document doc = reader.document(docCount, fieldSelectorMerge); + fieldsWriter.addDocument(doc); + checkAbort.work(300); + } + } + return docCount; + } + /** * Merge the TermVectors from each of the segments into the new one. * @throws IOException @@ -400,65 +463,24 @@ final class SegmentMerger { new TermVectorsWriter(directory, segment, fieldInfos); try { - for (int r = 0; r < readers.size(); r++) { - final SegmentReader matchingSegmentReader = matchingSegmentReaders[r]; - TermVectorsReader matchingVectorsReader; - final boolean hasMatchingReader; + int idx = 0; + for (Iterator iter = readers.iterator(); iter.hasNext();) { + final SegmentReader matchingSegmentReader = matchingSegmentReaders[idx++]; + TermVectorsReader matchingVectorsReader = null; if (matchingSegmentReader != null) { - matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig; + TermVectorsReader vectorsReader = matchingSegmentReader.termVectorsReaderOrig; - // If the TV* files are an older format then they - // cannot read raw docs: - if (matchingVectorsReader != null && !matchingVectorsReader.canReadRawDocs()) { - matchingVectorsReader = null; - hasMatchingReader = false; - } else - hasMatchingReader = matchingVectorsReader != null; - - } else { - hasMatchingReader = false; - matchingVectorsReader = null; + // If the TV* files are an older format then they cannot read raw docs: + if (vectorsReader != null && vectorsReader.canReadRawDocs()) { + matchingVectorsReader = vectorsReader; + } } - IndexReader reader = (IndexReader) readers.get(r); - final boolean hasDeletions = reader.hasDeletions(); - int maxDoc = reader.maxDoc(); - for (int docNum = 0; docNum < maxDoc;) { - // skip deleted docs - if (!hasDeletions || !reader.isDeleted(docNum)) { - if (hasMatchingReader) { - // We can optimize this case (doing a bulk - // byte copy) since the field numbers are - // identical - int start = docNum; - int numDocs = 0; - do { - docNum++; - numDocs++; - if (docNum >= maxDoc) - break; - if (hasDeletions && matchingSegmentReader.isDeleted(docNum)) { - docNum++; - break; - } - } while(numDocs < MAX_RAW_MERGE_DOCS); - - matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); - termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); - if (checkAbort != null) - checkAbort.work(300*numDocs); - } else { - // NOTE: it's very important to first assign - // to vectors then pass it to - // termVectorsWriter.addAllDocVectors; see - // LUCENE-1282 - TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); - 
termVectorsWriter.addAllDocVectors(vectors); - docNum++; - if (checkAbort != null) - checkAbort.work(300); - } - } else - docNum++; + final IndexReader reader = (IndexReader) iter.next(); + if (reader.hasDeletions()) { + copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader); + } else { + copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader); + } } } finally { @@ -476,6 +498,78 @@ final class SegmentMerger { throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption"); } + private void copyVectorsWithDeletions(final TermVectorsWriter termVectorsWriter, + final TermVectorsReader matchingVectorsReader, + final IndexReader reader) + throws IOException, MergeAbortedException { + final int maxDoc = reader.maxDoc(); + if (matchingVectorsReader != null) { + // We can bulk-copy because the fieldInfos are "congruent" + for (int docNum = 0; docNum < maxDoc;) { + if (reader.isDeleted(docNum)) { + // skip deleted docs + ++docNum; + continue; + } + // We can optimize this case (doing a bulk byte copy) since the field + // numbers are identical + int start = docNum, numDocs = 0; + do { + docNum++; + numDocs++; + if (docNum >= maxDoc) break; + if (reader.isDeleted(docNum)) { + docNum++; + break; + } + } while(numDocs < MAX_RAW_MERGE_DOCS); + + matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); + termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); + checkAbort.work(300 * numDocs); + } + } else { + for (int docNum = 0; docNum < maxDoc; docNum++) { + if (reader.isDeleted(docNum)) { + // skip deleted docs + continue; + } + + // NOTE: it's very important to first assign to vectors then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); + termVectorsWriter.addAllDocVectors(vectors); + checkAbort.work(300); + } + } + } + + private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter, + final TermVectorsReader matchingVectorsReader, + final IndexReader reader) + throws IOException, MergeAbortedException { + final int maxDoc = reader.maxDoc(); + if (matchingVectorsReader != null) { + // We can bulk-copy because the fieldInfos are "congruent" + int docCount = 0; + while (docCount < maxDoc) { + int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); + matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, docCount, len); + termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len); + docCount += len; + checkAbort.work(300 * len); + } + } else { + for (int docNum = 0; docNum < maxDoc; docNum++) { + // NOTE: it's very important to first assign to vectors then pass it to + // termVectorsWriter.addAllDocVectors; see LUCENE-1282 + TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); + termVectorsWriter.addAllDocVectors(vectors); + checkAbort.work(300); + } + } + } + private SegmentMergeQueue queue = null; private final void mergeTerms() throws CorruptIndexException, IOException { @@ -519,7 +613,7 @@ final class SegmentMerger { assert reader.numDocs() == reader.maxDoc() - smi.delCount; if (smi.next()) - queue.put(smi); // initialize queue + queue.add(smi); // initialize queue else smi.close(); } @@ -551,13 +645,12 @@ final class SegmentMerger { int df = appendPostings(termsConsumer, match, matchSize); // add new TermInfo - if (checkAbort 
!= null) - checkAbort.work(df/3.0); + checkAbort.work(df/3.0); while (matchSize > 0) { SegmentMergeInfo smi = match[--matchSize]; if (smi.next()) - queue.put(smi); // restore queue + queue.add(smi); // restore queue else smi.close(); // done with a segment } @@ -631,15 +724,16 @@ final class SegmentMerger { byte[] normBuffer = null; IndexOutput output = null; try { - for (int i = 0; i < fieldInfos.size(); i++) { + int numFieldInfos = fieldInfos.size(); + for (int i = 0; i < numFieldInfos; i++) { FieldInfo fi = fieldInfos.fieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { if (output == null) { output = directory.createOutput(segment + "." + IndexFileNames.NORMS_EXTENSION); output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); } - for (int j = 0; j < readers.size(); j++) { - IndexReader reader = (IndexReader) readers.get(j); + for (Iterator iter = readers.iterator(); iter.hasNext();) { + IndexReader reader = (IndexReader) iter.next(); int maxDoc = reader.maxDoc(); if (normBuffer == null || normBuffer.length < maxDoc) { // the buffer is too small for the current segment @@ -658,8 +752,7 @@ final class SegmentMerger { } } } - if (checkAbort != null) - checkAbort.work(maxDoc); + checkAbort.work(maxDoc); } } } @@ -670,7 +763,7 @@ final class SegmentMerger { } } - final static class CheckAbort { + static class CheckAbort { private double workCount; private MergePolicy.OneMerge merge; private Directory dir; @@ -695,4 +788,5 @@ final class SegmentMerger { } } } + } diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java index 57cc080a991..49ac0fea333 100644 --- a/src/java/org/apache/lucene/index/SegmentReader.java +++ b/src/java/org/apache/lucene/index/SegmentReader.java @@ -849,15 +849,8 @@ class SegmentReader extends IndexReader implements Cloneable { return fieldInfos; } - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { ensureOpen(); - if (isDeleted(n)) - throw new IllegalArgumentException - ("attempt to access a deleted document"); return getFieldsReader().doc(n, fieldSelector); } diff --git a/src/test/org/apache/lucene/index/TestSegmentReader.java b/src/test/org/apache/lucene/index/TestSegmentReader.java index 1bbe37879da..515a4e4b926 100644 --- a/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -81,12 +81,6 @@ public class TestSegmentReader extends LuceneTestCase { assertTrue(deleteReader.isDeleted(0) == true); assertTrue(deleteReader.hasDeletions() == true); assertTrue(deleteReader.numDocs() == 0); - try { - deleteReader.document(0); - fail(); - } catch (IllegalArgumentException e) { - // expcected exception - } } public void testGetFieldNameVariations() {
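
Note for readers (illustrative, not part of the patch): with this change, IndexReader.document(n) no longer rejects deleted documents, so code that walks raw document IDs is expected to test IndexReader.isDeleted(n) itself, as the new javadoc and the CHANGES.txt entry describe. A minimal sketch of that calling pattern against the public IndexReader API; the class name, the index path, and the FSDirectory.getDirectory call are illustrative choices, not taken from the patch:

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class VisitLiveDocs {
  public static void main(String[] args) throws CorruptIndexException, IOException {
    Directory dir = FSDirectory.getDirectory("/path/to/index"); // illustrative location
    IndexReader reader = IndexReader.open(dir);
    try {
      for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.isDeleted(i)) {
          continue; // document(i) no longer throws here, but its contents are unspecified
        }
        Document doc = reader.document(i);
        System.out.println(doc);
      }
    } finally {
      reader.close();
      dir.close();
    }
  }
}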
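
The new copyFieldsWithDeletions and copyVectorsWithDeletions methods share one loop shape: skip deleted documents, grow a run of consecutive live documents (capped by MAX_RAW_MERGE_DOCS), and hand the whole run to a single raw bulk copy. Below is a self-contained sketch of just that run detection, where a boolean[] stands in for the reader's deletions and a RawCopier callback stands in for the FieldsReader.rawDocs / addRawDocuments pair; the names and the cap parameter are illustrative, not the patch's own:

/** Standalone illustration of the run-detection loop; not part of the patch. */
public class BulkCopyRuns {

  /** Callback standing in for one raw bulk copy of docs [start, start + numDocs). */
  public interface RawCopier {
    void copyRaw(int start, int numDocs);
  }

  /** Copies live docs in contiguous runs; returns how many docs were copied. */
  public static int copyLiveRuns(boolean[] deleted, int maxRawMergeDocs, RawCopier copier) {
    int copied = 0;
    final int maxDoc = deleted.length;
    for (int j = 0; j < maxDoc;) {
      if (deleted[j]) {        // skip deleted docs
        j++;
        continue;
      }
      int start = j, numDocs = 0;
      do {                     // grow the run of consecutive live docs
        j++;
        numDocs++;
        if (j >= maxDoc) break;
        if (deleted[j]) {      // run ends just before the next deleted doc,
          j++;                 // which can be skipped immediately
          break;
        }
      } while (numDocs < maxRawMergeDocs);
      copier.copyRaw(start, numDocs); // one bulk copy per run
      copied += numDocs;
    }
    return copied;
  }
}

For example, copyLiveRuns(new boolean[] {false, false, true, false}, 2, copier) issues two bulk copies, one for docs 0-1 and one for doc 3, which is the behavior the refactored merge loops rely on to replace the old per-document copy when a matching reader is available.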