LUCENE-3606: move norms merging to codec

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210295 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-12-05 00:27:50 +00:00
parent f678b69ccf
commit a81ef9a3d3
4 changed files with 95 additions and 42 deletions

View File

@ -407,7 +407,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
public boolean hasNorms() {
for (FieldInfo fi : this) {
if (!fi.omitNorms) {
if (fi.isIndexed && !fi.omitNorms) {
return true;
}
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
@ -28,13 +27,12 @@ import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldInfosWriter;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.NormsWriter;
import org.apache.lucene.index.codecs.StoredFieldsWriter;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.TermVectorsWriter;
import org.apache.lucene.index.codecs.lucene40.Lucene40NormsWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
@ -122,7 +120,11 @@ final class SegmentMerger {
final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, segment, mergeState.fieldInfos, mergeState.mergedDocCount, termIndexInterval, codec, null, context);
mergeTerms(segmentWriteState);
mergePerDoc(segmentWriteState);
mergeNorms();
if (mergeState.fieldInfos.hasNorms()) {
int numMerged = mergeNorms(segmentWriteState);
assert numMerged == mergeState.mergedDocCount;
}
if (mergeState.fieldInfos.hasVectors()) {
int numMerged = mergeVectors();
@ -325,49 +327,19 @@ final class SegmentMerger {
}
}
// TODO: implement merge in normsformat instead.
private void mergeNorms() throws IOException {
IndexOutput output = null;
private int mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
final NormsWriter writer = codec.normsFormat().normsWriter(segmentWriteState);
boolean success = false;
try {
for (FieldInfo fi : mergeState.fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
if (output == null) {
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION), context);
output.writeBytes(Lucene40NormsWriter.NORMS_HEADER, Lucene40NormsWriter.NORMS_HEADER.length);
}
for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
final int maxDoc = reader.reader.maxDoc();
byte normBuffer[] = reader.reader.norms(fi.name);
if (normBuffer == null) {
// Can be null if this segment doesn't have
// any docs with this field
normBuffer = new byte[maxDoc];
Arrays.fill(normBuffer, (byte)0);
}
if (reader.liveDocs == null) {
//optimized case for segments without deleted docs
output.writeBytes(normBuffer, maxDoc);
} else {
// this segment has deleted docs, so we have to
// check for every doc if it is deleted or not
final Bits liveDocs = reader.liveDocs;
for (int k = 0; k < maxDoc; k++) {
if (liveDocs.get(k)) {
output.writeByte(normBuffer[k]);
}
}
}
mergeState.checkAbort.work(maxDoc);
}
}
}
int numMerged = writer.merge(mergeState);
success = true;
return numMerged;
} finally {
if (success) {
IOUtils.close(output);
IOUtils.close(writer);
} else {
IOUtils.closeWhileHandlingException(output);
IOUtils.closeWhileHandlingException(writer);
}
}
}

View File

@ -19,8 +19,11 @@ package org.apache.lucene.index.codecs;
import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.util.Bits;
// simple api just for now before switching to docvalues apis
public abstract class NormsWriter implements Closeable {
@ -31,4 +34,37 @@ public abstract class NormsWriter implements Closeable {
public abstract void writeNorm(byte norm) throws IOException;
public abstract void finish(int numDocs) throws IOException;
public int merge(MergeState mergeState) throws IOException {
int numMergedDocs = 0;
for (FieldInfo fi : mergeState.fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
startField(fi);
int numMergedDocsForField = 0;
for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
final int maxDoc = reader.reader.maxDoc();
byte normBuffer[] = reader.reader.norms(fi.name);
if (normBuffer == null) {
// Can be null if this segment doesn't have
// any docs with this field
normBuffer = new byte[maxDoc];
Arrays.fill(normBuffer, (byte)0);
}
// this segment has deleted docs, so we have to
// check for every doc if it is deleted or not
final Bits liveDocs = reader.liveDocs;
for (int k = 0; k < maxDoc; k++) {
if (liveDocs == null || liveDocs.get(k)) {
writeNorm(normBuffer[k]);
numMergedDocsForField++;
}
}
mergeState.checkAbort.work(maxDoc);
}
assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
numMergedDocs = numMergedDocsForField;
}
}
finish(numMergedDocs);
return numMergedDocs;
}
}

View File

@ -18,13 +18,16 @@ package org.apache.lucene.index.codecs.lucene40;
*/
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.codecs.NormsWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
public class Lucene40NormsWriter extends NormsWriter {
@ -66,6 +69,48 @@ public class Lucene40NormsWriter extends NormsWriter {
throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*(long)numDocs) + " actual=" + out.getFilePointer());
}
}
/** we override merge and bulk-merge norms when there are no deletions */
@Override
public int merge(MergeState mergeState) throws IOException {
int numMergedDocs = 0;
for (FieldInfo fi : mergeState.fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
startField(fi);
int numMergedDocsForField = 0;
for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
final int maxDoc = reader.reader.maxDoc();
byte normBuffer[] = reader.reader.norms(fi.name);
if (normBuffer == null) {
// Can be null if this segment doesn't have
// any docs with this field
normBuffer = new byte[maxDoc];
Arrays.fill(normBuffer, (byte)0);
}
if (reader.liveDocs == null) {
//optimized case for segments without deleted docs
out.writeBytes(normBuffer, maxDoc);
numMergedDocsForField += maxDoc;
} else {
// this segment has deleted docs, so we have to
// check for every doc if it is deleted or not
final Bits liveDocs = reader.liveDocs;
for (int k = 0; k < maxDoc; k++) {
if (liveDocs.get(k)) {
numMergedDocsForField++;
out.writeByte(normBuffer[k]);
}
}
}
mergeState.checkAbort.work(maxDoc);
}
assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
numMergedDocs = numMergedDocsForField;
}
}
finish(numMergedDocs);
return numMergedDocs;
}
@Override
public void close() throws IOException {