diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
index 8e12ef5e864..4a962b6e289 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java
@@ -407,7 +407,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
 
   public boolean hasNorms() {
     for (FieldInfo fi : this) {
-      if (!fi.omitNorms) {
+      if (fi.isIndexed && !fi.omitNorms) {
         return true;
       }
     }
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index 0940d8e63eb..cfb7301ded9 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 
@@ -28,13 +27,12 @@ import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldInfosWriter;
 import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.index.codecs.StoredFieldsWriter;
 import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.codecs.TermVectorsWriter;
-import org.apache.lucene.index.codecs.lucene40.Lucene40NormsWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
@@ -122,7 +120,11 @@ final class SegmentMerger {
     final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, segment, mergeState.fieldInfos, mergeState.mergedDocCount, termIndexInterval, codec, null, context);
     mergeTerms(segmentWriteState);
     mergePerDoc(segmentWriteState);
-    mergeNorms();
+
+    if (mergeState.fieldInfos.hasNorms()) {
+      int numMerged = mergeNorms(segmentWriteState);
+      assert numMerged == mergeState.mergedDocCount;
+    }
 
     if (mergeState.fieldInfos.hasVectors()) {
       int numMerged = mergeVectors();
@@ -325,49 +327,19 @@ final class SegmentMerger {
     }
   }
 
-  // TODO: implement merge in normsformat instead.
-  private void mergeNorms() throws IOException {
-    IndexOutput output = null;
+  private int mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
+    final NormsWriter writer = codec.normsFormat().normsWriter(segmentWriteState);
+
     boolean success = false;
     try {
-      for (FieldInfo fi : mergeState.fieldInfos) {
-        if (fi.isIndexed && !fi.omitNorms) {
-          if (output == null) {
-            output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION), context);
-            output.writeBytes(Lucene40NormsWriter.NORMS_HEADER, Lucene40NormsWriter.NORMS_HEADER.length);
-          }
-          for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
-            final int maxDoc = reader.reader.maxDoc();
-            byte normBuffer[] = reader.reader.norms(fi.name);
-            if (normBuffer == null) {
-              // Can be null if this segment doesn't have
-              // any docs with this field
-              normBuffer = new byte[maxDoc];
-              Arrays.fill(normBuffer, (byte)0);
-            }
-            if (reader.liveDocs == null) {
-              //optimized case for segments without deleted docs
-              output.writeBytes(normBuffer, maxDoc);
-            } else {
-              // this segment has deleted docs, so we have to
-              // check for every doc if it is deleted or not
-              final Bits liveDocs = reader.liveDocs;
-              for (int k = 0; k < maxDoc; k++) {
-                if (liveDocs.get(k)) {
-                  output.writeByte(normBuffer[k]);
-                }
-              }
-            }
-            mergeState.checkAbort.work(maxDoc);
-          }
-        }
-      }
+      int numMerged = writer.merge(mergeState);
       success = true;
+      return numMerged;
     } finally {
       if (success) {
-        IOUtils.close(output);
+        IOUtils.close(writer);
       } else {
-        IOUtils.closeWhileHandlingException(output);
+        IOUtils.closeWhileHandlingException(writer);
       }
     }
   }
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/NormsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/NormsWriter.java
index 2be8db57a70..aa1e5b55dfb 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/NormsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/NormsWriter.java
@@ -19,8 +19,11 @@ package org.apache.lucene.index.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.util.Bits;
 
 // simple api just for now before switching to docvalues apis
 public abstract class NormsWriter implements Closeable {
@@ -31,4 +34,37 @@
   public abstract void writeNorm(byte norm) throws IOException;
 
   public abstract void finish(int numDocs) throws IOException;
+
+  public int merge(MergeState mergeState) throws IOException {
+    int numMergedDocs = 0;
+    for (FieldInfo fi : mergeState.fieldInfos) {
+      if (fi.isIndexed && !fi.omitNorms) {
+        startField(fi);
+        int numMergedDocsForField = 0;
+        for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+          final int maxDoc = reader.reader.maxDoc();
+          byte normBuffer[] = reader.reader.norms(fi.name);
+          if (normBuffer == null) {
+            // Can be null if this segment doesn't have
+            // any docs with this field
+            normBuffer = new byte[maxDoc];
+            Arrays.fill(normBuffer, (byte)0);
+          }
+          // check each doc for deletion; liveDocs is null
+          // when this segment has no deleted docs
+          final Bits liveDocs = reader.liveDocs;
+          for (int k = 0; k < maxDoc; k++) {
+            if (liveDocs == null || liveDocs.get(k)) {
+              writeNorm(normBuffer[k]);
+              numMergedDocsForField++;
+            }
+          }
+          mergeState.checkAbort.work(maxDoc);
+        }
+        assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
+        numMergedDocs = numMergedDocsForField;
+      }
+    }
+    finish(numMergedDocs);
+    return numMergedDocs;
+  }
 }
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java
index ac693badbab..576b01626fc 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java
@@ -18,13 +18,16 @@ package org.apache.lucene.index.codecs.lucene40;
  */
 
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
 
 public class Lucene40NormsWriter extends NormsWriter {
@@ -66,6 +69,48 @@ public class Lucene40NormsWriter extends NormsWriter {
       throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*(long)numDocs) + " actual=" + out.getFilePointer());
     }
   }
+
+  /** we override merge and bulk-merge norms when there are no deletions */
+  @Override
+  public int merge(MergeState mergeState) throws IOException {
+    int numMergedDocs = 0;
+    for (FieldInfo fi : mergeState.fieldInfos) {
+      if (fi.isIndexed && !fi.omitNorms) {
+        startField(fi);
+        int numMergedDocsForField = 0;
+        for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+          final int maxDoc = reader.reader.maxDoc();
+          byte normBuffer[] = reader.reader.norms(fi.name);
+          if (normBuffer == null) {
+            // Can be null if this segment doesn't have
+            // any docs with this field
+            normBuffer = new byte[maxDoc];
+            Arrays.fill(normBuffer, (byte)0);
+          }
+          if (reader.liveDocs == null) {
+            //optimized case for segments without deleted docs
+            out.writeBytes(normBuffer, maxDoc);
+            numMergedDocsForField += maxDoc;
+          } else {
+            // this segment has deleted docs, so we have to
+            // check for every doc if it is deleted or not
+            final Bits liveDocs = reader.liveDocs;
+            for (int k = 0; k < maxDoc; k++) {
+              if (liveDocs.get(k)) {
+                numMergedDocsForField++;
+                out.writeByte(normBuffer[k]);
+              }
+            }
+          }
+          mergeState.checkAbort.work(maxDoc);
+        }
+        assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
+        numMergedDocs = numMergedDocsForField;
+      }
+    }
+    finish(numMergedDocs);
+    return numMergedDocs;
+  }
 
   @Override
   public void close() throws IOException {
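
For illustration only, not part of the patch: after this change a codec's NormsWriter only has to implement the per-field primitives (startField, writeNorm, finish) and inherits the doc-at-a-time merge(MergeState) above; Lucene40NormsWriter overrides merge purely as a bulk-copy optimization. A minimal sketch of such a subclass, assuming startField(FieldInfo) is the remaining abstract method on NormsWriter (its declaration falls outside the hunks shown):

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.codecs.NormsWriter;

/** Hypothetical writer that buffers one norm byte per live doc in memory
 *  and relies entirely on the inherited NormsWriter.merge(MergeState). */
class BufferingNormsWriter extends NormsWriter {
  private final Map<String,List<Byte>> norms = new HashMap<String,List<Byte>>();
  private List<Byte> current;

  @Override
  public void startField(FieldInfo info) throws IOException {
    current = new ArrayList<Byte>();   // merge() calls this once per field with norms
    norms.put(info.name, current);
  }

  @Override
  public void writeNorm(byte norm) throws IOException {
    current.add(norm);                 // merge() calls this once per live doc
  }

  @Override
  public void finish(int numDocs) throws IOException {
    // merge() passes the merged doc count; every normed field saw one byte per doc
    for (List<Byte> field : norms.values()) {
      assert field.size() == numDocs;
    }
  }

  @Override
  public void close() throws IOException {
    norms.clear();
  }
}

Because the default merge funnels every norm byte through writeNorm, an override like Lucene40NormsWriter's pays off only in the reader.liveDocs == null case, where a whole segment's norms are copied with a single writeBytes call instead of maxDoc single-byte writes.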