mirror of https://github.com/apache/lucene.git
LUCENE-3606: move norms merging to codec
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210295 13f79535-47bb-0310-9956-ffa450edef68
parent f678b69ccf
commit a81ef9a3d3
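The hunks below touch four files: FieldInfos tightens hasNorms(), SegmentMerger hands norms merging to the codec, NormsWriter grows a default merge(), and Lucene40NormsWriter overrides it for bulk copying. Condensed, the new control flow is (both calls taken verbatim from the diff below):

    // SegmentMerger no longer writes .nrm bytes itself; it asks the codec:
    NormsWriter writer = codec.normsFormat().normsWriter(segmentWriteState);
    int numMerged = writer.merge(mergeState);   // generic loop, or a codec override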
FieldInfos.java:

@@ -407,7 +407,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
 
   public boolean hasNorms() {
     for (FieldInfo fi : this) {
-      if (!fi.omitNorms) {
+      if (fi.isIndexed && !fi.omitNorms) {
         return true;
       }
     }
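The practical effect of the added isIndexed test, as a hedged illustration (the stored-only field is hypothetical, not from the commit): a field that is stored but never indexed keeps the default omitNorms == false, so the old check reported norms that were never written.

    // fi: FieldInfo of a hypothetical stored-only field, so
    // fi.isIndexed == false and fi.omitNorms == false (the default)
    boolean before = !fi.omitNorms;                  // true  -- claims norms exist
    boolean after  = fi.isIndexed && !fi.omitNorms;  // false -- nothing was written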
SegmentMerger.java:

@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 
@@ -28,13 +27,12 @@ import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldInfosWriter;
 import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.index.codecs.StoredFieldsWriter;
 import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.codecs.TermVectorsWriter;
-import org.apache.lucene.index.codecs.lucene40.Lucene40NormsWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
@@ -122,7 +120,11 @@ final class SegmentMerger {
     final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, segment, mergeState.fieldInfos, mergeState.mergedDocCount, termIndexInterval, codec, null, context);
     mergeTerms(segmentWriteState);
     mergePerDoc(segmentWriteState);
-    mergeNorms();
+
+    if (mergeState.fieldInfos.hasNorms()) {
+      int numMerged = mergeNorms(segmentWriteState);
+      assert numMerged == mergeState.mergedDocCount;
+    }
 
     if (mergeState.fieldInfos.hasVectors()) {
       int numMerged = mergeVectors();
@@ -325,49 +327,19 @@ final class SegmentMerger {
     }
   }
 
-  private void mergeNorms() throws IOException {
-    IndexOutput output = null;
+  // TODO: implement merge in normsformat instead.
+  private int mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
+    final NormsWriter writer = codec.normsFormat().normsWriter(segmentWriteState);
+
     boolean success = false;
     try {
-      for (FieldInfo fi : mergeState.fieldInfos) {
-        if (fi.isIndexed && !fi.omitNorms) {
-          if (output == null) {
-            output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION), context);
-            output.writeBytes(Lucene40NormsWriter.NORMS_HEADER, Lucene40NormsWriter.NORMS_HEADER.length);
-          }
-          for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
-            final int maxDoc = reader.reader.maxDoc();
-            byte normBuffer[] = reader.reader.norms(fi.name);
-            if (normBuffer == null) {
-              // Can be null if this segment doesn't have
-              // any docs with this field
-              normBuffer = new byte[maxDoc];
-              Arrays.fill(normBuffer, (byte)0);
-            }
-            if (reader.liveDocs == null) {
-              //optimized case for segments without deleted docs
-              output.writeBytes(normBuffer, maxDoc);
-            } else {
-              // this segment has deleted docs, so we have to
-              // check for every doc if it is deleted or not
-              final Bits liveDocs = reader.liveDocs;
-              for (int k = 0; k < maxDoc; k++) {
-                if (liveDocs.get(k)) {
-                  output.writeByte(normBuffer[k]);
-                }
-              }
-            }
-            mergeState.checkAbort.work(maxDoc);
-          }
-        }
-      }
+      int numMerged = writer.merge(mergeState);
       success = true;
+      return numMerged;
     } finally {
       if (success) {
-        IOUtils.close(output);
+        IOUtils.close(writer);
       } else {
-        IOUtils.closeWhileHandlingException(output);
+        IOUtils.closeWhileHandlingException(writer);
       }
     }
   }
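The success-flag close pattern in mergeNorms above is a standing Lucene idiom worth isolating. In this sketch only IOUtils and the writer variable from the method above are real; the elided work is a placeholder comment:

    boolean success = false;
    try {
      // ... merge all norms; any call here may throw ...
      success = true;
    } finally {
      if (success) {
        // normal path: an exception thrown by close() should propagate
        IOUtils.close(writer);
      } else {
        // failure path: close quietly so a secondary exception from
        // close() cannot mask the one already propagating
        IOUtils.closeWhileHandlingException(writer);
      }
    }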
NormsWriter.java:

@@ -19,8 +19,11 @@ package org.apache.lucene.index.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.util.Bits;
 
 // simple api just for now before switching to docvalues apis
 public abstract class NormsWriter implements Closeable {
@@ -31,4 +34,37 @@ public abstract class NormsWriter implements Closeable {
   public abstract void writeNorm(byte norm) throws IOException;
   public abstract void finish(int numDocs) throws IOException;
+
+  public int merge(MergeState mergeState) throws IOException {
+    int numMergedDocs = 0;
+    for (FieldInfo fi : mergeState.fieldInfos) {
+      if (fi.isIndexed && !fi.omitNorms) {
+        startField(fi);
+        int numMergedDocsForField = 0;
+        for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+          final int maxDoc = reader.reader.maxDoc();
+          byte normBuffer[] = reader.reader.norms(fi.name);
+          if (normBuffer == null) {
+            // Can be null if this segment doesn't have
+            // any docs with this field
+            normBuffer = new byte[maxDoc];
+            Arrays.fill(normBuffer, (byte)0);
+          }
+          // copy doc-by-doc, skipping deleted docs; liveDocs is
+          // null when the segment has no deletions
+          final Bits liveDocs = reader.liveDocs;
+          for (int k = 0; k < maxDoc; k++) {
+            if (liveDocs == null || liveDocs.get(k)) {
+              writeNorm(normBuffer[k]);
+              numMergedDocsForField++;
+            }
+          }
+          mergeState.checkAbort.work(maxDoc);
+        }
+        assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
+        numMergedDocs = numMergedDocsForField;
+      }
+    }
+    finish(numMergedDocs);
+    return numMergedDocs;
+  }
 }
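To see what the default merge() buys a codec, here is a minimal sketch of a concrete writer. It is hypothetical (not from the commit): it assumes startField(FieldInfo) is the abstract per-field hook that merge() calls alongside writeNorm and finish, and it buffers norms in memory instead of writing a real file.

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.codecs.NormsWriter;

    // Hypothetical: buffers one norm byte per live doc per field; the
    // inherited merge(MergeState) drives startField/writeNorm/finish.
    public class InMemoryNormsWriter extends NormsWriter {
      private final Map<String,ByteArrayOutputStream> norms =
          new HashMap<String,ByteArrayOutputStream>();
      private ByteArrayOutputStream current;

      @Override
      public void startField(FieldInfo info) throws IOException {
        current = new ByteArrayOutputStream();
        norms.put(info.name, current);
      }

      @Override
      public void writeNorm(byte norm) throws IOException {
        current.write(norm);   // merge() calls this once per surviving doc
      }

      @Override
      public void finish(int numDocs) throws IOException {
        // a real writer would verify each field wrote exactly numDocs bytes
      }

      @Override
      public void close() throws IOException {
        norms.clear();
      }
    }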
Lucene40NormsWriter.java:

@@ -18,13 +18,16 @@ package org.apache.lucene.index.codecs.lucene40;
  */
 
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
 
 public class Lucene40NormsWriter extends NormsWriter {
@@ -66,6 +69,48 @@ public class Lucene40NormsWriter extends NormsWriter {
       throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*(long)numDocs) + " actual=" + out.getFilePointer());
     }
   }
+
+  /** we override merge and bulk-merge norms when there are no deletions */
+  @Override
+  public int merge(MergeState mergeState) throws IOException {
+    int numMergedDocs = 0;
+    for (FieldInfo fi : mergeState.fieldInfos) {
+      if (fi.isIndexed && !fi.omitNorms) {
+        startField(fi);
+        int numMergedDocsForField = 0;
+        for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+          final int maxDoc = reader.reader.maxDoc();
+          byte normBuffer[] = reader.reader.norms(fi.name);
+          if (normBuffer == null) {
+            // Can be null if this segment doesn't have
+            // any docs with this field
+            normBuffer = new byte[maxDoc];
+            Arrays.fill(normBuffer, (byte)0);
+          }
+          if (reader.liveDocs == null) {
+            // optimized case for segments without deleted docs
+            out.writeBytes(normBuffer, maxDoc);
+            numMergedDocsForField += maxDoc;
+          } else {
+            // this segment has deleted docs, so we have to
+            // check for every doc if it is deleted or not
+            final Bits liveDocs = reader.liveDocs;
+            for (int k = 0; k < maxDoc; k++) {
+              if (liveDocs.get(k)) {
+                numMergedDocsForField++;
+                out.writeByte(normBuffer[k]);
+              }
+            }
+          }
+          mergeState.checkAbort.work(maxDoc);
+        }
+        assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
+        numMergedDocs = numMergedDocsForField;
+      }
+    }
+    finish(numMergedDocs);
+    return numMergedDocs;
+  }
+
   @Override
   public void close() throws IOException {
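The point of this override is the bulk branch: when a source segment has no deletions, its norms are already one contiguous byte per document, so a single writeBytes call replaces maxDoc trips through the virtual writeNorm hook; with deletions it falls back to the same filter-and-copy loop as the base class. Both implementations keep the invariant asserted above (every field contributes norms for exactly the same number of live documents) and report progress per segment via checkAbort.work(maxDoc) so a long merge can be aborted.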