mirror of https://github.com/apache/lucene.git
LUCENE-3606: move norms merging to codec
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210295 13f79535-47bb-0310-9956-ffa450edef68
parent f678b69ccf
commit a81ef9a3d3
org/apache/lucene/index/FieldInfos.java

@@ -407,7 +407,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
 
   public boolean hasNorms() {
     for (FieldInfo fi : this) {
-      if (!fi.omitNorms) {
+      if (fi.isIndexed && !fi.omitNorms) {
         return true;
       }
     }
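The hasNorms() fix above matters because a field that was never indexed has no norms at all, so testing omitNorms alone can report norms that do not exist. A standalone sketch of the corrected predicate (FieldMeta and the sample fields are stand-ins, not Lucene classes):

    import java.util.List;

    // Standalone sketch: FieldMeta stands in for org.apache.lucene.index.FieldInfo.
    public class HasNormsSketch {
      record FieldMeta(String name, boolean isIndexed, boolean omitNorms) {}

      static boolean hasNorms(List<FieldMeta> fields) {
        for (FieldMeta f : fields) {
          // norms exist only for fields that were indexed with norms enabled
          if (f.isIndexed() && !f.omitNorms()) {
            return true;
          }
        }
        return false;
      }

      public static void main(String[] args) {
        // a stored-only (non-indexed) field must not count as having norms
        System.out.println(hasNorms(List.of(new FieldMeta("id", false, false))));   // false
        System.out.println(hasNorms(List.of(new FieldMeta("body", true, false))));  // true
      }
    }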
org/apache/lucene/index/SegmentMerger.java

@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 
@@ -28,13 +27,12 @@ import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldInfosWriter;
 import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.index.codecs.StoredFieldsWriter;
 import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.codecs.TermVectorsWriter;
-import org.apache.lucene.index.codecs.lucene40.Lucene40NormsWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
@@ -122,7 +120,11 @@ final class SegmentMerger {
     final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, segment, mergeState.fieldInfos, mergeState.mergedDocCount, termIndexInterval, codec, null, context);
     mergeTerms(segmentWriteState);
     mergePerDoc(segmentWriteState);
-    mergeNorms();
+
+    if (mergeState.fieldInfos.hasNorms()) {
+      int numMerged = mergeNorms(segmentWriteState);
+      assert numMerged == mergeState.mergedDocCount;
+    }
 
     if (mergeState.fieldInfos.hasVectors()) {
       int numMerged = mergeVectors();
@@ -325,49 +327,19 @@ final class SegmentMerger {
     }
   }
 
-  // TODO: implement merge in normsformat instead.
-  private void mergeNorms() throws IOException {
-    IndexOutput output = null;
+  private int mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
+    final NormsWriter writer = codec.normsFormat().normsWriter(segmentWriteState);
     boolean success = false;
     try {
-      for (FieldInfo fi : mergeState.fieldInfos) {
-        if (fi.isIndexed && !fi.omitNorms) {
-          if (output == null) {
-            output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION), context);
-            output.writeBytes(Lucene40NormsWriter.NORMS_HEADER, Lucene40NormsWriter.NORMS_HEADER.length);
-          }
-          for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
-            final int maxDoc = reader.reader.maxDoc();
-            byte normBuffer[] = reader.reader.norms(fi.name);
-            if (normBuffer == null) {
-              // Can be null if this segment doesn't have
-              // any docs with this field
-              normBuffer = new byte[maxDoc];
-              Arrays.fill(normBuffer, (byte)0);
-            }
-            if (reader.liveDocs == null) {
-              //optimized case for segments without deleted docs
-              output.writeBytes(normBuffer, maxDoc);
-            } else {
-              // this segment has deleted docs, so we have to
-              // check for every doc if it is deleted or not
-              final Bits liveDocs = reader.liveDocs;
-              for (int k = 0; k < maxDoc; k++) {
-                if (liveDocs.get(k)) {
-                  output.writeByte(normBuffer[k]);
-                }
-              }
-            }
-            mergeState.checkAbort.work(maxDoc);
-          }
-        }
-      }
+      int numMerged = writer.merge(mergeState);
       success = true;
+      return numMerged;
     } finally {
       if (success) {
-        IOUtils.close(output);
+        IOUtils.close(writer);
       } else {
-        IOUtils.closeWhileHandlingException(output);
+        IOUtils.closeWhileHandlingException(writer);
       }
     }
   }
 
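After this hunk, SegmentMerger no longer writes the .nrm file by hand: it obtains a NormsWriter from codec.normsFormat() and delegates the whole merge, keeping only the success/failure close discipline. A self-contained sketch of that close idiom, with stand-in names (Lucene's IOUtils.close lets close-time exceptions propagate, while IOUtils.closeWhileHandlingException swallows them so the original failure is not masked):

    import java.io.Closeable;
    import java.io.IOException;

    // Sketch of the close-on-success / suppress-on-failure idiom used above.
    // MergeTask stands in for writer.merge(mergeState).
    public class CloseIdiomSketch {
      interface MergeTask { int run() throws IOException; }

      static int mergeWith(Closeable writer, MergeTask doMerge) throws IOException {
        boolean success = false;
        try {
          int numMerged = doMerge.run();
          success = true;
          return numMerged;
        } finally {
          if (success) {
            writer.close();                       // like IOUtils.close: propagate close errors
          } else {
            try { writer.close(); }               // like IOUtils.closeWhileHandlingException:
            catch (IOException suppressed) {}     // don't mask the original exception
          }
        }
      }
    }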
org/apache/lucene/index/codecs/NormsWriter.java

@@ -19,8 +19,11 @@ package org.apache.lucene.index.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.util.Bits;
 
 // simple api just for now before switching to docvalues apis
 public abstract class NormsWriter implements Closeable {
@@ -31,4 +34,37 @@ public abstract class NormsWriter implements Closeable {
   public abstract void writeNorm(byte norm) throws IOException;
   public abstract void finish(int numDocs) throws IOException;
 
+  public int merge(MergeState mergeState) throws IOException {
+    int numMergedDocs = 0;
+    for (FieldInfo fi : mergeState.fieldInfos) {
+      if (fi.isIndexed && !fi.omitNorms) {
+        startField(fi);
+        int numMergedDocsForField = 0;
+        for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+          final int maxDoc = reader.reader.maxDoc();
+          byte normBuffer[] = reader.reader.norms(fi.name);
+          if (normBuffer == null) {
+            // Can be null if this segment doesn't have
+            // any docs with this field
+            normBuffer = new byte[maxDoc];
+            Arrays.fill(normBuffer, (byte)0);
+          }
+          // this segment has deleted docs, so we have to
+          // check for every doc if it is deleted or not
+          final Bits liveDocs = reader.liveDocs;
+          for (int k = 0; k < maxDoc; k++) {
+            if (liveDocs == null || liveDocs.get(k)) {
+              writeNorm(normBuffer[k]);
+              numMergedDocsForField++;
+            }
+          }
+          mergeState.checkAbort.work(maxDoc);
+        }
+        assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
+        numMergedDocs = numMergedDocsForField;
+      }
+    }
+    finish(numMergedDocs);
+    return numMergedDocs;
+  }
 }
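The default merge() added above is a template method: the base class drives the per-field, per-segment loop, skips deleted docs via liveDocs, and funnels every surviving norm byte through the abstract writeNorm(); the per-field counts are asserted equal across fields and passed to finish(). A standalone sketch of the inner per-segment loop (names are stand-ins, and a boolean[] stands in for Lucene's Bits):

    import java.io.IOException;

    // Sketch of the template-method shape: the base class owns the loop and
    // subclasses only supply writeNorm().
    abstract class NormsMergeSketch {
      abstract void writeNorm(byte norm) throws IOException;

      // liveDocs == null means the segment has no deletions
      int mergeOneSegment(byte[] norms, boolean[] liveDocs) throws IOException {
        int merged = 0;
        for (int k = 0; k < norms.length; k++) {
          if (liveDocs == null || liveDocs[k]) {
            writeNorm(norms[k]);   // one call per surviving document
            merged++;
          }
        }
        return merged;
      }
    }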
org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java

@@ -18,13 +18,16 @@ package org.apache.lucene.index.codecs.lucene40;
  */
 
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.IOUtils;
 
 public class Lucene40NormsWriter extends NormsWriter {
@@ -66,6 +69,48 @@ public class Lucene40NormsWriter extends NormsWriter {
       throw new RuntimeException(".nrm file size mismatch: expected=" + (4+normCount*(long)numDocs) + " actual=" + out.getFilePointer());
     }
   }
 
+  /** we override merge and bulk-merge norms when there are no deletions */
+  @Override
+  public int merge(MergeState mergeState) throws IOException {
+    int numMergedDocs = 0;
+    for (FieldInfo fi : mergeState.fieldInfos) {
+      if (fi.isIndexed && !fi.omitNorms) {
+        startField(fi);
+        int numMergedDocsForField = 0;
+        for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
+          final int maxDoc = reader.reader.maxDoc();
+          byte normBuffer[] = reader.reader.norms(fi.name);
+          if (normBuffer == null) {
+            // Can be null if this segment doesn't have
+            // any docs with this field
+            normBuffer = new byte[maxDoc];
+            Arrays.fill(normBuffer, (byte)0);
+          }
+          if (reader.liveDocs == null) {
+            //optimized case for segments without deleted docs
+            out.writeBytes(normBuffer, maxDoc);
+            numMergedDocsForField += maxDoc;
+          } else {
+            // this segment has deleted docs, so we have to
+            // check for every doc if it is deleted or not
+            final Bits liveDocs = reader.liveDocs;
+            for (int k = 0; k < maxDoc; k++) {
+              if (liveDocs.get(k)) {
+                numMergedDocsForField++;
+                out.writeByte(normBuffer[k]);
+              }
+            }
+          }
+          mergeState.checkAbort.work(maxDoc);
+        }
+        assert numMergedDocs == 0 || numMergedDocs == numMergedDocsForField;
+        numMergedDocs = numMergedDocsForField;
+      }
+    }
+    finish(numMergedDocs);
+    return numMergedDocs;
+  }
+
   @Override
   public void close() throws IOException {
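Lucene40NormsWriter overrides merge() purely for speed: when a segment has no deletions it copies the whole per-document norms array with a single writeBytes call instead of one writeNorm call per document, and falls back to the per-document path only when liveDocs is present. A standalone sketch of that decision (OutputStream standing in for Lucene's IndexOutput, boolean[] for Bits):

    import java.io.IOException;
    import java.io.OutputStream;

    // Sketch of the bulk-vs-per-doc copy decision in the override above.
    public class BulkCopySketch {
      static int copyNorms(byte[] norms, boolean[] liveDocs, OutputStream out)
          throws IOException {
        if (liveDocs == null) {
          // no deletions: one bulk write for the whole segment
          out.write(norms, 0, norms.length);
          return norms.length;
        }
        int copied = 0;
        for (int k = 0; k < norms.length; k++) {
          if (liveDocs[k]) {       // deletions present: keep only live docs
            out.write(norms[k]);
            copied++;
          }
        }
        return copied;
      }
    }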