diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java index 1e3112e49c3..255a5099073 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java @@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.appending; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.DocValuesFormat; import org.apache.lucene.index.codecs.FieldInfosFormat; +import org.apache.lucene.index.codecs.NormsFormat; import org.apache.lucene.index.codecs.StoredFieldsFormat; import org.apache.lucene.index.codecs.PostingsFormat; import org.apache.lucene.index.codecs.SegmentInfosFormat; @@ -27,6 +28,7 @@ import org.apache.lucene.index.codecs.TermVectorsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40Codec; import org.apache.lucene.index.codecs.lucene40.Lucene40FieldInfosFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40DocValuesFormat; +import org.apache.lucene.index.codecs.lucene40.Lucene40NormsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40TermVectorsFormat; @@ -47,6 +49,7 @@ public class AppendingCodec extends Codec { private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat(); private final TermVectorsFormat vectors = new Lucene40TermVectorsFormat(); private final DocValuesFormat docValues = new Lucene40DocValuesFormat(); + private final NormsFormat norms = new Lucene40NormsFormat(); @Override public PostingsFormat postingsFormat() { @@ -77,4 +80,9 @@ public class AppendingCodec extends Codec { public FieldInfosFormat fieldInfosFormat() { return fieldInfos; } + + @Override + public NormsFormat normsFormat() { + return norms; + } } diff --git 
a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index ea370d4fc93..afb6f43da12 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -52,7 +52,7 @@ import org.apache.lucene.util.InfoStream; * are processing the document). * * Other consumers, eg {@link FreqProxTermsWriter} and - * {@link NormsWriter}, buffer bytes in RAM and flush only + * {@link NormsConsumer}, buffer bytes in RAM and flush only * when a new segment is produced. * Once we have used our allowed RAM buffer, or the number diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 15a3b848230..7a577bd407e 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -79,7 +79,7 @@ public class DocumentsWriterPerThread { final InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true, new TermsHash(documentsWriterPerThread, termVectorsWriter, false, null)); - final NormsWriter normsWriter = new NormsWriter(); + final NormsConsumer normsWriter = new NormsConsumer(documentsWriterPerThread); final DocInverter docInverter = new DocInverter(documentsWriterPerThread.docState, termsHash, normsWriter); return new DocFieldProcessor(documentsWriterPerThread, docInverter); } diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriter.java b/lucene/src/java/org/apache/lucene/index/NormsConsumer.java similarity index 71% rename from lucene/src/java/org/apache/lucene/index/NormsWriter.java rename to lucene/src/java/org/apache/lucene/index/NormsConsumer.java index 21a2a2511ad..7797eb527c6 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsWriter.java +++ 
b/lucene/src/java/org/apache/lucene/index/NormsConsumer.java @@ -21,8 +21,8 @@ import java.io.IOException; import java.util.Collection; import java.util.Map; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.IOContext.Context; +import org.apache.lucene.index.codecs.NormsFormat; +import org.apache.lucene.index.codecs.NormsWriter; import org.apache.lucene.util.IOUtils; // TODO FI: norms could actually be stored as doc store @@ -32,8 +32,12 @@ import org.apache.lucene.util.IOUtils; * merges all of these together into a single _X.nrm file. */ -final class NormsWriter extends InvertedDocEndConsumer { - +final class NormsConsumer extends InvertedDocEndConsumer { + final NormsFormat normsFormat; + + public NormsConsumer(DocumentsWriterPerThread dwpt) { + normsFormat = dwpt.codec.normsFormat(); + } @Override public void abort() {} @@ -49,27 +53,23 @@ final class NormsWriter extends InvertedDocEndConsumer { return; } - final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION); - IndexOutput normsOut = state.directory.createOutput(normsFileName, state.context); + NormsWriter normsOut = null; boolean success = false; try { - normsOut.writeBytes(SegmentNorms.NORMS_HEADER, 0, SegmentNorms.NORMS_HEADER.length); - - int normCount = 0; + normsOut = normsFormat.normsWriter(state); for (FieldInfo fi : state.fieldInfos) { - final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi); + final NormsConsumerPerField toWrite = (NormsConsumerPerField) fieldsToFlush.get(fi); int upto = 0; - if (toWrite != null && toWrite.upto > 0) { - normCount++; - + if (!fi.omitNorms && toWrite != null && toWrite.upto > 0) { + normsOut.startField(fi); int docID = 0; for (; docID < state.numDocs; docID++) { if (upto < toWrite.upto && toWrite.docIDs[upto] == docID) { - normsOut.writeByte(toWrite.norms[upto]); + normsOut.writeNorm(toWrite.norms[upto]); upto++; } else { - normsOut.writeByte((byte) 0); + 
normsOut.writeNorm((byte) 0); } } @@ -78,14 +78,13 @@ final class NormsWriter extends InvertedDocEndConsumer { toWrite.reset(); } else if (fi.isIndexed && !fi.omitNorms) { - normCount++; // Fill entire field with default norm: + normsOut.startField(fi); for(;upto files) throws IOException; +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/NormsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/NormsWriter.java new file mode 100644 index 00000000000..2be8db57a70 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/NormsWriter.java @@ -0,0 +1,34 @@ +package org.apache.lucene.index.codecs; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo;
+
+/**
+ * Codec-level sink for the norm bytes of one segment.
+ * <p>
+ * The flush code visible in this patch (NormsConsumer) calls
+ * {@link #startField} once for every field that carries norms and then
+ * {@link #writeNorm} once per document of the segment for that field.
+ */
+// simple api just for now before switching to docvalues apis
+public abstract class NormsWriter implements Closeable {
+
+  // TODO: I think IW should set info.normValueType from Similarity,
+  // and then this method just returns DocValuesConsumer
+  /**
+   * Announces the field whose norms are written next.
+   *
+   * @param info the field that the following {@link #writeNorm} calls belong to
+   * @throws IOException declared for implementations that perform I/O here
+   */
+  public abstract void startField(FieldInfo info) throws IOException;
+
+  /**
+   * Writes the norm byte of a single document for the field most recently
+   * passed to {@link #startField}.
+   *
+   * @param norm the norm byte to store
+   * @throws IOException if the implementation fails to write the byte
+   */
+  public abstract void writeNorm(byte norm) throws IOException;
+
+  /**
+   * Signals the end of writing. NOTE(review): the call site is not visible in
+   * this patch text; presumably invoked once after all fields were written.
+   *
+   * @param numDocs document count the writer should have received per field;
+   *        Lucene40NormsWriter uses it to verify the size of the file it wrote
+   * @throws IOException declared for implementations that perform I/O here
+   */
+  public abstract void finish(int numDocs) throws IOException;
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java
index 34ea9a6a62b..ab74a789676 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xCodec.java
@@ -26,6 +26,7 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.DocValuesFormat;
 import org.apache.lucene.index.codecs.FieldInfosFormat;
+import org.apache.lucene.index.codecs.NormsFormat;
 import org.apache.lucene.index.codecs.StoredFieldsFormat;
 import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.codecs.PerDocValues;
@@ -33,6 +34,7 @@ import org.apache.lucene.index.codecs.PostingsFormat;
 import org.apache.lucene.index.codecs.SegmentInfosFormat;
 import org.apache.lucene.index.codecs.TermVectorsFormat;
 import org.apache.lucene.index.codecs.lucene40.Lucene40FieldInfosFormat;
+import org.apache.lucene.index.codecs.lucene40.Lucene40NormsFormat;
 import org.apache.lucene.index.codecs.lucene40.Lucene40SegmentInfosFormat;
 import org.apache.lucene.index.codecs.lucene40.Lucene40StoredFieldsFormat;
 import org.apache.lucene.index.codecs.lucene40.Lucene40TermVectorsFormat;
@@ -62,6 +64,9 @@ public class Lucene3xCodec extends Codec {
   // this way
IR.commit fails on delete/undelete/setNorm/etc ? private final SegmentInfosFormat infosFormat = new Lucene40SegmentInfosFormat(); + // TODO: this should really be a different impl + private final NormsFormat normsFormat = new Lucene40NormsFormat(); + // 3.x doesn't support docvalues private final DocValuesFormat docValuesFormat = new DocValuesFormat() { @Override @@ -107,4 +112,9 @@ public class Lucene3xCodec extends Codec { public SegmentInfosFormat segmentInfosFormat() { return infosFormat; } + + @Override + public NormsFormat normsFormat() { + return normsFormat; + } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40Codec.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40Codec.java index 890bf7b800f..e75e9c586a8 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40Codec.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40Codec.java @@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.lucene40; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.DocValuesFormat; import org.apache.lucene.index.codecs.FieldInfosFormat; +import org.apache.lucene.index.codecs.NormsFormat; import org.apache.lucene.index.codecs.StoredFieldsFormat; import org.apache.lucene.index.codecs.PostingsFormat; import org.apache.lucene.index.codecs.SegmentInfosFormat; @@ -40,6 +41,7 @@ public class Lucene40Codec extends Codec { private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat(); private final DocValuesFormat docValuesFormat = new Lucene40DocValuesFormat(); private final SegmentInfosFormat infosFormat = new Lucene40SegmentInfosFormat(); + private final NormsFormat normsFormat = new Lucene40NormsFormat(); private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override public PostingsFormat getPostingsFormatForField(String field) { @@ -81,6 +83,11 @@ public class Lucene40Codec extends Codec { return infosFormat; } + 
@Override + public NormsFormat normsFormat() { + return normsFormat; + } + /** Returns the postings format that should be used for writing * new segments of field. * diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsFormat.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsFormat.java new file mode 100644 index 00000000000..f718b19569f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsFormat.java @@ -0,0 +1,41 @@ +package org.apache.lucene.index.codecs.lucene40; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.NormsFormat;
+import org.apache.lucene.index.codecs.NormsWriter;
+import org.apache.lucene.store.Directory;
+
+/**
+ * {@link NormsFormat} whose writer side is backed by {@link Lucene40NormsWriter}.
+ */
+public class Lucene40NormsFormat extends NormsFormat {
+
+  /**
+   * Creates the norms writer for the segment being flushed.
+   *
+   * @param state supplies the directory, segment name and IO context handed
+   *        to the {@link Lucene40NormsWriter} constructor
+   * @return a freshly constructed {@link Lucene40NormsWriter}
+   * @throws IOException if constructing the writer fails
+   */
+  @Override
+  public NormsWriter normsWriter(SegmentWriteState state) throws IOException {
+    final NormsWriter writer =
+        new Lucene40NormsWriter(state.directory, state.segmentName, state.context);
+    return writer;
+  }
+
+  /**
+   * Intentionally a no-op for now: nothing is added to {@code files}.
+   * NOTE(review): the element type of {@code files} is not visible in this
+   * patch text; confirm it against the abstract declaration in NormsFormat.
+   */
+  @Override
+  public void files(Directory dir, SegmentInfo info, Set files) throws IOException {
+    // nocommit: hairy calculations involving .s files, etc
+  }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java
new file mode 100644
index 00000000000..b117550fe9e
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java
@@ -0,0 +1,78 @@
+package org.apache.lucene.index.codecs.lucene40;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.codecs.NormsWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Writes all norms of a segment into one file: the {@link #NORMS_HEADER}
+ * bytes followed by one byte per {@link #writeNorm} call. The file name is
+ * derived from the segment name and {@link IndexFileNames#NORMS_EXTENSION}.
+ */
+public class Lucene40NormsWriter extends NormsWriter {
+  private IndexOutput out;
+  // number of fields announced through startField()
+  private int normCount = 0;
+
+  /** norms header placeholder */
+  // nocommit: not public
+  public static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
+
+  /**
+   * Creates the norms file of the given segment and writes the header.
+   *
+   * @param directory directory the output is created in
+   * @param segment segment name used to build the file name
+   * @param context IO context passed on to {@code createOutput}
+   * @throws IOException if creating the output or writing the header fails;
+   *         in that case the output is handed to
+   *         {@code IOUtils.closeWhileHandlingException} before propagating
+   */
+  public Lucene40NormsWriter(Directory directory, String segment, IOContext context) throws IOException {
+    final String normsFileName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION);
+    boolean success = false;
+    try {
+      out = directory.createOutput(normsFileName, context);
+      out.writeBytes(NORMS_HEADER, 0, NORMS_HEADER.length);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(out);
+      }
+    }
+  }
+
+  /**
+   * Counts the field; the count feeds the size check in {@link #finish}.
+   *
+   * @param info field whose norms follow; must not omit norms
+   */
+  @Override
+  public void startField(FieldInfo info) throws IOException {
+    assert info.omitNorms == false;
+    normCount++;
+  }
+
+  /**
+   * Appends one norm byte to the output.
+   *
+   * @param norm the byte to write
+   */
+  @Override
+  public void writeNorm(byte norm) throws IOException {
+    out.writeByte(norm);
+  }
+
+  /**
+   * Verifies that exactly header + (fields * documents) bytes were written.
+   *
+   * @param numDocs number of norm bytes expected per started field
+   * @throws RuntimeException if the output position differs from the expected size
+   */
+  @Override
+  public void finish(int numDocs) throws IOException {
+    // Widen to long before multiplying: normCount * numDocs can exceed
+    // Integer.MAX_VALUE, and an int overflow here would report a bogus mismatch.
+    final long expected = NORMS_HEADER.length + (long) normCount * numDocs;
+    final long actual = out.getFilePointer();
+    if (expected != actual) {
+      throw new RuntimeException(".nrm file size mismatch: expected=" + expected + " actual=" + actual);
+    }
+  }
+
+  /**
+   * Closes the output and drops the reference to it, even if closing fails.
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      IOUtils.close(out);
+    } finally {
+      out = null;
+    }
+  }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
index 04bcf4c40d2..f2d2cbfa2ad 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java
+++ 
b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextCodec.java @@ -20,11 +20,13 @@ package org.apache.lucene.index.codecs.simpletext; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.DocValuesFormat; import org.apache.lucene.index.codecs.FieldInfosFormat; +import org.apache.lucene.index.codecs.NormsFormat; import org.apache.lucene.index.codecs.PostingsFormat; import org.apache.lucene.index.codecs.SegmentInfosFormat; import org.apache.lucene.index.codecs.StoredFieldsFormat; import org.apache.lucene.index.codecs.TermVectorsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40DocValuesFormat; +import org.apache.lucene.index.codecs.lucene40.Lucene40NormsFormat; /** * plain text index format. @@ -40,6 +42,8 @@ public final class SimpleTextCodec extends Codec { private final TermVectorsFormat vectorsFormat = new SimpleTextTermVectorsFormat(); // TODO: need a plain-text impl private final DocValuesFormat docValues = new Lucene40DocValuesFormat(); + // TODO: need a plain-text impl (using the above) + private final NormsFormat normsFormat = new Lucene40NormsFormat(); public SimpleTextCodec() { super("SimpleText"); @@ -74,4 +78,9 @@ public final class SimpleTextCodec extends Codec { public SegmentInfosFormat segmentInfosFormat() { return segmentInfos; } + + @Override + public NormsFormat normsFormat() { + return normsFormat; + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index db20c3bb9a0..01c9f34573e 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -33,6 +33,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.DocValuesFormat; import org.apache.lucene.index.codecs.FieldInfosFormat; +import org.apache.lucene.index.codecs.NormsFormat; 
import org.apache.lucene.index.codecs.StoredFieldsFormat; import org.apache.lucene.index.codecs.PostingsFormat; import org.apache.lucene.index.codecs.SegmentInfosFormat; @@ -40,6 +41,7 @@ import org.apache.lucene.index.codecs.TermVectorsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40Codec; import org.apache.lucene.index.codecs.lucene40.Lucene40FieldInfosFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40DocValuesFormat; +import org.apache.lucene.index.codecs.lucene40.Lucene40NormsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40SegmentInfosFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40StoredFieldsFormat; import org.apache.lucene.index.codecs.lucene40.Lucene40TermVectorsFormat; @@ -1122,6 +1124,11 @@ public class TestAddIndexes extends LuceneTestCase { public SegmentInfosFormat segmentInfosFormat() { return new Lucene40SegmentInfosFormat(); } + + @Override + public NormsFormat normsFormat() { + return new Lucene40NormsFormat(); + } } /*