diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java index 53eb829f208..da08c1c4fc1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java @@ -27,7 +27,6 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; -import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat; import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat; import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat; import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat; @@ -36,7 +35,8 @@ import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; /** - * Implements the Lucene 4.2 index format, with configurable per-field postings formats. + * Implements the Lucene 4.2 index format, with configurable per-field postings + * and docvalues formats. *

* If you want to reuse functionality of this codec in another codec, extend * {@link FilterCodec}. @@ -50,7 +50,7 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; public class Lucene42Codec extends Codec { private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat(); private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat(); - private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat(); + private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat(); private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat(); private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java new file mode 100644 index 00000000000..9a6fed9f348 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java @@ -0,0 +1,121 @@ +package org.apache.lucene.codecs.lucene42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FieldInfosReader; +import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.index.FieldInfo.DocValuesType; // javadoc +import org.apache.lucene.store.DataOutput; // javadoc + +/** + * Lucene 4.2 Field Infos format. + *

+ *

Field names are stored in the field info file, with suffix .fnm.

+ *

FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber, + * FieldBits,DocValuesBits,Attributes> FieldsCount

+ *

Data types: + *

+ *

+ * Field Descriptions: + * + * + * @lucene.experimental + */ +public class Lucene42FieldInfosFormat extends FieldInfosFormat { + private final FieldInfosReader reader = new Lucene42FieldInfosReader(); + private final FieldInfosWriter writer = new Lucene42FieldInfosWriter(); + + /** Sole constructor. */ + public Lucene42FieldInfosFormat() { + } + + @Override + public FieldInfosReader getFieldInfosReader() throws IOException { + return reader; + } + + @Override + public FieldInfosWriter getFieldInfosWriter() throws IOException { + return writer; + } + + /** Extension of field infos */ + static final String EXTENSION = "fnm"; + + // Codec header + static final String CODEC_NAME = "Lucene42FieldInfos"; + static final int FORMAT_START = 0; + static final int FORMAT_CURRENT = FORMAT_START; + + // Field flags + static final byte IS_INDEXED = 0x1; + static final byte STORE_TERMVECTOR = 0x2; + static final byte STORE_OFFSETS_IN_POSTINGS = 0x4; + static final byte OMIT_NORMS = 0x10; + static final byte STORE_PAYLOADS = 0x20; + static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40; + static final byte OMIT_POSITIONS = -128; +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java new file mode 100644 index 00000000000..96f3dd89e11 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java @@ -0,0 +1,121 @@ +package org.apache.lucene.codecs.lucene42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collections; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldInfosReader; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.2 FieldInfos reader. + * + * @lucene.experimental + * @see Lucene42FieldInfosFormat + */ +public class Lucene42FieldInfosReader extends FieldInfosReader { + + /** Sole constructor. */ + public Lucene42FieldInfosReader() { + } + + @Override + public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION); + IndexInput input = directory.openInput(fileName, iocontext); + + boolean success = false; + try { + CodecUtil.checkHeader(input, Lucene42FieldInfosFormat.CODEC_NAME, + Lucene42FieldInfosFormat.FORMAT_START, + Lucene42FieldInfosFormat.FORMAT_CURRENT); + + final int size = input.readVInt(); //read in the size + FieldInfo infos[] = new FieldInfo[size]; + + for (int i = 0; i < size; i++) { + String name = input.readString(); + final int fieldNumber = input.readVInt(); + byte bits = input.readByte(); + boolean isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0; + boolean storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0; + boolean omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0; + boolean storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0; + final IndexOptions indexOptions; + if (!isIndexed) { + indexOptions = null; + } else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { + indexOptions = IndexOptions.DOCS_ONLY; + } else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) { + indexOptions = IndexOptions.DOCS_AND_FREQS; + } else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) { + indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + } else { + indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; + } + + // DV Types are packed in one byte + byte val = input.readByte(); + final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F)); + final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F)); + final Map attributes = input.readStringStringMap(); + infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, + omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes)); + } + + if (input.getFilePointer() != input.length()) { + throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")"); + } + FieldInfos fieldInfos = new FieldInfos(infos); + success = true; + return fieldInfos; + } finally { + if (success) { + input.close(); + } else { + IOUtils.closeWhileHandlingException(input); + } + } + } + + private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException { + if (b == 0) { + return null; + } else if (b == 1) { + return DocValuesType.NUMERIC; + } else if (b == 2) { + return DocValuesType.BINARY; + } else if (b == 3) { + return DocValuesType.SORTED; + } else { + throw new CorruptIndexException("invalid docvalues byte: " + b + " (resource=" + input + ")"); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java new file mode 100644 index 00000000000..7e3aacf432b --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java @@ -0,0 +1,106 @@ +package org.apache.lucene.codecs.lucene42; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.IOUtils; + +/** + * Lucene 4.2 FieldInfos writer. + * + * @see Lucene42FieldInfosFormat + * @lucene.experimental + */ +public class Lucene42FieldInfosWriter extends FieldInfosWriter { + + /** Sole constructor. */ + public Lucene42FieldInfosWriter() { + } + + @Override + public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException { + final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION); + IndexOutput output = directory.createOutput(fileName, context); + boolean success = false; + try { + CodecUtil.writeHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT); + output.writeVInt(infos.size()); + for (FieldInfo fi : infos) { + IndexOptions indexOptions = fi.getIndexOptions(); + byte bits = 0x0; + if (fi.hasVectors()) bits |= Lucene42FieldInfosFormat.STORE_TERMVECTOR; + if (fi.omitsNorms()) bits |= Lucene42FieldInfosFormat.OMIT_NORMS; + if (fi.hasPayloads()) bits |= Lucene42FieldInfosFormat.STORE_PAYLOADS; + if (fi.isIndexed()) { + bits |= Lucene42FieldInfosFormat.IS_INDEXED; + assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads(); + if (indexOptions == IndexOptions.DOCS_ONLY) { + bits |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS; + } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) { + bits |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS; + } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) { + bits |= Lucene42FieldInfosFormat.OMIT_POSITIONS; + } + } + output.writeString(fi.name); + output.writeVInt(fi.number); + output.writeByte(bits); + + // pack the DV types in one byte + final byte dv = docValuesByte(fi.getDocValuesType()); + final byte nrm = docValuesByte(fi.getNormType()); + assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0; + byte val = (byte) (0xff & ((nrm << 4) | dv)); + output.writeByte(val); + output.writeStringStringMap(fi.attributes()); + } + success = true; + } finally { + if (success) { + output.close(); + } else { + IOUtils.closeWhileHandlingException(output); + } + } + } + + private static byte docValuesByte(DocValuesType type) { + if (type == null) { + return 0; + } else if (type == DocValuesType.NUMERIC) { + return 1; + } else if (type == DocValuesType.BINARY) { + return 2; + } else if (type == DocValuesType.SORTED) { + return 3; + } else { + throw new AssertionError(); + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java index 655fb359b41..2e687657826 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java +++ b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java @@ -1,6 +1,7 @@ package org.apache.lucene.document; -import org.apache.lucene.search.FieldCache; +import org.apache.lucene.index.AtomicReader; // javadocs +import org.apache.lucene.search.FieldCache; // javadocs /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -24,7 +25,7 @@ import org.apache.lucene.search.FieldCache; * via {@link Float#floatToRawIntBits(float)}. *

* Per-document floating point values can be retrieved via - * {@link FieldCache#getFloats(org.apache.lucene.index.AtomicReader, String, boolean)}. + * {@link FieldCache#getFloats(AtomicReader, String, boolean)}. *

* NOTE: In most all cases this will be rather inefficient, * requiring four bytes per document. Consider encoding floating