From 44cdb10870f04b2465efba070063c1739ccf8867 Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Tue, 22 Jan 2013 15:08:25 +0000
Subject: [PATCH] 4.2 fieldinfos
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436986 13f79535-47bb-0310-9956-ffa450edef68
---
.../lucene/codecs/lucene42/Lucene42Codec.java | 6 +-
.../lucene42/Lucene42FieldInfosFormat.java | 121 ++++++++++++++++++
.../lucene42/Lucene42FieldInfosReader.java | 121 ++++++++++++++++++
.../lucene42/Lucene42FieldInfosWriter.java | 106 +++++++++++++++
.../lucene/document/FloatDocValuesField.java | 5 +-
5 files changed, 354 insertions(+), 5 deletions(-)
create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java
create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java
create mode 100644 lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java
index 53eb829f208..da08c1c4fc1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42Codec.java
@@ -27,7 +27,6 @@ import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
@@ -36,7 +35,8 @@ import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
- * Implements the Lucene 4.2 index format, with configurable per-field postings formats.
+ * Implements the Lucene 4.2 index format, with configurable per-field postings
+ * and docvalues formats.
*
* If you want to reuse functionality of this codec in another codec, extend
* {@link FilterCodec}.
@@ -50,7 +50,7 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
public class Lucene42Codec extends Codec {
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
- private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat();
+ private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat();
private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat();
private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java
new file mode 100644
index 00000000000..9a6fed9f348
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosFormat.java
@@ -0,0 +1,121 @@
+package org.apache.lucene.codecs.lucene42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FieldInfosReader;
+import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.FieldInfo.DocValuesType; // javadoc
+import org.apache.lucene.store.DataOutput; // javadoc
+
+/**
+ * Lucene 4.2 Field Infos format.
+ *
+ *
Field names are stored in the field info file, with suffix .fnm.
+ * FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber,
+ * FieldBits,DocValuesBits,Attributes> FieldsCount
+ * Data types:
+ *
+ * - Header --> {@link CodecUtil#checkHeader CodecHeader}
+ * - FieldsCount --> {@link DataOutput#writeVInt VInt}
+ * - FieldName --> {@link DataOutput#writeString String}
+ * - FieldBits, DocValuesBits --> {@link DataOutput#writeByte Byte}
+ * - FieldNumber --> {@link DataOutput#writeInt VInt}
+ * - Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}
+ *
+ *
+ * Field Descriptions:
+ *
+ * - FieldsCount: the number of fields in this file.
+ * - FieldName: name of the field as a UTF-8 String.
+ * - FieldNumber: the field's number. Note that unlike previous versions of
+ * Lucene, the fields are not numbered implicitly by their order in the
+ * file, instead explicitly.
+ * - FieldBits: a byte containing field options.
+ *
+ * - The low-order bit is one for indexed fields, and zero for non-indexed
+ * fields.
+ * - The second lowest-order bit is one for fields that have term vectors
+ * stored, and zero for fields without term vectors.
+ * - If the third lowest order-bit is set (0x4), offsets are stored into
+ * the postings list in addition to positions.
+ * - Fourth bit is unused.
+ * - If the fifth lowest-order bit is set (0x10), norms are omitted for the
+ * indexed field.
+ * - If the sixth lowest-order bit is set (0x20), payloads are stored for the
+ * indexed field.
+ * - If the seventh lowest-order bit is set (0x40), term frequencies and
+ * positions omitted for the indexed field.
+ * - If the eighth lowest-order bit is set (0x80), positions are omitted for the
+ * indexed field.
+ *
+ *
+ * - DocValuesBits: a byte containing per-document value types. The type
+ * recorded as two four-bit integers, with the high-order bits representing
+ *
norms
options, and the low-order bits representing
+ * {@code DocValues} options. Each four-bit integer can be decoded as such:
+ *
+ * - 0: no DocValues for this field.
+ * - 1: NumericDocValues. ({@link DocValuesType#NUMERIC})
+ * - 2: BinaryDocValues. ({@code DocValuesType#BINARY})
+ * - 3: SortedDocValues. ({@code DocValuesType#SORTED})
+ *
+ *
+ * - Attributes: a key-value map of codec-private attributes.
+ *
+ *
+ * @lucene.experimental
+ */
+public class Lucene42FieldInfosFormat extends FieldInfosFormat {
+ private final FieldInfosReader reader = new Lucene42FieldInfosReader();
+ private final FieldInfosWriter writer = new Lucene42FieldInfosWriter();
+
+ /** Sole constructor. */
+ public Lucene42FieldInfosFormat() {
+ }
+
+ @Override
+ public FieldInfosReader getFieldInfosReader() throws IOException {
+ return reader;
+ }
+
+ @Override
+ public FieldInfosWriter getFieldInfosWriter() throws IOException {
+ return writer;
+ }
+
+ /** Extension of field infos */
+ static final String EXTENSION = "fnm";
+
+ // Codec header
+ static final String CODEC_NAME = "Lucene42FieldInfos";
+ static final int FORMAT_START = 0;
+ static final int FORMAT_CURRENT = FORMAT_START;
+
+ // Field flags
+ static final byte IS_INDEXED = 0x1;
+ static final byte STORE_TERMVECTOR = 0x2;
+ static final byte STORE_OFFSETS_IN_POSTINGS = 0x4;
+ static final byte OMIT_NORMS = 0x10;
+ static final byte STORE_PAYLOADS = 0x20;
+ static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
+ static final byte OMIT_POSITIONS = -128;
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java
new file mode 100644
index 00000000000..96f3dd89e11
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosReader.java
@@ -0,0 +1,121 @@
+package org.apache.lucene.codecs.lucene42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldInfosReader;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Lucene 4.2 FieldInfos reader.
+ *
+ * @lucene.experimental
+ * @see Lucene42FieldInfosFormat
+ */
+public class Lucene42FieldInfosReader extends FieldInfosReader {
+
+ /** Sole constructor. */
+ public Lucene42FieldInfosReader() {
+ }
+
+ @Override
+ public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
+ IndexInput input = directory.openInput(fileName, iocontext);
+
+ boolean success = false;
+ try {
+ CodecUtil.checkHeader(input, Lucene42FieldInfosFormat.CODEC_NAME,
+ Lucene42FieldInfosFormat.FORMAT_START,
+ Lucene42FieldInfosFormat.FORMAT_CURRENT);
+
+ final int size = input.readVInt(); //read in the size
+ FieldInfo infos[] = new FieldInfo[size];
+
+ for (int i = 0; i < size; i++) {
+ String name = input.readString();
+ final int fieldNumber = input.readVInt();
+ byte bits = input.readByte();
+ boolean isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
+ boolean storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
+ boolean omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
+ boolean storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
+ final IndexOptions indexOptions;
+ if (!isIndexed) {
+ indexOptions = null;
+ } else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
+ indexOptions = IndexOptions.DOCS_ONLY;
+ } else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) {
+ indexOptions = IndexOptions.DOCS_AND_FREQS;
+ } else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
+ indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+ } else {
+ indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+ }
+
+ // DV Types are packed in one byte
+ byte val = input.readByte();
+ final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
+ final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
+ final Map attributes = input.readStringStringMap();
+ infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
+ omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
+ }
+
+ if (input.getFilePointer() != input.length()) {
+ throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
+ }
+ FieldInfos fieldInfos = new FieldInfos(infos);
+ success = true;
+ return fieldInfos;
+ } finally {
+ if (success) {
+ input.close();
+ } else {
+ IOUtils.closeWhileHandlingException(input);
+ }
+ }
+ }
+
+ private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
+ if (b == 0) {
+ return null;
+ } else if (b == 1) {
+ return DocValuesType.NUMERIC;
+ } else if (b == 2) {
+ return DocValuesType.BINARY;
+ } else if (b == 3) {
+ return DocValuesType.SORTED;
+ } else {
+ throw new CorruptIndexException("invalid docvalues byte: " + b + " (resource=" + input + ")");
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java
new file mode 100644
index 00000000000..7e3aacf432b
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42FieldInfosWriter.java
@@ -0,0 +1,106 @@
+package org.apache.lucene.codecs.lucene42;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Lucene 4.2 FieldInfos writer.
+ *
+ * @see Lucene42FieldInfosFormat
+ * @lucene.experimental
+ */
+public class Lucene42FieldInfosWriter extends FieldInfosWriter {
+
+ /** Sole constructor. */
+ public Lucene42FieldInfosWriter() {
+ }
+
+ @Override
+ public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
+ IndexOutput output = directory.createOutput(fileName, context);
+ boolean success = false;
+ try {
+ CodecUtil.writeHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT);
+ output.writeVInt(infos.size());
+ for (FieldInfo fi : infos) {
+ IndexOptions indexOptions = fi.getIndexOptions();
+ byte bits = 0x0;
+ if (fi.hasVectors()) bits |= Lucene42FieldInfosFormat.STORE_TERMVECTOR;
+ if (fi.omitsNorms()) bits |= Lucene42FieldInfosFormat.OMIT_NORMS;
+ if (fi.hasPayloads()) bits |= Lucene42FieldInfosFormat.STORE_PAYLOADS;
+ if (fi.isIndexed()) {
+ bits |= Lucene42FieldInfosFormat.IS_INDEXED;
+ assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
+ if (indexOptions == IndexOptions.DOCS_ONLY) {
+ bits |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
+ } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+ bits |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
+ } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
+ bits |= Lucene42FieldInfosFormat.OMIT_POSITIONS;
+ }
+ }
+ output.writeString(fi.name);
+ output.writeVInt(fi.number);
+ output.writeByte(bits);
+
+ // pack the DV types in one byte
+ final byte dv = docValuesByte(fi.getDocValuesType());
+ final byte nrm = docValuesByte(fi.getNormType());
+ assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
+ byte val = (byte) (0xff & ((nrm << 4) | dv));
+ output.writeByte(val);
+ output.writeStringStringMap(fi.attributes());
+ }
+ success = true;
+ } finally {
+ if (success) {
+ output.close();
+ } else {
+ IOUtils.closeWhileHandlingException(output);
+ }
+ }
+ }
+
+ private static byte docValuesByte(DocValuesType type) {
+ if (type == null) {
+ return 0;
+ } else if (type == DocValuesType.NUMERIC) {
+ return 1;
+ } else if (type == DocValuesType.BINARY) {
+ return 2;
+ } else if (type == DocValuesType.SORTED) {
+ return 3;
+ } else {
+ throw new AssertionError();
+ }
+ }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
index 655fb359b41..2e687657826 100644
--- a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java
@@ -1,6 +1,7 @@
package org.apache.lucene.document;
-import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.index.AtomicReader; // javadocs
+import org.apache.lucene.search.FieldCache; // javadocs
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,7 +25,7 @@ import org.apache.lucene.search.FieldCache;
* via {@link Float#floatToRawIntBits(float)}.
*
* Per-document floating point values can be retrieved via
- * {@link FieldCache#getFloats(org.apache.lucene.index.AtomicReader, String, boolean)}.
+ * {@link FieldCache#getFloats(AtomicReader, String, boolean)}.
*
* NOTE: In most all cases this will be rather inefficient,
* requiring four bytes per document. Consider encoding floating