mirror of
https://github.com/apache/lucene.git
synced 2025-03-06 16:29:30 +00:00
4.2 fieldinfos
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436986 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
40a0fe32e4
commit
44cdb10870
@ -27,7 +27,6 @@ import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfoFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
|
||||
@ -36,7 +35,8 @@ import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
|
||||
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
|
||||
/**
|
||||
* Implements the Lucene 4.2 index format, with configurable per-field postings formats.
|
||||
* Implements the Lucene 4.2 index format, with configurable per-field postings
|
||||
* and docvalues formats.
|
||||
* <p>
|
||||
* If you want to reuse functionality of this codec in another codec, extend
|
||||
* {@link FilterCodec}.
|
||||
@ -50,7 +50,7 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
|
||||
public class Lucene42Codec extends Codec {
|
||||
private final StoredFieldsFormat fieldsFormat = new Lucene41StoredFieldsFormat();
|
||||
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
|
||||
private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat();
|
||||
private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat();
|
||||
private final SegmentInfoFormat infosFormat = new Lucene40SegmentInfoFormat();
|
||||
private final LiveDocsFormat liveDocsFormat = new Lucene40LiveDocsFormat();
|
||||
|
||||
|
@ -0,0 +1,121 @@
|
||||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosReader;
|
||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||
import org.apache.lucene.index.FieldInfo.DocValuesType; // javadoc
|
||||
import org.apache.lucene.store.DataOutput; // javadoc
|
||||
|
||||
/**
|
||||
* Lucene 4.2 Field Infos format.
|
||||
* <p>
|
||||
* <p>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</p>
|
||||
* <p>FieldInfos (.fnm) --> Header,FieldsCount, <FieldName,FieldNumber,
|
||||
* FieldBits,DocValuesBits,Attributes> <sup>FieldsCount</sup></p>
|
||||
* <p>Data types:
|
||||
* <ul>
|
||||
* <li>Header --> {@link CodecUtil#checkHeader CodecHeader}</li>
|
||||
* <li>FieldsCount --> {@link DataOutput#writeVInt VInt}</li>
|
||||
* <li>FieldName --> {@link DataOutput#writeString String}</li>
|
||||
* <li>FieldBits, DocValuesBits --> {@link DataOutput#writeByte Byte}</li>
|
||||
* <li>FieldNumber --> {@link DataOutput#writeInt VInt}</li>
|
||||
* <li>Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
* Field Descriptions:
|
||||
* <ul>
|
||||
* <li>FieldsCount: the number of fields in this file.</li>
|
||||
* <li>FieldName: name of the field as a UTF-8 String.</li>
|
||||
* <li>FieldNumber: the field's number. Note that unlike previous versions of
|
||||
* Lucene, the fields are not numbered implicitly by their order in the
|
||||
* file, instead explicitly.</li>
|
||||
* <li>FieldBits: a byte containing field options.
|
||||
* <ul>
|
||||
* <li>The low-order bit is one for indexed fields, and zero for non-indexed
|
||||
* fields.</li>
|
||||
* <li>The second lowest-order bit is one for fields that have term vectors
|
||||
* stored, and zero for fields without term vectors.</li>
|
||||
* <li>If the third lowest order-bit is set (0x4), offsets are stored into
|
||||
* the postings list in addition to positions.</li>
|
||||
* <li>Fourth bit is unused.</li>
|
||||
* <li>If the fifth lowest-order bit is set (0x10), norms are omitted for the
|
||||
* indexed field.</li>
|
||||
* <li>If the sixth lowest-order bit is set (0x20), payloads are stored for the
|
||||
* indexed field.</li>
|
||||
* <li>If the seventh lowest-order bit is set (0x40), term frequencies and
|
||||
* positions omitted for the indexed field.</li>
|
||||
* <li>If the eighth lowest-order bit is set (0x80), positions are omitted for the
|
||||
* indexed field.</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>DocValuesBits: a byte containing per-document value types. The type
|
||||
* recorded as two four-bit integers, with the high-order bits representing
|
||||
* <code>norms</code> options, and the low-order bits representing
|
||||
* {@code DocValues} options. Each four-bit integer can be decoded as such:
|
||||
* <ul>
|
||||
* <li>0: no DocValues for this field.</li>
|
||||
* <li>1: NumericDocValues. ({@link DocValuesType#NUMERIC})</li>
|
||||
* <li>2: BinaryDocValues. ({@code DocValuesType#BINARY})</li>
|
||||
* <li>3: SortedDocValues. ({@code DocValuesType#SORTED})</li>
|
||||
* </ul>
|
||||
* </li>
|
||||
* <li>Attributes: a key-value map of codec-private attributes.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class Lucene42FieldInfosFormat extends FieldInfosFormat {
|
||||
private final FieldInfosReader reader = new Lucene42FieldInfosReader();
|
||||
private final FieldInfosWriter writer = new Lucene42FieldInfosWriter();
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene42FieldInfosFormat() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfosReader getFieldInfosReader() throws IOException {
|
||||
return reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
||||
return writer;
|
||||
}
|
||||
|
||||
/** Extension of field infos */
|
||||
static final String EXTENSION = "fnm";
|
||||
|
||||
// Codec header
|
||||
static final String CODEC_NAME = "Lucene42FieldInfos";
|
||||
static final int FORMAT_START = 0;
|
||||
static final int FORMAT_CURRENT = FORMAT_START;
|
||||
|
||||
// Field flags
|
||||
static final byte IS_INDEXED = 0x1;
|
||||
static final byte STORE_TERMVECTOR = 0x2;
|
||||
static final byte STORE_OFFSETS_IN_POSTINGS = 0x4;
|
||||
static final byte OMIT_NORMS = 0x10;
|
||||
static final byte STORE_PAYLOADS = 0x20;
|
||||
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
||||
static final byte OMIT_POSITIONS = -128;
|
||||
}
|
@ -0,0 +1,121 @@
|
||||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldInfosReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Lucene 4.2 FieldInfos reader.
|
||||
*
|
||||
* @lucene.experimental
|
||||
* @see Lucene42FieldInfosFormat
|
||||
*/
|
||||
public class Lucene42FieldInfosReader extends FieldInfosReader {
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene42FieldInfosReader() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
|
||||
IndexInput input = directory.openInput(fileName, iocontext);
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.checkHeader(input, Lucene42FieldInfosFormat.CODEC_NAME,
|
||||
Lucene42FieldInfosFormat.FORMAT_START,
|
||||
Lucene42FieldInfosFormat.FORMAT_CURRENT);
|
||||
|
||||
final int size = input.readVInt(); //read in the size
|
||||
FieldInfo infos[] = new FieldInfo[size];
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
String name = input.readString();
|
||||
final int fieldNumber = input.readVInt();
|
||||
byte bits = input.readByte();
|
||||
boolean isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
|
||||
boolean storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
||||
boolean omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
|
||||
boolean storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
|
||||
final IndexOptions indexOptions;
|
||||
if (!isIndexed) {
|
||||
indexOptions = null;
|
||||
} else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_ONLY;
|
||||
} else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||
} else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||
} else {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
}
|
||||
|
||||
// DV Types are packed in one byte
|
||||
byte val = input.readByte();
|
||||
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
|
||||
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
|
||||
final Map<String,String> attributes = input.readStringStringMap();
|
||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
|
||||
}
|
||||
|
||||
if (input.getFilePointer() != input.length()) {
|
||||
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
|
||||
}
|
||||
FieldInfos fieldInfos = new FieldInfos(infos);
|
||||
success = true;
|
||||
return fieldInfos;
|
||||
} finally {
|
||||
if (success) {
|
||||
input.close();
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
||||
if (b == 0) {
|
||||
return null;
|
||||
} else if (b == 1) {
|
||||
return DocValuesType.NUMERIC;
|
||||
} else if (b == 2) {
|
||||
return DocValuesType.BINARY;
|
||||
} else if (b == 3) {
|
||||
return DocValuesType.SORTED;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid docvalues byte: " + b + " (resource=" + input + ")");
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,106 @@
|
||||
package org.apache.lucene.codecs.lucene42;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Lucene 4.2 FieldInfos writer.
|
||||
*
|
||||
* @see Lucene42FieldInfosFormat
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class Lucene42FieldInfosWriter extends FieldInfosWriter {
|
||||
|
||||
/** Sole constructor. */
|
||||
public Lucene42FieldInfosWriter() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
|
||||
IndexOutput output = directory.createOutput(fileName, context);
|
||||
boolean success = false;
|
||||
try {
|
||||
CodecUtil.writeHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT);
|
||||
output.writeVInt(infos.size());
|
||||
for (FieldInfo fi : infos) {
|
||||
IndexOptions indexOptions = fi.getIndexOptions();
|
||||
byte bits = 0x0;
|
||||
if (fi.hasVectors()) bits |= Lucene42FieldInfosFormat.STORE_TERMVECTOR;
|
||||
if (fi.omitsNorms()) bits |= Lucene42FieldInfosFormat.OMIT_NORMS;
|
||||
if (fi.hasPayloads()) bits |= Lucene42FieldInfosFormat.STORE_PAYLOADS;
|
||||
if (fi.isIndexed()) {
|
||||
bits |= Lucene42FieldInfosFormat.IS_INDEXED;
|
||||
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
bits |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
|
||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
|
||||
bits |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
|
||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||
bits |= Lucene42FieldInfosFormat.OMIT_POSITIONS;
|
||||
}
|
||||
}
|
||||
output.writeString(fi.name);
|
||||
output.writeVInt(fi.number);
|
||||
output.writeByte(bits);
|
||||
|
||||
// pack the DV types in one byte
|
||||
final byte dv = docValuesByte(fi.getDocValuesType());
|
||||
final byte nrm = docValuesByte(fi.getNormType());
|
||||
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
||||
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
||||
output.writeByte(val);
|
||||
output.writeStringStringMap(fi.attributes());
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
output.close();
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static byte docValuesByte(DocValuesType type) {
|
||||
if (type == null) {
|
||||
return 0;
|
||||
} else if (type == DocValuesType.NUMERIC) {
|
||||
return 1;
|
||||
} else if (type == DocValuesType.BINARY) {
|
||||
return 2;
|
||||
} else if (type == DocValuesType.SORTED) {
|
||||
return 3;
|
||||
} else {
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.index.AtomicReader; // javadocs
|
||||
import org.apache.lucene.search.FieldCache; // javadocs
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
@ -24,7 +25,7 @@ import org.apache.lucene.search.FieldCache;
|
||||
* via {@link Float#floatToRawIntBits(float)}.
|
||||
* <p>
|
||||
* Per-document floating point values can be retrieved via
|
||||
* {@link FieldCache#getFloats(org.apache.lucene.index.AtomicReader, String, boolean)}.
|
||||
* {@link FieldCache#getFloats(AtomicReader, String, boolean)}.
|
||||
* <p>
|
||||
* <b>NOTE</b>: In most all cases this will be rather inefficient,
|
||||
* requiring four bytes per document. Consider encoding floating
|
||||
|
Loading…
x
Reference in New Issue
Block a user