LUCENE-3728: split stored fields into 3x/4x so more docstore stuff can go in 3x

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237713 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-01-30 15:19:32 +00:00
parent 1e60692bd7
commit c99756201d
9 changed files with 580 additions and 89 deletions

View File

@ -29,10 +29,8 @@ import org.apache.lucene.codecs.PerDocProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfosFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
@ -53,13 +51,7 @@ public class Lucene3xCodec extends Codec {
private final PostingsFormat postingsFormat = new Lucene3xPostingsFormat();
// TODO: this should really be a different impl
private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat() {
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
throw new UnsupportedOperationException("this codec can only be used for reading");
}
};
private final StoredFieldsFormat fieldsFormat = new Lucene3xStoredFieldsFormat();
private final TermVectorsFormat vectorsFormat = new Lucene3xTermVectorsFormat();

View File

@ -66,7 +66,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader {
}
try {
Lucene40StoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
Lucene3xStoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
} finally {
// If we opened the directory, close it
if (dir != directory) dir.close();

View File

@ -0,0 +1,51 @@
package org.apache.lucene.codecs.lucene3x;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/** @deprecated */
@Deprecated
public class Lucene3xStoredFieldsFormat extends StoredFieldsFormat {
@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
FieldInfos fn, IOContext context) throws IOException {
return new Lucene3xStoredFieldsReader(directory, si, fn, context);
}
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, String segment,
IOContext context) throws IOException {
throw new UnsupportedOperationException("this codec can only be used for reading");
}
@Override
public void files(SegmentInfo info, Set<String> files) throws IOException {
Lucene3xStoredFieldsReader.files(info, files);
}
}

View File

@ -0,0 +1,333 @@
package org.apache.lucene.codecs.lucene3x;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
import java.nio.charset.Charset;
import java.util.Set;
/**
* Class responsible for access to stored document fields.
* <p/>
* It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx; files.
*
* @deprecated
*/
@Deprecated
public final class Lucene3xStoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
private final static int FORMAT_SIZE = 4;
/** Extension of stored fields file */
public static final String FIELDS_EXTENSION = "fdt";
/** Extension of stored fields index file */
public static final String FIELDS_INDEX_EXTENSION = "fdx";
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
// Lucene 3.2: NumericFields are stored in binary format
static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
// NOTE: if you introduce a new format, make it 1 higher
// than the current one, and always change this if you
// switch to a new format!
public static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
// when removing support for old versions, leave the last supported version here
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
// NOTE: bit 0 is free here! You can steal it!
public static final int FIELD_IS_BINARY = 1 << 1;
// the old bit 1 << 2 was compressed, is now left out
private static final int _NUMERIC_BIT_SHIFT = 3;
static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
public static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
public static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
public static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
public static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
private final FieldInfos fieldInfos;
private final IndexInput fieldsStream;
private final IndexInput indexStream;
private int numTotalDocs;
private int size;
private boolean closed;
private final int format;
// The docID offset where our docs begin in the index
// file. This will be 0 if we have our own private file.
private int docStoreOffset;
/** Returns a cloned FieldsReader that shares open
* IndexInputs with the original one. It is the caller's
* job not to close the original FieldsReader until all
* clones are called (eg, currently SegmentReader manages
* this logic). */
@Override
public Lucene3xStoredFieldsReader clone() {
ensureOpen();
return new Lucene3xStoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
}
/** Verifies that the code version which wrote the segment is supported. */
public static void checkCodeVersion(Directory dir, String segment) throws IOException {
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
try {
int format = idxStream.readInt();
if (format < FORMAT_MINIMUM)
throw new IndexFormatTooOldException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
if (format > FORMAT_CURRENT)
throw new IndexFormatTooNewException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
} finally {
idxStream.close();
}
}
// Used only by clone
private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
IndexInput fieldsStream, IndexInput indexStream) {
this.fieldInfos = fieldInfos;
this.numTotalDocs = numTotalDocs;
this.size = size;
this.format = format;
this.docStoreOffset = docStoreOffset;
this.fieldsStream = fieldsStream;
this.indexStream = indexStream;
}
public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
final String segment = si.getDocStoreSegment();
final int docStoreOffset = si.getDocStoreOffset();
final int size = si.docCount;
boolean success = false;
fieldInfos = fn;
try {
fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
format = indexStream.readInt();
if (format < FORMAT_MINIMUM)
throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
if (format > FORMAT_CURRENT)
throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
final long indexSize = indexStream.length() - FORMAT_SIZE;
if (docStoreOffset != -1) {
// We read only a slice out of this shared fields file
this.docStoreOffset = docStoreOffset;
this.size = size;
// Verify the file is long enough to hold all of our
// docs
assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
} else {
this.docStoreOffset = 0;
this.size = (int) (indexSize >> 3);
// Verify two sources of "maxDoc" agree:
if (this.size != si.docCount) {
throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
}
}
numTotalDocs = (int) (indexSize >> 3);
success = true;
} finally {
// With lock-less commits, it's entirely possible (and
// fine) to hit a FileNotFound exception above. In
// this case, we want to explicitly close any subset
// of things that were opened so that we don't have to
// wait for a GC to do so.
if (!success) {
close();
}
}
}
/**
* @throws AlreadyClosedException if this FieldsReader is closed
*/
private void ensureOpen() throws AlreadyClosedException {
if (closed) {
throw new AlreadyClosedException("this FieldsReader is closed");
}
}
/**
* Closes the underlying {@link org.apache.lucene.store.IndexInput} streams.
* This means that the Fields values will not be accessible.
*
* @throws IOException
*/
public final void close() throws IOException {
if (!closed) {
IOUtils.close(fieldsStream, indexStream);
closed = true;
}
}
public final int size() {
return size;
}
private void seekIndex(int docID) throws IOException {
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
}
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
seekIndex(n);
fieldsStream.seek(indexStream.readLong());
final int numFields = fieldsStream.readVInt();
for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
int bits = fieldsStream.readByte() & 0xFF;
assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
switch(visitor.needsField(fieldInfo)) {
case YES:
readField(visitor, fieldInfo, bits);
break;
case NO:
skipField(bits);
break;
case STOP:
return;
}
}
}
private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
final int numeric = bits & FIELD_IS_NUMERIC_MASK;
if (numeric != 0) {
switch(numeric) {
case FIELD_IS_NUMERIC_INT:
visitor.intField(info, fieldsStream.readInt());
return;
case FIELD_IS_NUMERIC_LONG:
visitor.longField(info, fieldsStream.readLong());
return;
case FIELD_IS_NUMERIC_FLOAT:
visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
return;
case FIELD_IS_NUMERIC_DOUBLE:
visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
return;
default:
throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
}
} else {
final int length = fieldsStream.readVInt();
byte bytes[] = new byte[length];
fieldsStream.readBytes(bytes, 0, length);
if ((bits & FIELD_IS_BINARY) != 0) {
visitor.binaryField(info, bytes, 0, bytes.length);
} else {
visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
}
}
}
private void skipField(int bits) throws IOException {
final int numeric = bits & FIELD_IS_NUMERIC_MASK;
if (numeric != 0) {
switch(numeric) {
case FIELD_IS_NUMERIC_INT:
case FIELD_IS_NUMERIC_FLOAT:
fieldsStream.readInt();
return;
case FIELD_IS_NUMERIC_LONG:
case FIELD_IS_NUMERIC_DOUBLE:
fieldsStream.readLong();
return;
default:
throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
}
} else {
final int length = fieldsStream.readVInt();
fieldsStream.seek(fieldsStream.getFilePointer() + length);
}
}
/** Returns the length in bytes of each raw document in a
* contiguous range of length numDocs starting with
* startDocID. Returns the IndexInput (the fieldStream),
* already seeked to the starting point for startDocID.*/
public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
seekIndex(startDocID);
long startOffset = indexStream.readLong();
long lastOffset = startOffset;
int count = 0;
while (count < numDocs) {
final long offset;
final int docID = docStoreOffset + startDocID + count + 1;
assert docID <= numTotalDocs;
if (docID < numTotalDocs)
offset = indexStream.readLong();
else
offset = fieldsStream.length();
lengths[count++] = (int) (offset-lastOffset);
lastOffset = offset;
}
fieldsStream.seek(startOffset);
return fieldsStream;
}
// TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
// only preflex segments refer to these?
public static void files(SegmentInfo info, Set<String> files) throws IOException {
if (info.getDocStoreOffset() != -1) {
assert info.getDocStoreSegment() != null;
if (!info.getDocStoreIsCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION));
}
} else {
files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
}
}
}

View File

@ -24,8 +24,6 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
@ -35,7 +33,6 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
import java.nio.charset.Charset;
import java.util.Set;
/**
@ -54,11 +51,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
private int numTotalDocs;
private int size;
private boolean closed;
private final int format;
// The docID offset where our docs begin in the index
// file. This will be 0 if we have our own private file.
private int docStoreOffset;
/** Returns a cloned FieldsReader that shares open
* IndexInputs with the original one. It is the caller's
@ -68,41 +60,20 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
@Override
public Lucene40StoredFieldsReader clone() {
ensureOpen();
return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
}
/** Verifies that the code version which wrote the segment is supported. */
public static void checkCodeVersion(Directory dir, String segment) throws IOException {
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
try {
int format = idxStream.readInt();
if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
throw new IndexFormatTooOldException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
throw new IndexFormatTooNewException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
} finally {
idxStream.close();
}
return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
}
// Used only by clone
private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
IndexInput fieldsStream, IndexInput indexStream) {
private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) {
this.fieldInfos = fieldInfos;
this.numTotalDocs = numTotalDocs;
this.size = size;
this.format = format;
this.docStoreOffset = docStoreOffset;
this.fieldsStream = fieldsStream;
this.indexStream = indexStream;
}
public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
final String segment = si.getDocStoreSegment();
final int docStoreOffset = si.getDocStoreOffset();
final int size = si.docCount;
final String segment = si.name;
boolean success = false;
fieldInfos = fn;
try {
@ -110,31 +81,18 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
format = indexStream.readInt();
if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
throw new IndexFormatTooOldException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
throw new IndexFormatTooNewException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
// its a 4.0 codec: so its not too-old, its corrupt.
// TODO: change this to CodecUtil.checkHeader
if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) {
throw new CorruptIndexException("unexpected fdx header: " + indexStream);
}
final long indexSize = indexStream.length() - FORMAT_SIZE;
if (docStoreOffset != -1) {
// We read only a slice out of this shared fields file
this.docStoreOffset = docStoreOffset;
this.size = size;
// Verify the file is long enough to hold all of our
// docs
assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
} else {
this.docStoreOffset = 0;
this.size = (int) (indexSize >> 3);
// Verify two sources of "maxDoc" agree:
if (this.size != si.docCount) {
throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
}
}
numTotalDocs = (int) (indexSize >> 3);
success = true;
} finally {
@ -176,7 +134,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
}
private void seekIndex(int docID) throws IOException {
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
indexStream.seek(FORMAT_SIZE + docID * 8L);
}
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
@ -204,8 +162,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
}
}
static final Charset UTF8 = Charset.forName("UTF-8");
private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
if (numeric != 0) {
@ -232,7 +188,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) {
visitor.binaryField(info, bytes, 0, bytes.length);
} else {
visitor.stringField(info, new String(bytes, 0, bytes.length, UTF8));
visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
}
}
}
@ -269,7 +225,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
int count = 0;
while (count < numDocs) {
final long offset;
final int docID = docStoreOffset + startDocID + count + 1;
final int docID = startDocID + count + 1;
assert docID <= numTotalDocs;
if (docID < numTotalDocs)
offset = indexStream.readLong();
@ -284,18 +240,8 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
return fieldsStream;
}
// TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
// only preflex segments refer to these?
public static void files(SegmentInfo info, Set<String> files) throws IOException {
if (info.getDocStoreOffset() != -1) {
assert info.getDocStoreSegment() != null;
if (!info.getDocStoreIsCompoundFile()) {
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
}
} else {
files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
}
}
}

View File

@ -55,10 +55,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
// currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
// currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
// Lucene 3.2: NumericFields are stored in binary format
// (Happens to be the same as for now) Lucene 3.2: NumericFields are stored in binary format
static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
// NOTE: if you introduce a new format, make it 1 higher
@ -67,7 +64,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
// when removing support for old versions, leave the last supported version here
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
/** Extension of stored fields file */
public static final String FIELDS_EXTENSION = "fdt";

View File

@ -45,10 +45,9 @@ public class PreFlexRWCodec extends Lucene3xCodec {
private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat();
private final TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat();
private final SegmentInfosFormat segmentInfos = new PreFlexRWSegmentInfosFormat();
private final StoredFieldsFormat storedFields = new PreFlexRWStoredFieldsFormat();
// TODO: this should really be a different impl
private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat();
// TODO: this should really be a different impl
private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
@Override
public PostingsFormat postingsFormat() {

View File

@ -0,0 +1,17 @@
package org.apache.lucene.codecs.preflexrw;
import java.io.IOException;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsFormat;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
public class PreFlexRWStoredFieldsFormat extends Lucene3xStoredFieldsFormat {
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
return new PreFlexRWStoredFieldsWriter(directory, segment, context);
}
}

View File

@ -0,0 +1,156 @@
package org.apache.lucene.codecs.preflexrw;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
/** @lucene.experimental */
public final class PreFlexRWStoredFieldsWriter extends StoredFieldsWriter {
private final Directory directory;
private final String segment;
private IndexOutput fieldsStream;
private IndexOutput indexStream;
public PreFlexRWStoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
assert directory != null;
this.directory = directory;
this.segment = segment;
boolean success = false;
try {
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);
fieldsStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
indexStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
success = true;
} finally {
if (!success) {
abort();
}
}
}
// Writes the contents of buffer into the fields stream
// and adds a new entry for this document into the index
// stream. This assumes the buffer was already written
// in the correct fields format.
public void startDocument(int numStoredFields) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
fieldsStream.writeVInt(numStoredFields);
}
public void close() throws IOException {
try {
IOUtils.close(fieldsStream, indexStream);
} finally {
fieldsStream = indexStream = null;
}
}
public void abort() {
try {
close();
} catch (IOException ignored) {}
IOUtils.deleteFilesIgnoringExceptions(directory,
IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION),
IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
}
public void writeField(FieldInfo info, IndexableField field) throws IOException {
fieldsStream.writeVInt(info.number);
int bits = 0;
final BytesRef bytes;
final String string;
// TODO: maybe a field should serialize itself?
// this way we don't bake into indexer all these
// specific encodings for different fields? and apps
// can customize...
Number number = field.numericValue();
if (number != null) {
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
} else if (number instanceof Long) {
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
} else if (number instanceof Float) {
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
} else if (number instanceof Double) {
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
} else {
throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
}
string = null;
bytes = null;
} else {
bytes = field.binaryValue();
if (bytes != null) {
bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
string = null;
} else {
string = field.stringValue();
if (string == null) {
throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
}
}
}
fieldsStream.writeByte((byte) bits);
if (bytes != null) {
fieldsStream.writeVInt(bytes.length);
fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length);
} else if (string != null) {
fieldsStream.writeString(field.stringValue());
} else {
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
fieldsStream.writeInt(number.intValue());
} else if (number instanceof Long) {
fieldsStream.writeLong(number.longValue());
} else if (number instanceof Float) {
fieldsStream.writeInt(Float.floatToIntBits(number.floatValue()));
} else if (number instanceof Double) {
fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue()));
} else {
assert false;
}
}
}
@Override
public void finish(int numDocs) throws IOException {
if (4+((long) numDocs)*8 != indexStream.getFilePointer())
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
// we detect that the bug has struck, here, and
// throw an exception to prevent the corruption from
// entering the index. See LUCENE-1282 for
// details.
throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption");
}
}