mirror of https://github.com/apache/lucene.git
LUCENE-3728: split stored fields into 3x/4x so more docstore stuff can go in 3x
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3661@1237713 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1e60692bd7
commit
c99756201d
|
@ -29,10 +29,8 @@ import org.apache.lucene.codecs.PerDocProducer;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.SegmentInfosFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.PerDocWriteState;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
|
@ -53,13 +51,7 @@ public class Lucene3xCodec extends Codec {
|
|||
|
||||
private final PostingsFormat postingsFormat = new Lucene3xPostingsFormat();
|
||||
|
||||
// TODO: this should really be a different impl
|
||||
private final StoredFieldsFormat fieldsFormat = new Lucene40StoredFieldsFormat() {
|
||||
@Override
|
||||
public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||
}
|
||||
};
|
||||
private final StoredFieldsFormat fieldsFormat = new Lucene3xStoredFieldsFormat();
|
||||
|
||||
private final TermVectorsFormat vectorsFormat = new Lucene3xTermVectorsFormat();
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ public class Lucene3xSegmentInfosReader extends SegmentInfosReader {
|
|||
}
|
||||
|
||||
try {
|
||||
Lucene40StoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
|
||||
Lucene3xStoredFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
|
||||
} finally {
|
||||
// If we opened the directory, close it
|
||||
if (dir != directory) dir.close();
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
package org.apache.lucene.codecs.lucene3x;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
|
||||
/** @deprecated */
|
||||
@Deprecated
|
||||
public class Lucene3xStoredFieldsFormat extends StoredFieldsFormat {
|
||||
|
||||
@Override
|
||||
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
|
||||
FieldInfos fn, IOContext context) throws IOException {
|
||||
return new Lucene3xStoredFieldsReader(directory, si, fn, context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public StoredFieldsWriter fieldsWriter(Directory directory, String segment,
|
||||
IOContext context) throws IOException {
|
||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(SegmentInfo info, Set<String> files) throws IOException {
|
||||
Lucene3xStoredFieldsReader.files(info, files);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,333 @@
|
|||
package org.apache.lucene.codecs.lucene3x;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexFormatTooNewException;
|
||||
import org.apache.lucene.index.IndexFormatTooOldException;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Class responsible for access to stored document fields.
|
||||
* <p/>
|
||||
* It uses <segment>.fdt and <segment>.fdx; files.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
@Deprecated
|
||||
public final class Lucene3xStoredFieldsReader extends StoredFieldsReader implements Cloneable, Closeable {
|
||||
private final static int FORMAT_SIZE = 4;
|
||||
|
||||
/** Extension of stored fields file */
|
||||
public static final String FIELDS_EXTENSION = "fdt";
|
||||
|
||||
/** Extension of stored fields index file */
|
||||
public static final String FIELDS_INDEX_EXTENSION = "fdx";
|
||||
|
||||
// Lucene 3.0: Removal of compressed fields
|
||||
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
|
||||
|
||||
// Lucene 3.2: NumericFields are stored in binary format
|
||||
static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
|
||||
|
||||
// NOTE: if you introduce a new format, make it 1 higher
|
||||
// than the current one, and always change this if you
|
||||
// switch to a new format!
|
||||
public static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
|
||||
|
||||
// when removing support for old versions, leave the last supported version here
|
||||
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
|
||||
|
||||
// NOTE: bit 0 is free here! You can steal it!
|
||||
public static final int FIELD_IS_BINARY = 1 << 1;
|
||||
|
||||
// the old bit 1 << 2 was compressed, is now left out
|
||||
|
||||
private static final int _NUMERIC_BIT_SHIFT = 3;
|
||||
static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
|
||||
|
||||
public static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
|
||||
public static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
|
||||
public static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
|
||||
public static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
|
||||
|
||||
private final FieldInfos fieldInfos;
|
||||
private final IndexInput fieldsStream;
|
||||
private final IndexInput indexStream;
|
||||
private int numTotalDocs;
|
||||
private int size;
|
||||
private boolean closed;
|
||||
private final int format;
|
||||
|
||||
// The docID offset where our docs begin in the index
|
||||
// file. This will be 0 if we have our own private file.
|
||||
private int docStoreOffset;
|
||||
|
||||
/** Returns a cloned FieldsReader that shares open
|
||||
* IndexInputs with the original one. It is the caller's
|
||||
* job not to close the original FieldsReader until all
|
||||
* clones are called (eg, currently SegmentReader manages
|
||||
* this logic). */
|
||||
@Override
|
||||
public Lucene3xStoredFieldsReader clone() {
|
||||
ensureOpen();
|
||||
return new Lucene3xStoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
|
||||
}
|
||||
|
||||
/** Verifies that the code version which wrote the segment is supported. */
|
||||
public static void checkCodeVersion(Directory dir, String segment) throws IOException {
|
||||
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
|
||||
IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
|
||||
|
||||
try {
|
||||
int format = idxStream.readInt();
|
||||
if (format < FORMAT_MINIMUM)
|
||||
throw new IndexFormatTooOldException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
|
||||
if (format > FORMAT_CURRENT)
|
||||
throw new IndexFormatTooNewException(idxStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
|
||||
} finally {
|
||||
idxStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
// Used only by clone
|
||||
private Lucene3xStoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
|
||||
IndexInput fieldsStream, IndexInput indexStream) {
|
||||
this.fieldInfos = fieldInfos;
|
||||
this.numTotalDocs = numTotalDocs;
|
||||
this.size = size;
|
||||
this.format = format;
|
||||
this.docStoreOffset = docStoreOffset;
|
||||
this.fieldsStream = fieldsStream;
|
||||
this.indexStream = indexStream;
|
||||
}
|
||||
|
||||
public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
|
||||
final String segment = si.getDocStoreSegment();
|
||||
final int docStoreOffset = si.getDocStoreOffset();
|
||||
final int size = si.docCount;
|
||||
boolean success = false;
|
||||
fieldInfos = fn;
|
||||
try {
|
||||
fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
|
||||
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
|
||||
indexStream = d.openInput(indexStreamFN, context);
|
||||
|
||||
format = indexStream.readInt();
|
||||
|
||||
if (format < FORMAT_MINIMUM)
|
||||
throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
|
||||
if (format > FORMAT_CURRENT)
|
||||
throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
|
||||
|
||||
final long indexSize = indexStream.length() - FORMAT_SIZE;
|
||||
|
||||
if (docStoreOffset != -1) {
|
||||
// We read only a slice out of this shared fields file
|
||||
this.docStoreOffset = docStoreOffset;
|
||||
this.size = size;
|
||||
|
||||
// Verify the file is long enough to hold all of our
|
||||
// docs
|
||||
assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
|
||||
} else {
|
||||
this.docStoreOffset = 0;
|
||||
this.size = (int) (indexSize >> 3);
|
||||
// Verify two sources of "maxDoc" agree:
|
||||
if (this.size != si.docCount) {
|
||||
throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
|
||||
}
|
||||
}
|
||||
numTotalDocs = (int) (indexSize >> 3);
|
||||
success = true;
|
||||
} finally {
|
||||
// With lock-less commits, it's entirely possible (and
|
||||
// fine) to hit a FileNotFound exception above. In
|
||||
// this case, we want to explicitly close any subset
|
||||
// of things that were opened so that we don't have to
|
||||
// wait for a GC to do so.
|
||||
if (!success) {
|
||||
close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws AlreadyClosedException if this FieldsReader is closed
|
||||
*/
|
||||
private void ensureOpen() throws AlreadyClosedException {
|
||||
if (closed) {
|
||||
throw new AlreadyClosedException("this FieldsReader is closed");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the underlying {@link org.apache.lucene.store.IndexInput} streams.
|
||||
* This means that the Fields values will not be accessible.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public final void close() throws IOException {
|
||||
if (!closed) {
|
||||
IOUtils.close(fieldsStream, indexStream);
|
||||
closed = true;
|
||||
}
|
||||
}
|
||||
|
||||
public final int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
private void seekIndex(int docID) throws IOException {
|
||||
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
|
||||
}
|
||||
|
||||
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
|
||||
seekIndex(n);
|
||||
fieldsStream.seek(indexStream.readLong());
|
||||
|
||||
final int numFields = fieldsStream.readVInt();
|
||||
for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
|
||||
int fieldNumber = fieldsStream.readVInt();
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
|
||||
|
||||
int bits = fieldsStream.readByte() & 0xFF;
|
||||
assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
|
||||
|
||||
switch(visitor.needsField(fieldInfo)) {
|
||||
case YES:
|
||||
readField(visitor, fieldInfo, bits);
|
||||
break;
|
||||
case NO:
|
||||
skipField(bits);
|
||||
break;
|
||||
case STOP:
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
|
||||
final int numeric = bits & FIELD_IS_NUMERIC_MASK;
|
||||
if (numeric != 0) {
|
||||
switch(numeric) {
|
||||
case FIELD_IS_NUMERIC_INT:
|
||||
visitor.intField(info, fieldsStream.readInt());
|
||||
return;
|
||||
case FIELD_IS_NUMERIC_LONG:
|
||||
visitor.longField(info, fieldsStream.readLong());
|
||||
return;
|
||||
case FIELD_IS_NUMERIC_FLOAT:
|
||||
visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
|
||||
return;
|
||||
case FIELD_IS_NUMERIC_DOUBLE:
|
||||
visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
|
||||
return;
|
||||
default:
|
||||
throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||
}
|
||||
} else {
|
||||
final int length = fieldsStream.readVInt();
|
||||
byte bytes[] = new byte[length];
|
||||
fieldsStream.readBytes(bytes, 0, length);
|
||||
if ((bits & FIELD_IS_BINARY) != 0) {
|
||||
visitor.binaryField(info, bytes, 0, bytes.length);
|
||||
} else {
|
||||
visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void skipField(int bits) throws IOException {
|
||||
final int numeric = bits & FIELD_IS_NUMERIC_MASK;
|
||||
if (numeric != 0) {
|
||||
switch(numeric) {
|
||||
case FIELD_IS_NUMERIC_INT:
|
||||
case FIELD_IS_NUMERIC_FLOAT:
|
||||
fieldsStream.readInt();
|
||||
return;
|
||||
case FIELD_IS_NUMERIC_LONG:
|
||||
case FIELD_IS_NUMERIC_DOUBLE:
|
||||
fieldsStream.readLong();
|
||||
return;
|
||||
default:
|
||||
throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||
}
|
||||
} else {
|
||||
final int length = fieldsStream.readVInt();
|
||||
fieldsStream.seek(fieldsStream.getFilePointer() + length);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the length in bytes of each raw document in a
|
||||
* contiguous range of length numDocs starting with
|
||||
* startDocID. Returns the IndexInput (the fieldStream),
|
||||
* already seeked to the starting point for startDocID.*/
|
||||
public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
|
||||
seekIndex(startDocID);
|
||||
long startOffset = indexStream.readLong();
|
||||
long lastOffset = startOffset;
|
||||
int count = 0;
|
||||
while (count < numDocs) {
|
||||
final long offset;
|
||||
final int docID = docStoreOffset + startDocID + count + 1;
|
||||
assert docID <= numTotalDocs;
|
||||
if (docID < numTotalDocs)
|
||||
offset = indexStream.readLong();
|
||||
else
|
||||
offset = fieldsStream.length();
|
||||
lengths[count++] = (int) (offset-lastOffset);
|
||||
lastOffset = offset;
|
||||
}
|
||||
|
||||
fieldsStream.seek(startOffset);
|
||||
|
||||
return fieldsStream;
|
||||
}
|
||||
|
||||
// TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
|
||||
// only preflex segments refer to these?
|
||||
public static void files(SegmentInfo info, Set<String> files) throws IOException {
|
||||
if (info.getDocStoreOffset() != -1) {
|
||||
assert info.getDocStoreSegment() != null;
|
||||
if (!info.getDocStoreIsCompoundFile()) {
|
||||
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_INDEX_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", FIELDS_EXTENSION));
|
||||
}
|
||||
} else {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_INDEX_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", FIELDS_EXTENSION));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -24,8 +24,6 @@ import org.apache.lucene.index.CorruptIndexException;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexFormatTooNewException;
|
||||
import org.apache.lucene.index.IndexFormatTooOldException;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
|
@ -35,7 +33,6 @@ import org.apache.lucene.store.IndexInput;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
|
@ -54,11 +51,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
private int numTotalDocs;
|
||||
private int size;
|
||||
private boolean closed;
|
||||
private final int format;
|
||||
|
||||
// The docID offset where our docs begin in the index
|
||||
// file. This will be 0 if we have our own private file.
|
||||
private int docStoreOffset;
|
||||
|
||||
/** Returns a cloned FieldsReader that shares open
|
||||
* IndexInputs with the original one. It is the caller's
|
||||
|
@ -68,41 +60,20 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
@Override
|
||||
public Lucene40StoredFieldsReader clone() {
|
||||
ensureOpen();
|
||||
return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
|
||||
}
|
||||
|
||||
/** Verifies that the code version which wrote the segment is supported. */
|
||||
public static void checkCodeVersion(Directory dir, String segment) throws IOException {
|
||||
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
|
||||
IndexInput idxStream = dir.openInput(indexStreamFN, IOContext.DEFAULT);
|
||||
|
||||
try {
|
||||
int format = idxStream.readInt();
|
||||
if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
|
||||
throw new IndexFormatTooOldException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
|
||||
if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
|
||||
throw new IndexFormatTooNewException(idxStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
|
||||
} finally {
|
||||
idxStream.close();
|
||||
}
|
||||
return new Lucene40StoredFieldsReader(fieldInfos, numTotalDocs, size, (IndexInput)fieldsStream.clone(), (IndexInput)indexStream.clone());
|
||||
}
|
||||
|
||||
// Used only by clone
|
||||
private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
|
||||
IndexInput fieldsStream, IndexInput indexStream) {
|
||||
private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) {
|
||||
this.fieldInfos = fieldInfos;
|
||||
this.numTotalDocs = numTotalDocs;
|
||||
this.size = size;
|
||||
this.format = format;
|
||||
this.docStoreOffset = docStoreOffset;
|
||||
this.fieldsStream = fieldsStream;
|
||||
this.indexStream = indexStream;
|
||||
}
|
||||
|
||||
public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
|
||||
final String segment = si.getDocStoreSegment();
|
||||
final int docStoreOffset = si.getDocStoreOffset();
|
||||
final int size = si.docCount;
|
||||
final String segment = si.name;
|
||||
boolean success = false;
|
||||
fieldInfos = fn;
|
||||
try {
|
||||
|
@ -110,31 +81,18 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
|
||||
indexStream = d.openInput(indexStreamFN, context);
|
||||
|
||||
format = indexStream.readInt();
|
||||
|
||||
if (format < Lucene40StoredFieldsWriter.FORMAT_MINIMUM)
|
||||
throw new IndexFormatTooOldException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
|
||||
if (format > Lucene40StoredFieldsWriter.FORMAT_CURRENT)
|
||||
throw new IndexFormatTooNewException(indexStream, format, Lucene40StoredFieldsWriter.FORMAT_MINIMUM, Lucene40StoredFieldsWriter.FORMAT_CURRENT);
|
||||
// its a 4.0 codec: so its not too-old, its corrupt.
|
||||
// TODO: change this to CodecUtil.checkHeader
|
||||
if (Lucene40StoredFieldsWriter.FORMAT_CURRENT != indexStream.readInt()) {
|
||||
throw new CorruptIndexException("unexpected fdx header: " + indexStream);
|
||||
}
|
||||
|
||||
final long indexSize = indexStream.length() - FORMAT_SIZE;
|
||||
|
||||
if (docStoreOffset != -1) {
|
||||
// We read only a slice out of this shared fields file
|
||||
this.docStoreOffset = docStoreOffset;
|
||||
this.size = size;
|
||||
|
||||
// Verify the file is long enough to hold all of our
|
||||
// docs
|
||||
assert ((int) (indexSize / 8)) >= size + this.docStoreOffset: "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset;
|
||||
} else {
|
||||
this.docStoreOffset = 0;
|
||||
this.size = (int) (indexSize >> 3);
|
||||
// Verify two sources of "maxDoc" agree:
|
||||
if (this.size != si.docCount) {
|
||||
throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.docCount);
|
||||
}
|
||||
}
|
||||
numTotalDocs = (int) (indexSize >> 3);
|
||||
success = true;
|
||||
} finally {
|
||||
|
@ -176,7 +134,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
}
|
||||
|
||||
private void seekIndex(int docID) throws IOException {
|
||||
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
|
||||
indexStream.seek(FORMAT_SIZE + docID * 8L);
|
||||
}
|
||||
|
||||
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
|
||||
|
@ -204,8 +162,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
}
|
||||
}
|
||||
|
||||
static final Charset UTF8 = Charset.forName("UTF-8");
|
||||
|
||||
private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
|
||||
final int numeric = bits & Lucene40StoredFieldsWriter.FIELD_IS_NUMERIC_MASK;
|
||||
if (numeric != 0) {
|
||||
|
@ -232,7 +188,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
if ((bits & Lucene40StoredFieldsWriter.FIELD_IS_BINARY) != 0) {
|
||||
visitor.binaryField(info, bytes, 0, bytes.length);
|
||||
} else {
|
||||
visitor.stringField(info, new String(bytes, 0, bytes.length, UTF8));
|
||||
visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -269,7 +225,7 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
int count = 0;
|
||||
while (count < numDocs) {
|
||||
final long offset;
|
||||
final int docID = docStoreOffset + startDocID + count + 1;
|
||||
final int docID = startDocID + count + 1;
|
||||
assert docID <= numTotalDocs;
|
||||
if (docID < numTotalDocs)
|
||||
offset = indexStream.readLong();
|
||||
|
@ -284,18 +240,8 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme
|
|||
return fieldsStream;
|
||||
}
|
||||
|
||||
// TODO: split into PreFlexFieldsReader so it can handle this shared docstore crap?
|
||||
// only preflex segments refer to these?
|
||||
public static void files(SegmentInfo info, Set<String> files) throws IOException {
|
||||
if (info.getDocStoreOffset() != -1) {
|
||||
assert info.getDocStoreSegment() != null;
|
||||
if (!info.getDocStoreIsCompoundFile()) {
|
||||
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(info.getDocStoreSegment(), "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
|
||||
}
|
||||
} else {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,10 +55,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
|
|||
// currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
|
||||
// currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
|
||||
|
||||
// Lucene 3.0: Removal of compressed fields
|
||||
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
|
||||
|
||||
// Lucene 3.2: NumericFields are stored in binary format
|
||||
// (Happens to be the same as for now) Lucene 3.2: NumericFields are stored in binary format
|
||||
static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
|
||||
|
||||
// NOTE: if you introduce a new format, make it 1 higher
|
||||
|
@ -67,7 +64,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter {
|
|||
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
|
||||
|
||||
// when removing support for old versions, leave the last supported version here
|
||||
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
|
||||
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
|
||||
|
||||
/** Extension of stored fields file */
|
||||
public static final String FIELDS_EXTENSION = "fdt";
|
||||
|
|
|
@ -45,10 +45,9 @@ public class PreFlexRWCodec extends Lucene3xCodec {
|
|||
private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat();
|
||||
private final TermVectorsFormat termVectors = new PreFlexRWTermVectorsFormat();
|
||||
private final SegmentInfosFormat segmentInfos = new PreFlexRWSegmentInfosFormat();
|
||||
private final StoredFieldsFormat storedFields = new PreFlexRWStoredFieldsFormat();
|
||||
// TODO: this should really be a different impl
|
||||
private final LiveDocsFormat liveDocs = new Lucene40LiveDocsFormat();
|
||||
// TODO: this should really be a different impl
|
||||
private final StoredFieldsFormat storedFields = new Lucene40StoredFieldsFormat();
|
||||
|
||||
@Override
|
||||
public PostingsFormat postingsFormat() {
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
package org.apache.lucene.codecs.preflexrw;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||
import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsFormat;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
|
||||
public class PreFlexRWStoredFieldsFormat extends Lucene3xStoredFieldsFormat {
|
||||
|
||||
@Override
|
||||
public StoredFieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
||||
return new PreFlexRWStoredFieldsWriter(directory, segment, context);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,156 @@
|
|||
package org.apache.lucene.codecs.preflexrw;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
* use this file except in compliance with the License. You may obtain a copy of
|
||||
* the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||
import org.apache.lucene.codecs.lucene3x.Lucene3xStoredFieldsReader;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/** @lucene.experimental */
|
||||
public final class PreFlexRWStoredFieldsWriter extends StoredFieldsWriter {
|
||||
private final Directory directory;
|
||||
private final String segment;
|
||||
private IndexOutput fieldsStream;
|
||||
private IndexOutput indexStream;
|
||||
|
||||
public PreFlexRWStoredFieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
|
||||
assert directory != null;
|
||||
this.directory = directory;
|
||||
this.segment = segment;
|
||||
|
||||
boolean success = false;
|
||||
try {
|
||||
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION), context);
|
||||
indexStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION), context);
|
||||
|
||||
fieldsStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
|
||||
indexStream.writeInt(Lucene3xStoredFieldsReader.FORMAT_CURRENT);
|
||||
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the contents of buffer into the fields stream
|
||||
// and adds a new entry for this document into the index
|
||||
// stream. This assumes the buffer was already written
|
||||
// in the correct fields format.
|
||||
public void startDocument(int numStoredFields) throws IOException {
|
||||
indexStream.writeLong(fieldsStream.getFilePointer());
|
||||
fieldsStream.writeVInt(numStoredFields);
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
try {
|
||||
IOUtils.close(fieldsStream, indexStream);
|
||||
} finally {
|
||||
fieldsStream = indexStream = null;
|
||||
}
|
||||
}
|
||||
|
||||
public void abort() {
|
||||
try {
|
||||
close();
|
||||
} catch (IOException ignored) {}
|
||||
IOUtils.deleteFilesIgnoringExceptions(directory,
|
||||
IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION),
|
||||
IndexFileNames.segmentFileName(segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
|
||||
}
|
||||
|
||||
public void writeField(FieldInfo info, IndexableField field) throws IOException {
|
||||
fieldsStream.writeVInt(info.number);
|
||||
int bits = 0;
|
||||
final BytesRef bytes;
|
||||
final String string;
|
||||
// TODO: maybe a field should serialize itself?
|
||||
// this way we don't bake into indexer all these
|
||||
// specific encodings for different fields? and apps
|
||||
// can customize...
|
||||
|
||||
Number number = field.numericValue();
|
||||
if (number != null) {
|
||||
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
|
||||
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_INT;
|
||||
} else if (number instanceof Long) {
|
||||
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_LONG;
|
||||
} else if (number instanceof Float) {
|
||||
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_FLOAT;
|
||||
} else if (number instanceof Double) {
|
||||
bits |= Lucene3xStoredFieldsReader.FIELD_IS_NUMERIC_DOUBLE;
|
||||
} else {
|
||||
throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
|
||||
}
|
||||
string = null;
|
||||
bytes = null;
|
||||
} else {
|
||||
bytes = field.binaryValue();
|
||||
if (bytes != null) {
|
||||
bits |= Lucene3xStoredFieldsReader.FIELD_IS_BINARY;
|
||||
string = null;
|
||||
} else {
|
||||
string = field.stringValue();
|
||||
if (string == null) {
|
||||
throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fieldsStream.writeByte((byte) bits);
|
||||
|
||||
if (bytes != null) {
|
||||
fieldsStream.writeVInt(bytes.length);
|
||||
fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||
} else if (string != null) {
|
||||
fieldsStream.writeString(field.stringValue());
|
||||
} else {
|
||||
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
|
||||
fieldsStream.writeInt(number.intValue());
|
||||
} else if (number instanceof Long) {
|
||||
fieldsStream.writeLong(number.longValue());
|
||||
} else if (number instanceof Float) {
|
||||
fieldsStream.writeInt(Float.floatToIntBits(number.floatValue()));
|
||||
} else if (number instanceof Double) {
|
||||
fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue()));
|
||||
} else {
|
||||
assert false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(int numDocs) throws IOException {
|
||||
if (4+((long) numDocs)*8 != indexStream.getFilePointer())
|
||||
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
|
||||
// we detect that the bug has struck, here, and
|
||||
// throw an exception to prevent the corruption from
|
||||
// entering the index. See LUCENE-1282 for
|
||||
// details.
|
||||
throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue