LUCENE-2621: add hooks to CodecProvider (for now) for stored fields

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1179284 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-10-05 15:59:34 +00:00
parent 7576b7c2c3
commit 16095b4452
11 changed files with 163 additions and 43 deletions

View File

@ -26,6 +26,7 @@ import java.text.NumberFormat;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
@ -151,6 +152,7 @@ public class DocumentsWriterPerThread {
}
private final static boolean INFO_VERBOSE = false;
final DocumentsWriter parent;
final CodecProvider codecProvider;
final IndexWriter writer;
final Directory directory;
final DocState docState;
@ -181,6 +183,7 @@ public class DocumentsWriterPerThread {
this.fieldInfos = fieldInfos;
this.writer = parent.indexWriter;
this.infoStream = parent.infoStream;
this.codecProvider = this.writer.codecs;
this.docState = new DocState(this);
this.docState.similarityProvider = parent.indexWriter.getConfig()
.getSimilarityProvider();

View File

@ -22,6 +22,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.FieldsReader;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
@ -164,7 +165,7 @@ final class SegmentCoreReaders {
}
final String storesSegment = si.getDocStoreSegment();
fieldsReaderOrig = new FieldsReader(storeDir, storesSegment, fieldInfos, context,
fieldsReaderOrig = si.getSegmentCodecs().provider.fieldsReader(storeDir, storesSegment, fieldInfos, context,
si.getDocStoreOffset(), si.docCount);
// Verify two sources of "maxDoc" agree:

View File

@ -29,6 +29,8 @@ import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsReader;
import org.apache.lucene.index.codecs.FieldsWriter;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.PerDocValues;
@ -257,7 +259,7 @@ final class SegmentMerger {
int docCount = 0;
setMatchingSegmentReaders();
final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, context);
final FieldsWriter fieldsWriter = codecInfo.provider.fieldsWriter(directory, segment, context);
try {
int idx = 0;
for (MergeState.IndexReaderAndLiveDocs reader : readers) {

View File

@ -30,6 +30,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.FieldsReader;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BitVector;
@ -76,7 +77,7 @@ public class SegmentReader extends IndexReader implements Cloneable {
private class FieldsReaderLocal extends CloseableThreadLocal<FieldsReader> {
@Override
protected FieldsReader initialValue() {
return (FieldsReader) core.getFieldsReaderOrig().clone();
return core.getFieldsReaderOrig().clone();
}
}

View File

@ -19,6 +19,8 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.FieldsWriter;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@ -33,10 +35,12 @@ final class StoredFieldsWriter {
int freeCount;
final DocumentsWriterPerThread.DocState docState;
final CodecProvider codecProvider;
/** Creates a stored-fields writer bound to the given per-thread documents writer.
 * Copies the per-thread doc state and the {@code CodecProvider} locally so the
 * provider-supplied FieldsWriter can be created lazily later (LUCENE-2621). */
public StoredFieldsWriter(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
this.docState = docWriter.docState;
// The provider decides which concrete FieldsWriter implementation is used.
this.codecProvider = docWriter.codecProvider;
}
private int numStoredFields;
@ -77,7 +81,7 @@ final class StoredFieldsWriter {
private synchronized void initFieldsWriter(IOContext context) throws IOException {
if (fieldsWriter == null) {
fieldsWriter = new FieldsWriter(docWriter.directory, docWriter.getSegment(), context);
fieldsWriter = codecProvider.fieldsWriter(docWriter.directory, docWriter.getSegment(), context);
lastDocID = 0;
}
}

View File

@ -17,12 +17,17 @@ package org.apache.lucene.index.codecs;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/** Holds a set of codecs, keyed by name. You subclass
* this, instantiate it, and register your codecs, then
* pass this instance to IndexReader/IndexWriter (via
@ -105,6 +110,16 @@ public class CodecProvider {
public SegmentInfosReader getSegmentInfosReader() {
return infosReader;
}
/** Expert: returns the {@code FieldsReader} used to read stored fields for a
 * segment. The default implementation returns a {@code DefaultFieldsReader};
 * subclasses may override to plug in a custom stored-fields format. All
 * arguments are forwarded unchanged to the reader's constructor.
 * @throws IOException if the underlying reader fails to open its files */
public FieldsReader fieldsReader(Directory directory, String segment, FieldInfos fn, IOContext context, int docStoreOffset, int size) throws IOException {
return new DefaultFieldsReader(directory, segment, fn, context, docStoreOffset, size);
}
/** Expert: returns the {@code FieldsWriter} used to write stored fields for a
 * segment. The default implementation returns a {@code DefaultFieldsWriter};
 * subclasses may override to plug in a custom stored-fields format.
 * @throws IOException if the underlying writer fails to create its files */
public FieldsWriter fieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
return new DefaultFieldsWriter(directory, segment, context);
}
static private CodecProvider defaultCodecs = new CoreCodecProvider();

View File

@ -1,4 +1,4 @@
package org.apache.lucene.index;
package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -19,6 +19,14 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldReaderException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@ -35,7 +43,7 @@ import java.io.Closeable;
*
* @lucene.internal
*/
public final class FieldsReader implements Cloneable, Closeable {
public final class DefaultFieldsReader extends FieldsReader implements Cloneable, Closeable {
private final static int FORMAT_SIZE = 4;
private final FieldInfos fieldInfos;
@ -67,9 +75,9 @@ public final class FieldsReader implements Cloneable, Closeable {
* clones are called (eg, currently SegmentReader manages
* this logic). */
@Override
public Object clone() {
public DefaultFieldsReader clone() {
ensureOpen();
return new FieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
return new DefaultFieldsReader(fieldInfos, numTotalDocs, size, format, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
}
/** Verifies that the code version which wrote the segment is supported. */
@ -79,10 +87,10 @@ public final class FieldsReader implements Cloneable, Closeable {
try {
int format = idxStream.readInt();
if (format < FieldsWriter.FORMAT_MINIMUM)
throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
if (format > FieldsWriter.FORMAT_CURRENT)
throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
if (format < DefaultFieldsWriter.FORMAT_MINIMUM)
throw new IndexFormatTooOldException(indexStreamFN, format, DefaultFieldsWriter.FORMAT_MINIMUM, DefaultFieldsWriter.FORMAT_CURRENT);
if (format > DefaultFieldsWriter.FORMAT_CURRENT)
throw new IndexFormatTooNewException(indexStreamFN, format, DefaultFieldsWriter.FORMAT_MINIMUM, DefaultFieldsWriter.FORMAT_CURRENT);
} finally {
idxStream.close();
}
@ -90,7 +98,7 @@ public final class FieldsReader implements Cloneable, Closeable {
}
// Used only by clone
private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
private DefaultFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int docStoreOffset,
IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream) {
this.fieldInfos = fieldInfos;
this.numTotalDocs = numTotalDocs;
@ -103,11 +111,11 @@ public final class FieldsReader implements Cloneable, Closeable {
indexStream = (IndexInput) cloneableIndexStream.clone();
}
public FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
public DefaultFieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
this(d, segment, fn, IOContext.DEFAULT, -1, 0);
}
public FieldsReader(Directory d, String segment, FieldInfos fn, IOContext context, int docStoreOffset, int size) throws IOException {
public DefaultFieldsReader(Directory d, String segment, FieldInfos fn, IOContext context, int docStoreOffset, int size) throws IOException {
boolean success = false;
isOriginal = true;
try {
@ -119,10 +127,10 @@ public final class FieldsReader implements Cloneable, Closeable {
format = cloneableIndexStream.readInt();
if (format < FieldsWriter.FORMAT_MINIMUM)
throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
if (format > FieldsWriter.FORMAT_CURRENT)
throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
if (format < DefaultFieldsWriter.FORMAT_MINIMUM)
throw new IndexFormatTooOldException(indexStreamFN, format, DefaultFieldsWriter.FORMAT_MINIMUM, DefaultFieldsWriter.FORMAT_CURRENT);
if (format > DefaultFieldsWriter.FORMAT_CURRENT)
throw new IndexFormatTooNewException(indexStreamFN, format, DefaultFieldsWriter.FORMAT_MINIMUM, DefaultFieldsWriter.FORMAT_CURRENT);
fieldsStream = (IndexInput) cloneableFieldsStream.clone();
@ -200,10 +208,10 @@ public final class FieldsReader implements Cloneable, Closeable {
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
int bits = fieldsStream.readByte() & 0xFF;
assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
assert bits <= (DefaultFieldsWriter.FIELD_IS_NUMERIC_MASK | DefaultFieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
final boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;
final boolean binary = (bits & DefaultFieldsWriter.FIELD_IS_BINARY) != 0;
final int numeric = bits & DefaultFieldsWriter.FIELD_IS_NUMERIC_MASK;
final boolean doStop;
if (binary) {
@ -211,16 +219,16 @@ public final class FieldsReader implements Cloneable, Closeable {
doStop = visitor.binaryField(fieldInfo, fieldsStream, numBytes);
} else if (numeric != 0) {
switch(numeric) {
case FieldsWriter.FIELD_IS_NUMERIC_INT:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_INT:
doStop = visitor.intField(fieldInfo, fieldsStream.readInt());
break;
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_LONG:
doStop = visitor.longField(fieldInfo, fieldsStream.readLong());
break;
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
doStop = visitor.floatField(fieldInfo, Float.intBitsToFloat(fieldsStream.readInt()));
break;
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
doStop = visitor.doubleField(fieldInfo, Double.longBitsToDouble(fieldsStream.readLong()));
break;
default:
@ -274,12 +282,12 @@ public final class FieldsReader implements Cloneable, Closeable {
case 0:
numBytes = fieldsStream.readVInt();
break;
case FieldsWriter.FIELD_IS_NUMERIC_INT:
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_INT:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_FLOAT:
numBytes = 4;
break;
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_LONG:
case DefaultFieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
numBytes = 8;
break;
default:

View File

@ -1,4 +1,4 @@
package org.apache.lucene.index;
package org.apache.lucene.index.codecs;
/**
* Copyright 2004 The Apache Software Foundation
@ -18,6 +18,9 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -25,7 +28,8 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
final class FieldsWriter {
/** @lucene.experimental */
public final class DefaultFieldsWriter extends FieldsWriter {
// NOTE: bit 0 is free here! You can steal it!
static final int FIELD_IS_BINARY = 1 << 1;
@ -63,7 +67,7 @@ final class FieldsWriter {
private IndexOutput fieldsStream;
private IndexOutput indexStream;
FieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
DefaultFieldsWriter(Directory directory, String segment, IOContext context) throws IOException {
this.directory = directory;
this.segment = segment;
@ -83,7 +87,7 @@ final class FieldsWriter {
}
}
FieldsWriter(IndexOutput fdx, IndexOutput fdt) {
DefaultFieldsWriter(IndexOutput fdx, IndexOutput fdt) {
directory = null;
segment = null;
fieldsStream = fdt;
@ -98,17 +102,17 @@ final class FieldsWriter {
// and adds a new entry for this document into the index
// stream. This assumes the buffer was already written
// in the correct fields format.
void startDocument(int numStoredFields) throws IOException {
public void startDocument(int numStoredFields) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
fieldsStream.writeVInt(numStoredFields);
}
void skipDocument() throws IOException {
public void skipDocument() throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
fieldsStream.writeVInt(0);
}
void close() throws IOException {
public void close() throws IOException {
if (directory != null) {
try {
IOUtils.close(fieldsStream, indexStream);
@ -118,7 +122,7 @@ final class FieldsWriter {
}
}
void abort() {
public void abort() {
if (directory != null) {
try {
close();
@ -135,7 +139,7 @@ final class FieldsWriter {
}
}
final void writeField(int fieldNumber, IndexableField field) throws IOException {
public final void writeField(int fieldNumber, IndexableField field) throws IOException {
fieldsStream.writeVInt(fieldNumber);
int bits = 0;
final BytesRef bytes;
@ -201,7 +205,7 @@ final class FieldsWriter {
* document. The stream IndexInput is the
* fieldsStream from which we should bulk-copy all
* bytes. */
final void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException {
public final void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException {
long position = fieldsStream.getFilePointer();
long start = position;
for(int i=0;i<numDocs;i++) {
@ -212,7 +216,7 @@ final class FieldsWriter {
assert fieldsStream.getFilePointer() == position;
}
final void addDocument(Iterable<? extends IndexableField> doc, FieldInfos fieldInfos) throws IOException {
public final void addDocument(Iterable<? extends IndexableField> doc, FieldInfos fieldInfos) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
int storedCount = 0;

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index.codecs;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldsReader;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.IndexFormatTooNewException;
@ -79,7 +78,7 @@ public class DefaultSegmentInfosReader extends SegmentInfosReader {
}
try {
FieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
DefaultFieldsReader.checkCodeVersion(dir, si.getDocStoreSegment());
} finally {
// If we opened the directory, close it
if (dir != directory) dir.close();

View File

@ -0,0 +1,39 @@
package org.apache.lucene.index.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.IndexInput;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/** Codec API for reading stored fields of a segment. Concrete
 * implementations (e.g. {@code DefaultFieldsReader}) are obtained via
 * {@code CodecProvider#fieldsReader}. Instances are cloned per thread by
 * the consumer (e.g. SegmentReader), so {@link #clone()} must return an
 * independent reader sharing the same underlying files. */
public abstract class FieldsReader implements Cloneable, Closeable {
/** Visits the stored fields of document {@code n}, calling back into
 * {@code visitor} for each field.
 * @throws CorruptIndexException if the stored fields file is corrupt */
public abstract void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException;
/** Returns the length in bytes of each raw document in a
 * contiguous range of length numDocs starting with
 * startDocID. Returns the IndexInput (the fieldStream),
 * already seeked to the starting point for startDocID.*/
public abstract IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException;
/** Returns the number of documents this reader exposes.
 * NOTE(review): presumably accounts for any doc-store offset/size the
 * implementation was opened with — confirm against DefaultFieldsReader. */
public abstract int size();
/** Returns a clone of this reader; covariant override of {@link Object#clone()}. */
public abstract FieldsReader clone();
}

View File

@ -0,0 +1,44 @@
package org.apache.lucene.index.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.IndexInput;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
/** Codec API for writing stored fields of a segment. Concrete
 * implementations (e.g. {@code DefaultFieldsWriter}) are obtained via
 * {@code CodecProvider#fieldsWriter}. Callers must {@code close()} the
 * writer when done, or call {@link #abort()} to discard partial output. */
public abstract class FieldsWriter implements Closeable {
/** Writes all stored fields of one document, in the order produced by
 * iterating {@code doc}. */
public abstract void addDocument(Iterable<? extends IndexableField> doc, FieldInfos fieldInfos) throws IOException;
/** Bulk write a contiguous series of documents. The
 * lengths array is the length (in bytes) of each raw
 * document. The stream IndexInput is the
 * fieldsStream from which we should bulk-copy all
 * bytes. */
public abstract void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException;
/** Begins a new document that will contain {@code numStoredFields} fields,
 * each subsequently written via {@link #writeField}. */
public abstract void startDocument(int numStoredFields) throws IOException;
/** Records a document with zero stored fields, keeping doc numbering aligned. */
public abstract void skipDocument() throws IOException;
/** Writes a single stored field (identified by its global field number). */
public abstract void writeField(int fieldNumber, IndexableField field) throws IOException;
/** Aborts the write: closes and attempts to remove any partially written
 * output so no corrupt files remain. Must not throw. */
public abstract void abort();
}