mirror of https://github.com/apache/lucene.git
LUCENE-4558: Make CompressingStoredFieldsFormat more flexible.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1411262 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c7fb9535ee
commit
4231ed4ca2
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs.compressing;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
|
@ -29,6 +30,7 @@ import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link StoredFieldsFormat} that is very similar to
|
* A {@link StoredFieldsFormat} that is very similar to
|
||||||
* {@link Lucene40StoredFieldsFormat} but compresses documents in chunks in
|
* {@link Lucene40StoredFieldsFormat} but compresses documents in chunks in
|
||||||
|
@ -45,16 +47,23 @@ import org.apache.lucene.store.IOContext;
|
||||||
*/
|
*/
|
||||||
public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
|
public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
|
|
||||||
|
private final String formatName;
|
||||||
private final CompressionMode compressionMode;
|
private final CompressionMode compressionMode;
|
||||||
private final int chunkSize;
|
private final int chunkSize;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new {@link CompressingStoredFieldsFormat}.
|
* Create a new {@link CompressingStoredFieldsFormat}.
|
||||||
* <p>
|
* <p>
|
||||||
|
* <code>formatName</code> is the name of the format. This name will be used
|
||||||
|
* in the file formats to perform
|
||||||
|
* {@link CodecUtil#checkHeader(org.apache.lucene.store.DataInput, String, int, int) codec header checks}.
|
||||||
|
* <p>
|
||||||
* The <code>compressionMode</code> parameter allows you to choose between
|
* The <code>compressionMode</code> parameter allows you to choose between
|
||||||
* compression algorithms that have various compression and decompression
|
* compression algorithms that have various compression and decompression
|
||||||
* speeds so that you can pick the one that best fits your indexing and
|
* speeds so that you can pick the one that best fits your indexing and
|
||||||
* searching throughput.
|
* searching throughput. You should never instantiate two
|
||||||
|
* {@link CompressingStoredFieldsFormat}s that have the same name but
|
||||||
|
* different {@link CompressionMode}s.
|
||||||
* <p>
|
* <p>
|
||||||
* <code>chunkSize</code> is the minimum byte size of a chunk of documents.
|
* <code>chunkSize</code> is the minimum byte size of a chunk of documents.
|
||||||
* A value of <code>1</code> can make sense if there is redundancy across
|
* A value of <code>1</code> can make sense if there is redundancy across
|
||||||
|
@ -67,11 +76,13 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
* loading a little slower (depending on the size of your OS cache compared
|
* loading a little slower (depending on the size of your OS cache compared
|
||||||
* to the size of your index).
|
* to the size of your index).
|
||||||
*
|
*
|
||||||
|
* @param formatName the name of the {@link StoredFieldsFormat}
|
||||||
* @param compressionMode the {@link CompressionMode} to use
|
* @param compressionMode the {@link CompressionMode} to use
|
||||||
* @param chunkSize the minimum number of bytes of a single chunk of stored documents
|
* @param chunkSize the minimum number of bytes of a single chunk of stored documents
|
||||||
* @see CompressionMode
|
* @see CompressionMode
|
||||||
*/
|
*/
|
||||||
public CompressingStoredFieldsFormat(CompressionMode compressionMode, int chunkSize) {
|
public CompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode, int chunkSize) {
|
||||||
|
this.formatName = formatName;
|
||||||
this.compressionMode = compressionMode;
|
this.compressionMode = compressionMode;
|
||||||
if (chunkSize < 1) {
|
if (chunkSize < 1) {
|
||||||
throw new IllegalArgumentException("chunkSize must be >= 1");
|
throw new IllegalArgumentException("chunkSize must be >= 1");
|
||||||
|
@ -79,27 +90,17 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
|
||||||
this.chunkSize = chunkSize;
|
this.chunkSize = chunkSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new {@link CompressingStoredFieldsFormat} with
|
|
||||||
* {@link CompressionMode#FAST} compression and chunks of <tt>16 KB</tt>.
|
|
||||||
*
|
|
||||||
* @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(CompressionMode, int)
|
|
||||||
*/
|
|
||||||
public CompressingStoredFieldsFormat() {
|
|
||||||
this(CompressionMode.FAST, 1 << 14);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
|
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
|
||||||
FieldInfos fn, IOContext context) throws IOException {
|
FieldInfos fn, IOContext context) throws IOException {
|
||||||
return new CompressingStoredFieldsReader(directory, si, fn, context);
|
return new CompressingStoredFieldsReader(directory, si, fn, context, formatName, compressionMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
|
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
|
||||||
IOContext context) throws IOException {
|
IOContext context) throws IOException {
|
||||||
return new CompressingStoredFieldsWriter(directory, si, context,
|
return new CompressingStoredFieldsWriter(directory, si, context,
|
||||||
compressionMode, chunkSize);
|
formatName, compressionMode, chunkSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,10 +18,8 @@ package org.apache.lucene.codecs.compressing;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.BYTE_ARR;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.BYTE_ARR;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_NAME_DAT;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_SFX_DAT;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_NAME_IDX;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_SFX_IDX;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.HEADER_LENGTH_DAT;
|
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.HEADER_LENGTH_IDX;
|
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_DOUBLE;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_DOUBLE;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_FLOAT;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_FLOAT;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_INT;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_INT;
|
||||||
|
@ -81,7 +79,9 @@ final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
this.closed = false;
|
this.closed = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
|
public CompressingStoredFieldsReader( Directory d, SegmentInfo si, FieldInfos fn,
|
||||||
|
IOContext context, String formatName, CompressionMode compressionMode) throws IOException {
|
||||||
|
this.compressionMode = compressionMode;
|
||||||
final String segment = si.name;
|
final String segment = si.name;
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
fieldInfos = fn;
|
fieldInfos = fn;
|
||||||
|
@ -92,17 +92,17 @@ final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
|
final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
|
||||||
indexStream = d.openInput(indexStreamFN, context);
|
indexStream = d.openInput(indexStreamFN, context);
|
||||||
|
|
||||||
CodecUtil.checkHeader(indexStream, CODEC_NAME_IDX, VERSION_START, VERSION_CURRENT);
|
final String codecNameIdx = formatName + CODEC_SFX_IDX;
|
||||||
CodecUtil.checkHeader(fieldsStream, CODEC_NAME_DAT, VERSION_START, VERSION_CURRENT);
|
final String codecNameDat = formatName + CODEC_SFX_DAT;
|
||||||
assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
|
CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
|
||||||
assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
|
CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
|
||||||
|
assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
|
||||||
|
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
|
||||||
|
|
||||||
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
|
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
|
||||||
indexStream = null;
|
indexStream = null;
|
||||||
|
|
||||||
packedIntsVersion = fieldsStream.readVInt();
|
packedIntsVersion = fieldsStream.readVInt();
|
||||||
final int compressionModeId = fieldsStream.readVInt();
|
|
||||||
compressionMode = CompressionMode.byId(compressionModeId);
|
|
||||||
decompressor = compressionMode.newDecompressor();
|
decompressor = compressionMode.newDecompressor();
|
||||||
this.bytes = new BytesRef();
|
this.bytes = new BytesRef();
|
||||||
|
|
||||||
|
|
|
@ -59,12 +59,10 @@ final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
static final int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE);
|
static final int TYPE_BITS = PackedInts.bitsRequired(NUMERIC_DOUBLE);
|
||||||
static final int TYPE_MASK = (int) PackedInts.maxValue(TYPE_BITS);
|
static final int TYPE_MASK = (int) PackedInts.maxValue(TYPE_BITS);
|
||||||
|
|
||||||
static final String CODEC_NAME_IDX = "CompressingStoredFieldsIndex";
|
static final String CODEC_SFX_IDX = "Index";
|
||||||
static final String CODEC_NAME_DAT = "CompressingStoredFieldsData";
|
static final String CODEC_SFX_DAT = "Data";
|
||||||
static final int VERSION_START = 0;
|
static final int VERSION_START = 0;
|
||||||
static final int VERSION_CURRENT = VERSION_START;
|
static final int VERSION_CURRENT = VERSION_START;
|
||||||
static final long HEADER_LENGTH_IDX = CodecUtil.headerLength(CODEC_NAME_IDX);
|
|
||||||
static final long HEADER_LENGTH_DAT = CodecUtil.headerLength(CODEC_NAME_DAT);
|
|
||||||
|
|
||||||
private final Directory directory;
|
private final Directory directory;
|
||||||
private final String segment;
|
private final String segment;
|
||||||
|
@ -81,8 +79,8 @@ final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
private int docBase; // doc ID at the beginning of the chunk
|
private int docBase; // doc ID at the beginning of the chunk
|
||||||
private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID
|
private int numBufferedDocs; // docBase + numBufferedDocs == current doc ID
|
||||||
|
|
||||||
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si,
|
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, IOContext context,
|
||||||
IOContext context, CompressionMode compressionMode, int chunkSize) throws IOException {
|
String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
|
||||||
assert directory != null;
|
assert directory != null;
|
||||||
this.directory = directory;
|
this.directory = directory;
|
||||||
this.segment = si.name;
|
this.segment = si.name;
|
||||||
|
@ -100,16 +98,17 @@ final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
try {
|
try {
|
||||||
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
|
fieldsStream = directory.createOutput(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION), context);
|
||||||
|
|
||||||
CodecUtil.writeHeader(indexStream, CODEC_NAME_IDX, VERSION_CURRENT);
|
final String codecNameIdx = formatName + CODEC_SFX_IDX;
|
||||||
CodecUtil.writeHeader(fieldsStream, CODEC_NAME_DAT, VERSION_CURRENT);
|
final String codecNameDat = formatName + CODEC_SFX_DAT;
|
||||||
assert HEADER_LENGTH_IDX == indexStream.getFilePointer();
|
CodecUtil.writeHeader(indexStream, codecNameIdx, VERSION_CURRENT);
|
||||||
assert HEADER_LENGTH_DAT == fieldsStream.getFilePointer();
|
CodecUtil.writeHeader(fieldsStream, codecNameDat, VERSION_CURRENT);
|
||||||
|
assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
|
||||||
|
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
|
||||||
|
|
||||||
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
|
indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
|
||||||
indexStream = null;
|
indexStream = null;
|
||||||
|
|
||||||
fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);
|
fieldsStream.writeVInt(PackedInts.VERSION_CURRENT);
|
||||||
fieldsStream.writeVInt(compressionMode.getId());
|
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -33,7 +33,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
* decompression of stored fields.
|
* decompression of stored fields.
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public enum CompressionMode {
|
public abstract class CompressionMode {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A compression mode that trades compression ratio for speed. Although the
|
* A compression mode that trades compression ratio for speed. Although the
|
||||||
|
@ -41,19 +41,24 @@ public enum CompressionMode {
|
||||||
* very fast. Use this mode with indices that have a high update rate but
|
* very fast. Use this mode with indices that have a high update rate but
|
||||||
* should be able to load documents from disk quickly.
|
* should be able to load documents from disk quickly.
|
||||||
*/
|
*/
|
||||||
FAST(0) {
|
public static final CompressionMode FAST = new CompressionMode() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Compressor newCompressor() {
|
public Compressor newCompressor() {
|
||||||
return LZ4_FAST_COMPRESSOR;
|
return LZ4_FAST_COMPRESSOR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Decompressor newDecompressor() {
|
public Decompressor newDecompressor() {
|
||||||
return LZ4_DECOMPRESSOR;
|
return LZ4_DECOMPRESSOR;
|
||||||
}
|
}
|
||||||
|
|
||||||
},
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "FAST";
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A compression mode that trades speed for compression ratio. Although
|
* A compression mode that trades speed for compression ratio. Although
|
||||||
|
@ -61,19 +66,24 @@ public enum CompressionMode {
|
||||||
* provide a good compression ratio. This mode might be interesting if/when
|
* provide a good compression ratio. This mode might be interesting if/when
|
||||||
* your index size is much bigger than your OS cache.
|
* your index size is much bigger than your OS cache.
|
||||||
*/
|
*/
|
||||||
HIGH_COMPRESSION(1) {
|
public static final CompressionMode HIGH_COMPRESSION = new CompressionMode() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Compressor newCompressor() {
|
public Compressor newCompressor() {
|
||||||
return new DeflateCompressor(Deflater.BEST_COMPRESSION);
|
return new DeflateCompressor(Deflater.BEST_COMPRESSION);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Decompressor newDecompressor() {
|
public Decompressor newDecompressor() {
|
||||||
return new DeflateDecompressor();
|
return new DeflateDecompressor();
|
||||||
}
|
}
|
||||||
|
|
||||||
},
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "HIGH_COMPRESSION";
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This compression mode is similar to {@link #FAST} but it spends more time
|
* This compression mode is similar to {@link #FAST} but it spends more time
|
||||||
|
@ -81,55 +91,37 @@ public enum CompressionMode {
|
||||||
* mode is best used with indices that have a low update rate but should be
|
* mode is best used with indices that have a low update rate but should be
|
||||||
* able to load documents from disk quickly.
|
* able to load documents from disk quickly.
|
||||||
*/
|
*/
|
||||||
FAST_DECOMPRESSION(2) {
|
public static final CompressionMode FAST_DECOMPRESSION = new CompressionMode() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Compressor newCompressor() {
|
public Compressor newCompressor() {
|
||||||
return LZ4_HIGH_COMPRESSOR;
|
return LZ4_HIGH_COMPRESSOR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
Decompressor newDecompressor() {
|
public Decompressor newDecompressor() {
|
||||||
return LZ4_DECOMPRESSOR;
|
return LZ4_DECOMPRESSOR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "FAST_DECOMPRESSION";
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Get a {@link CompressionMode} according to its id. */
|
/** Sole constructor. */
|
||||||
public static CompressionMode byId(int id) {
|
protected CompressionMode() {}
|
||||||
for (CompressionMode mode : CompressionMode.values()) {
|
|
||||||
if (mode.getId() == id) {
|
|
||||||
return mode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
throw new IllegalArgumentException("Unknown id: " + id);
|
|
||||||
}
|
|
||||||
|
|
||||||
private final int id;
|
|
||||||
|
|
||||||
private CompressionMode(int id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns an ID for this compression mode. Should be unique across
|
|
||||||
* {@link CompressionMode}s as it is used for serialization and
|
|
||||||
* unserialization.
|
|
||||||
*/
|
|
||||||
public final int getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new {@link Compressor} instance.
|
* Create a new {@link Compressor} instance.
|
||||||
*/
|
*/
|
||||||
abstract Compressor newCompressor();
|
public abstract Compressor newCompressor();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new {@link Decompressor} instance.
|
* Create a new {@link Decompressor} instance.
|
||||||
*/
|
*/
|
||||||
abstract Decompressor newDecompressor();
|
public abstract Decompressor newDecompressor();
|
||||||
|
|
||||||
|
|
||||||
private static final Decompressor LZ4_DECOMPRESSOR = new Decompressor() {
|
private static final Decompressor LZ4_DECOMPRESSOR = new Decompressor() {
|
||||||
|
|
||||||
|
@ -264,6 +256,7 @@ public enum CompressionMode {
|
||||||
|
|
||||||
if (compressor.needsInput()) {
|
if (compressor.needsInput()) {
|
||||||
// no output
|
// no output
|
||||||
|
assert len == 0 : len;
|
||||||
out.writeVInt(0);
|
out.writeVInt(0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,6 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* <li>FieldData (.fdt) --> <Header>, PackedIntsVersion, CompressionFormat, <Chunk><sup>ChunkCount</sup></li>
|
* <li>FieldData (.fdt) --> <Header>, PackedIntsVersion, <Chunk><sup>ChunkCount</sup></li>
|
||||||
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
* <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
|
||||||
* <li>PackedIntsVersion --> {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li>
|
* <li>PackedIntsVersion --> {@link PackedInts#VERSION_CURRENT} as a {@link DataOutput#writeVInt VInt}</li>
|
||||||
* <li>CompressionFormat --> always <tt>0</tt> as a {@link DataOutput#writeVInt VInt}, this may allow for different compression formats in the future</li>
|
|
||||||
* <li>ChunkCount is not known in advance and is the number of chunks necessary to store all documents of the segment</li>
|
* <li>ChunkCount is not known in advance and is the number of chunks necessary to store all documents of the segment</li>
|
||||||
* <li>Chunk --> DocBase, ChunkDocs, DocFieldCounts, DocLengths, <CompressedDocs></li>
|
* <li>Chunk --> DocBase, ChunkDocs, DocFieldCounts, DocLengths, <CompressedDocs></li>
|
||||||
* <li>DocBase --> the ID of the first document of the chunk as a {@link DataOutput#writeVInt VInt}</li>
|
* <li>DocBase --> the ID of the first document of the chunk as a {@link DataOutput#writeVInt VInt}</li>
|
||||||
|
@ -147,7 +146,7 @@ public final class Lucene41StoredFieldsFormat extends CompressingStoredFieldsFor
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene41StoredFieldsFormat() {
|
public Lucene41StoredFieldsFormat() {
|
||||||
super(CompressionMode.FAST, 1 << 14);
|
super("Lucene41StoredFields", CompressionMode.FAST, 1 << 14);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,35 +24,40 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A codec that uses {@link CompressingStoredFieldsFormat} for its stored
|
* A codec that uses {@link CompressingStoredFieldsFormat} for its stored
|
||||||
* fields and delegates to {@link Lucene41Codec} for everything else.
|
* fields and delegates to {@link Lucene41Codec} for everything else.
|
||||||
*/
|
*/
|
||||||
public class CompressingCodec extends FilterCodec {
|
public abstract class CompressingCodec extends FilterCodec {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a random instance.
|
* Create a random instance.
|
||||||
*/
|
*/
|
||||||
|
public static CompressingCodec randomInstance(Random random, int chunkSize) {
|
||||||
|
switch (random.nextInt(4)) {
|
||||||
|
case 0:
|
||||||
|
return new FastCompressingCodec(chunkSize);
|
||||||
|
case 1:
|
||||||
|
return new FastDecompressionCompressingCodec(chunkSize);
|
||||||
|
case 2:
|
||||||
|
return new HighCompressionCompressingCodec(chunkSize);
|
||||||
|
case 3:
|
||||||
|
return new DummyCompressingCodec(chunkSize);
|
||||||
|
default:
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static CompressingCodec randomInstance(Random random) {
|
public static CompressingCodec randomInstance(Random random) {
|
||||||
final CompressionMode mode = RandomPicks.randomFrom(random, CompressionMode.values());
|
return randomInstance(random, RandomInts.randomIntBetween(random, 1, 500));
|
||||||
final int chunkSize = RandomInts.randomIntBetween(random, 1, 500);
|
|
||||||
return new CompressingCodec(mode, chunkSize);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final CompressingStoredFieldsFormat storedFieldsFormat;
|
private final CompressingStoredFieldsFormat storedFieldsFormat;
|
||||||
|
|
||||||
/**
|
public CompressingCodec(String name, CompressionMode compressionMode, int chunkSize) {
|
||||||
* @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(CompressionMode, int)
|
super(name, new Lucene41Codec());
|
||||||
*/
|
this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, compressionMode, chunkSize);
|
||||||
public CompressingCodec(CompressionMode compressionMode, int chunkSize) {
|
|
||||||
super("Compressing", new Lucene41Codec());
|
|
||||||
this.storedFieldsFormat = new CompressingStoredFieldsFormat(compressionMode, chunkSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
public CompressingCodec() {
|
|
||||||
this(CompressionMode.FAST, 1 << 14);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
package org.apache.lucene.codecs.compressing;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
/** CompressingCodec that does not compress data, useful for testing. */
|
||||||
|
public class DummyCompressingCodec extends CompressingCodec {
|
||||||
|
|
||||||
|
public static final CompressionMode DUMMY = new CompressionMode() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Compressor newCompressor() {
|
||||||
|
return DUMMY_COMPRESSOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Decompressor newDecompressor() {
|
||||||
|
return DUMMY_DECOMPRESSOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "DUMMY";
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
private static final Decompressor DUMMY_DECOMPRESSOR = new Decompressor() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void decompress(DataInput in, int originalLength,
|
||||||
|
int offset, int length, BytesRef bytes) throws IOException {
|
||||||
|
assert offset + length <= originalLength;
|
||||||
|
if (bytes.bytes.length < originalLength) {
|
||||||
|
bytes.bytes = new byte[ArrayUtil.oversize(originalLength, 1)];
|
||||||
|
}
|
||||||
|
in.readBytes(bytes.bytes, 0, offset + length);
|
||||||
|
bytes.offset = offset;
|
||||||
|
bytes.length = length;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void copyCompressedData(DataInput in, int originalLength, DataOutput out) throws IOException {
|
||||||
|
out.copyBytes(in, originalLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Decompressor clone() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
private static final Compressor DUMMY_COMPRESSOR = new Compressor() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
|
||||||
|
out.writeBytes(bytes, off, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Constructor that allows configuring the chunk size. */
|
||||||
|
public DummyCompressingCodec(int chunkSize) {
|
||||||
|
super("DummyCompressingStoredFields", DUMMY, chunkSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Default constructor. */
|
||||||
|
public DummyCompressingCodec() {
|
||||||
|
this(1 << 14);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
package org.apache.lucene.codecs.compressing;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** CompressingCodec that uses {@link CompressionMode#FAST} */
|
||||||
|
public class FastCompressingCodec extends CompressingCodec {
|
||||||
|
|
||||||
|
/** Constructor that allows configuring the chunk size. */
|
||||||
|
public FastCompressingCodec(int chunkSize) {
|
||||||
|
super("FastCompressingStoredFields", CompressionMode.FAST, chunkSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Default constructor. */
|
||||||
|
public FastCompressingCodec() {
|
||||||
|
this(1 << 14);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
package org.apache.lucene.codecs.compressing;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** CompressingCodec that uses {@link CompressionMode#FAST_DECOMPRESSION} */
|
||||||
|
public class FastDecompressionCompressingCodec extends CompressingCodec {
|
||||||
|
|
||||||
|
/** Constructor that allows configuring the chunk size. */
|
||||||
|
public FastDecompressionCompressingCodec(int chunkSize) {
|
||||||
|
super("FastDecompressionCompressingStoredFields", CompressionMode.FAST_DECOMPRESSION, chunkSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Default constructor. */
|
||||||
|
public FastDecompressionCompressingCodec() {
|
||||||
|
this(1 << 14);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
package org.apache.lucene.codecs.compressing;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** CompressingCodec that uses {@link CompressionMode#HIGH_COMPRESSION} */
|
||||||
|
public class HighCompressionCompressingCodec extends CompressingCodec {
|
||||||
|
|
||||||
|
public HighCompressionCompressingCodec(int chunkSize) {
|
||||||
|
super("HighCompressionCompressingStoredFields", CompressionMode.HIGH_COMPRESSION, chunkSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
public HighCompressionCompressingCodec() {
|
||||||
|
this(1 << 14);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -14,4 +14,7 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
org.apache.lucene.codecs.asserting.AssertingCodec
|
org.apache.lucene.codecs.asserting.AssertingCodec
|
||||||
org.apache.lucene.codecs.compressing.CompressingCodec
|
org.apache.lucene.codecs.compressing.FastCompressingCodec
|
||||||
|
org.apache.lucene.codecs.compressing.FastDecompressionCompressingCodec
|
||||||
|
org.apache.lucene.codecs.compressing.HighCompressionCompressingCodec
|
||||||
|
org.apache.lucene.codecs.compressing.DummyCompressingCodec
|
||||||
|
|
Loading…
Reference in New Issue