LUCENE-6153: randomize stored fields/vectors index blocksize

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1648849 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2015-01-01 14:57:06 +00:00
parent 94a23da9c2
commit 55eb8b3da3
13 changed files with 71 additions and 51 deletions
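
The change makes the stored fields/term vectors index block size (the number of chunks serialized per index block, previously the hard-coded constant BLOCK_SIZE = 1024) a constructor parameter, so the test framework can randomize it. A minimal sketch of what the randomization amounts to, using plain java.util.Random in place of the RandomInts helper the tests use:

  Random random = new Random();
  // Tiny block sizes exercise the block-rollover paths; larger ones
  // approximate the old fixed default of 1024.
  int blockSize = random.nextBoolean()
      ? 1 + random.nextInt(10)     // 1..10, inclusive
      : 1 + random.nextInt(1024);  // 1..1024, inclusive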

CompressingStoredFieldsFormat.java

@@ -49,15 +49,16 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
   private final CompressionMode compressionMode;
   private final int chunkSize;
   private final int maxDocsPerChunk;
+  private final int blockSize;

   /**
    * Create a new {@link CompressingStoredFieldsFormat} with an empty segment
    * suffix.
    *
-   * @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(String, String, CompressionMode, int, int)
+   * @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(String, String, CompressionMode, int, int, int)
    */
-  public CompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
-    this(formatName, "", compressionMode, chunkSize, maxDocsPerChunk);
+  public CompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockSize) {
+    this(formatName, "", compressionMode, chunkSize, maxDocsPerChunk, blockSize);
   }

@@ -92,10 +93,11 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
    * @param compressionMode the {@link CompressionMode} to use
    * @param chunkSize the minimum number of bytes of a single chunk of stored documents
    * @param maxDocsPerChunk the maximum number of documents in a single chunk
+   * @param blockSize the number of chunks to store in an index block
    * @see CompressionMode
    */
   public CompressingStoredFieldsFormat(String formatName, String segmentSuffix,
-      CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
+      CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockSize) {
     this.formatName = formatName;
     this.segmentSuffix = segmentSuffix;
     this.compressionMode = compressionMode;

@@ -107,6 +109,10 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
       throw new IllegalArgumentException("maxDocsPerChunk must be >= 1");
     }
     this.maxDocsPerChunk = maxDocsPerChunk;
+    if (blockSize < 1) {
+      throw new IllegalArgumentException("blockSize must be >= 1");
+    }
+    this.blockSize = blockSize;
   }

   @Override

@@ -120,13 +126,13 @@ public class CompressingStoredFieldsFormat extends StoredFieldsFormat {
   public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
       IOContext context) throws IOException {
     return new CompressingStoredFieldsWriter(directory, si, segmentSuffix, context,
-        formatName, compressionMode, chunkSize, maxDocsPerChunk);
+        formatName, compressionMode, chunkSize, maxDocsPerChunk, blockSize);
   }

   @Override
   public String toString() {
     return getClass().getSimpleName() + "(compressionMode=" + compressionMode
-        + ", chunkSize=" + chunkSize + ", maxDocsPerChunk=" + maxDocsPerChunk + ")";
+        + ", chunkSize=" + chunkSize + ", maxDocsPerChunk=" + maxDocsPerChunk + ", blockSize=" + blockSize + ")";
   }
 }
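
With this change every caller supplies the index block size explicitly. A usage sketch, assuming the same defaults Lucene50StoredFieldsFormat passes further down (the format name here is illustrative):

  // chunkSize = 16 KB, maxDocsPerChunk = 128, blockSize = 1024 chunks per index block
  StoredFieldsFormat fmt = new CompressingStoredFieldsFormat(
      "MyStoredFields", CompressionMode.FAST, 1 << 14, 128, 1024);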

CompressingStoredFieldsIndexWriter.java

@@ -72,9 +72,8 @@ import org.apache.lucene.util.packed.PackedInts;
  */
 public final class CompressingStoredFieldsIndexWriter implements Closeable {

-  static final int BLOCK_SIZE = 1024; // number of chunks to serialize at once
-
   final IndexOutput fieldsIndexOut;
+  final int blockSize;
   int totalDocs;
   int blockDocs;
   int blockChunks;

@@ -83,12 +82,16 @@ public final class CompressingStoredFieldsIndexWriter implements Closeable {
   final int[] docBaseDeltas;
   final long[] startPointerDeltas;

-  CompressingStoredFieldsIndexWriter(IndexOutput indexOutput) throws IOException {
+  CompressingStoredFieldsIndexWriter(IndexOutput indexOutput, int blockSize) throws IOException {
+    if (blockSize <= 0) {
+      throw new IllegalArgumentException("blockSize must be positive");
+    }
+    this.blockSize = blockSize;
     this.fieldsIndexOut = indexOutput;
     reset();
     totalDocs = 0;
-    docBaseDeltas = new int[BLOCK_SIZE];
-    startPointerDeltas = new long[BLOCK_SIZE];
+    docBaseDeltas = new int[blockSize];
+    startPointerDeltas = new long[blockSize];
     fieldsIndexOut.writeVInt(PackedInts.VERSION_CURRENT);
   }

@@ -171,7 +174,7 @@ public final class CompressingStoredFieldsIndexWriter implements Closeable {
   }

   void writeIndex(int numDocs, long startPointer) throws IOException {
-    if (blockChunks == BLOCK_SIZE) {
+    if (blockChunks == blockSize) {
       writeBlock();
       reset();
     }
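
The writer buffers one (docBaseDelta, startPointerDelta) pair per chunk and flushes a packed block once blockSize pairs have accumulated; the old BLOCK_SIZE constant simply becomes the blockSize field. A standalone sketch of that buffering pattern (not the Lucene API; the packed-ints serialization is elided):

  class IndexBlockBuffer {
    final int[] docBaseDeltas;
    final long[] startPointerDeltas;
    int blockChunks;

    IndexBlockBuffer(int blockSize) {
      if (blockSize <= 0) {
        throw new IllegalArgumentException("blockSize must be positive");
      }
      docBaseDeltas = new int[blockSize];
      startPointerDeltas = new long[blockSize];
    }

    void writeIndex(int docBaseDelta, long startPointerDelta) {
      if (blockChunks == docBaseDeltas.length) {
        writeBlock();     // in Lucene: compress the deltas with PackedInts
        blockChunks = 0;  // in Lucene: reset()
      }
      docBaseDeltas[blockChunks] = docBaseDelta;
      startPointerDeltas[blockChunks] = startPointerDelta;
      blockChunks++;
    }

    void writeBlock() { /* serialization elided */ }
  }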

CompressingStoredFieldsWriter.java

@@ -86,7 +86,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
   /** Sole constructor. */
   public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
-      String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) throws IOException {
+      String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockSize) throws IOException {
     assert directory != null;
     this.segment = si.name;
     this.compressor = compressionMode.newCompressor();

@@ -112,7 +112,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       assert CodecUtil.indexHeaderLength(codecNameDat, segmentSuffix) == fieldsStream.getFilePointer();
       assert CodecUtil.indexHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();

-      indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
+      indexWriter = new CompressingStoredFieldsIndexWriter(indexStream, blockSize);
       indexStream = null;

       fieldsStream.writeVInt(chunkSize);

CompressingTermVectorsFormat.java

@@ -40,6 +40,7 @@ public class CompressingTermVectorsFormat extends TermVectorsFormat {
   private final String segmentSuffix;
   private final CompressionMode compressionMode;
   private final int chunkSize;
+  private final int blockSize;

   /**
    * Create a new {@link CompressingTermVectorsFormat}.

@@ -65,10 +66,11 @@ public class CompressingTermVectorsFormat extends TermVectorsFormat {
    * @param segmentSuffix a suffix to append to files created by this format
    * @param compressionMode the {@link CompressionMode} to use
    * @param chunkSize the minimum number of bytes of a single chunk of stored documents
+   * @param blockSize the number of chunks to store in an index block.
    * @see CompressionMode
    */
   public CompressingTermVectorsFormat(String formatName, String segmentSuffix,
-      CompressionMode compressionMode, int chunkSize) {
+      CompressionMode compressionMode, int chunkSize, int blockSize) {
     this.formatName = formatName;
     this.segmentSuffix = segmentSuffix;
     this.compressionMode = compressionMode;

@@ -76,6 +78,10 @@ public class CompressingTermVectorsFormat extends TermVectorsFormat {
       throw new IllegalArgumentException("chunkSize must be >= 1");
     }
     this.chunkSize = chunkSize;
+    if (blockSize < 1) {
+      throw new IllegalArgumentException("blockSize must be >= 1");
+    }
+    this.blockSize = blockSize;
   }

   @Override

@@ -90,13 +96,13 @@ public class CompressingTermVectorsFormat extends TermVectorsFormat {
   public final TermVectorsWriter vectorsWriter(Directory directory,
       SegmentInfo segmentInfo, IOContext context) throws IOException {
     return new CompressingTermVectorsWriter(directory, segmentInfo, segmentSuffix,
-        context, formatName, compressionMode, chunkSize);
+        context, formatName, compressionMode, chunkSize, blockSize);
   }

   @Override
   public String toString() {
     return getClass().getSimpleName() + "(compressionMode=" + compressionMode
-        + ", chunkSize=" + chunkSize + ")";
+        + ", chunkSize=" + chunkSize + ", blockSize=" + blockSize + ")";
   }
 }
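
The term vectors format takes the same parameter. A usage sketch mirroring what Lucene50TermVectorsFormat passes further down (the format name and empty segment suffix here are illustrative):

  TermVectorsFormat tvf = new CompressingTermVectorsFormat(
      "MyTermVectors", "", CompressionMode.FAST, 1 << 12, 1024);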

CompressingTermVectorsReader.java

@@ -17,7 +17,7 @@ package org.apache.lucene.codecs.compressing;
  * limitations under the License.
  */

-import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.BLOCK_SIZE;
+import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PACKED_BLOCK_SIZE;
 import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_DAT;
 import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_IDX;
 import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.FLAGS_BITS;

@@ -92,7 +92,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implements Closeable {
     this.decompressor = reader.decompressor.clone();
     this.chunkSize = reader.chunkSize;
     this.numDocs = reader.numDocs;
-    this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+    this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
     this.version = reader.version;
     this.closed = false;
   }

@@ -150,7 +150,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implements Closeable {
       packedIntsVersion = vectorsStream.readVInt();
       chunkSize = vectorsStream.readVInt();
       decompressor = compressionMode.newDecompressor();
-      this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, BLOCK_SIZE, 0);
+      this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
       success = true;
     } finally {

CompressingTermVectorsWriter.java

@@ -68,7 +68,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;

-  static final int BLOCK_SIZE = 64;
+  static final int PACKED_BLOCK_SIZE = 64;

   static final int POSITIONS = 0x01;
   static final int OFFSETS = 0x02;

@@ -204,7 +204,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
   /** Sole constructor. */
   public CompressingTermVectorsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
-      String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
+      String formatName, CompressionMode compressionMode, int chunkSize, int blockSize) throws IOException {
     assert directory != null;
     this.directory = directory;
     this.segment = si.name;

@@ -233,12 +233,12 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
       assert CodecUtil.indexHeaderLength(codecNameDat, segmentSuffix) == vectorsStream.getFilePointer();
       assert CodecUtil.indexHeaderLength(codecNameIdx, segmentSuffix) == indexStream.getFilePointer();

-      indexWriter = new CompressingStoredFieldsIndexWriter(indexStream);
+      indexWriter = new CompressingStoredFieldsIndexWriter(indexStream, blockSize);
       indexStream = null;

       vectorsStream.writeVInt(PackedInts.VERSION_CURRENT);
       vectorsStream.writeVInt(chunkSize);
-      writer = new BlockPackedWriter(vectorsStream, BLOCK_SIZE);
+      writer = new BlockPackedWriter(vectorsStream, PACKED_BLOCK_SIZE);

       positionsBuf = new int[1024];
       startOffsetsBuf = new int[1024];
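
Note the rename: PACKED_BLOCK_SIZE (still fixed at 64) is the packed-ints block length fed to BlockPackedWriter for positions and offsets, while blockSize is the now-configurable number of chunks per index block passed to CompressingStoredFieldsIndexWriter. Keeping the old BLOCK_SIZE name would have invited confusion with the new parameter.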

Lucene50StoredFieldsFormat.java

@@ -182,9 +182,9 @@ public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
   StoredFieldsFormat impl(Mode mode) {
     switch (mode) {
       case BEST_SPEED:
-        return new CompressingStoredFieldsFormat("Lucene50StoredFieldsFast", CompressionMode.FAST, 1 << 14, 128);
+        return new CompressingStoredFieldsFormat("Lucene50StoredFieldsFast", CompressionMode.FAST, 1 << 14, 128, 1024);
       case BEST_COMPRESSION:
-        return new CompressingStoredFieldsFormat("Lucene50StoredFieldsHigh", CompressionMode.HIGH_COMPRESSION, 61440, 512);
+        return new CompressingStoredFieldsFormat("Lucene50StoredFieldsHigh", CompressionMode.HIGH_COMPRESSION, 61440, 512, 1024);
       default: throw new AssertionError();
     }
   }

Lucene50TermVectorsFormat.java

@@ -125,7 +125,7 @@ public final class Lucene50TermVectorsFormat extends CompressingTermVectorsFormat {
   /** Sole constructor. */
   public Lucene50TermVectorsFormat() {
-    super("Lucene50TermVectors", "", CompressionMode.FAST, 1 << 12);
+    super("Lucene50TermVectors", "", CompressionMode.FAST, 1 << 12, 1024);
   }
 }

CompressingCodec.java

@@ -36,16 +36,16 @@ public abstract class CompressingCodec extends FilterCodec {
   /**
    * Create a random instance.
    */
-  public static CompressingCodec randomInstance(Random random, int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
+  public static CompressingCodec randomInstance(Random random, int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix, int blockSize) {
     switch (random.nextInt(4)) {
     case 0:
-      return new FastCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
+      return new FastCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockSize);
     case 1:
-      return new FastDecompressionCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
+      return new FastDecompressionCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockSize);
     case 2:
-      return new HighCompressionCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
+      return new HighCompressionCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockSize);
     case 3:
-      return new DummyCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
+      return new DummyCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockSize);
     default:
       throw new AssertionError();
     }

@@ -58,14 +58,19 @@ public abstract class CompressingCodec extends FilterCodec {
   public static CompressingCodec randomInstance(Random random) {
     final int chunkSize = random.nextBoolean() ? RandomInts.randomIntBetween(random, 1, 10) : RandomInts.randomIntBetween(random, 1, 1 << 15);
     final int chunkDocs = random.nextBoolean() ? RandomInts.randomIntBetween(random, 1, 10) : RandomInts.randomIntBetween(random, 64, 1024);
-    return randomInstance(random, chunkSize, chunkDocs, false);
+    final int blockSize = random.nextBoolean() ? RandomInts.randomIntBetween(random, 1, 10) : RandomInts.randomIntBetween(random, 1, 1024);
+    return randomInstance(random, chunkSize, chunkDocs, false, blockSize);
   }

   /**
    * Creates a random {@link CompressingCodec} that is using a segment suffix
    */
   public static CompressingCodec randomInstance(Random random, boolean withSegmentSuffix) {
-    return randomInstance(random, RandomInts.randomIntBetween(random, 1, 1 << 15), RandomInts.randomIntBetween(random, 64, 1024), withSegmentSuffix);
+    return randomInstance(random,
+        RandomInts.randomIntBetween(random, 1, 1 << 15),
+        RandomInts.randomIntBetween(random, 64, 1024),
+        withSegmentSuffix,
+        RandomInts.randomIntBetween(random, 1, 1024));
   }

   private final CompressingStoredFieldsFormat storedFieldsFormat;

@@ -74,17 +79,17 @@ public abstract class CompressingCodec extends FilterCodec {
   /**
    * Creates a compressing codec with a given segment suffix
    */
-  public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
+  public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockSize) {
     super(name, TestUtil.getDefaultCodec());
-    this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize, maxDocsPerChunk);
-    this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize);
+    this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize, maxDocsPerChunk, blockSize);
+    this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize, blockSize);
   }

   /**
    * Creates a compressing codec with an empty segment suffix
    */
-  public CompressingCodec(String name, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
-    this(name, "", compressionMode, chunkSize, maxDocsPerChunk);
+  public CompressingCodec(String name, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk, int blockSize) {
+    this(name, "", compressionMode, chunkSize, maxDocsPerChunk, blockSize);
   }

   @Override
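
A sketch of how a test might pick up one of these randomized codecs (MockAnalyzer and IndexWriterConfig are standard Lucene test pieces; the seed and wiring here are illustrative):

  Random random = new Random(42);
  Codec codec = CompressingCodec.randomInstance(random); // random mode, chunkSize, maxDocsPerChunk, blockSize
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(codec);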

FastCompressingCodec.java

@@ -21,14 +21,14 @@ package org.apache.lucene.codecs.compressing;
 public class FastCompressingCodec extends CompressingCodec {

   /** Constructor that allows to configure the chunk size. */
-  public FastCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
+  public FastCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix, int blockSize) {
     super("FastCompressingStoredFields",
         withSegmentSuffix ? "FastCompressingStoredFields" : "",
-        CompressionMode.FAST, chunkSize, maxDocsPerChunk);
+        CompressionMode.FAST, chunkSize, maxDocsPerChunk, blockSize);
   }

   /** Default constructor. */
   public FastCompressingCodec() {
-    this(1 << 14, 128, false);
+    this(1 << 14, 128, false, 1024);
   }
 }

FastDecompressionCompressingCodec.java

@@ -21,14 +21,14 @@ package org.apache.lucene.codecs.compressing;
 public class FastDecompressionCompressingCodec extends CompressingCodec {

   /** Constructor that allows to configure the chunk size. */
-  public FastDecompressionCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
+  public FastDecompressionCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix, int blockSize) {
     super("FastDecompressionCompressingStoredFields",
         withSegmentSuffix ? "FastDecompressionCompressingStoredFields" : "",
-        CompressionMode.FAST_DECOMPRESSION, chunkSize, maxDocsPerChunk);
+        CompressionMode.FAST_DECOMPRESSION, chunkSize, maxDocsPerChunk, blockSize);
   }

   /** Default constructor. */
   public FastDecompressionCompressingCodec() {
-    this(1 << 14, 256, false);
+    this(1 << 14, 256, false, 1024);
   }
 }

HighCompressionCompressingCodec.java

@@ -21,16 +21,16 @@ package org.apache.lucene.codecs.compressing;
 public class HighCompressionCompressingCodec extends CompressingCodec {

   /** Constructor that allows to configure the chunk size. */
-  public HighCompressionCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
+  public HighCompressionCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix, int blockSize) {
     super("HighCompressionCompressingStoredFields",
         withSegmentSuffix ? "HighCompressionCompressingStoredFields" : "",
-        CompressionMode.HIGH_COMPRESSION, chunkSize, maxDocsPerChunk);
+        CompressionMode.HIGH_COMPRESSION, chunkSize, maxDocsPerChunk, blockSize);
   }

   /** Default constructor. */
   public HighCompressionCompressingCodec() {
     // we don't worry about zlib block overhead as it's
     // not bad and try to save space instead:
-    this(61440, 512, false);
+    this(61440, 512, false, 1024);
   }
 }

DummyCompressingCodec.java

@@ -83,15 +83,15 @@ public class DummyCompressingCodec extends CompressingCodec {
   };

   /** Constructor that allows to configure the chunk size. */
-  public DummyCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
+  public DummyCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix, int blockSize) {
     super("DummyCompressingStoredFields",
         withSegmentSuffix ? "DummyCompressingStoredFields" : "",
-        DUMMY, chunkSize, maxDocsPerChunk);
+        DUMMY, chunkSize, maxDocsPerChunk, blockSize);
   }

   /** Default constructor. */
   public DummyCompressingCodec() {
-    this(1 << 14, 128, false);
+    this(1 << 14, 128, false, 1024);
   }
 }