mirror of https://github.com/apache/lucene.git
LUCENE-6086: Space optimizations for numeric stored fields.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1644661 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
16337aba86
commit
b7a20caf84
|
@ -20,17 +20,23 @@ package org.apache.lucene.codecs.compressing;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.BYTE_ARR;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.BYTE_ARR;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_SFX_DAT;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_SFX_DAT;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_SFX_IDX;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.CODEC_SFX_IDX;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.DAY;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.DAY_ENCODING;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.FIELDS_EXTENSION;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.FIELDS_INDEX_EXTENSION;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.HOUR;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.HOUR_ENCODING;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_DOUBLE;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_DOUBLE;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_FLOAT;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_FLOAT;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_INT;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_INT;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_LONG;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.NUMERIC_LONG;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.SECOND;
|
||||||
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.SECOND_ENCODING;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.STRING;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.STRING;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
|
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.FIELDS_EXTENSION;
|
|
||||||
import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.FIELDS_INDEX_EXTENSION;
|
|
||||||
|
|
||||||
import java.io.EOFException;
|
import java.io.EOFException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -57,6 +63,7 @@ import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
import org.apache.lucene.util.Accountables;
|
import org.apache.lucene.util.Accountables;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.BitUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
@ -201,16 +208,16 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
visitor.stringField(info, new String(data, StandardCharsets.UTF_8));
|
visitor.stringField(info, new String(data, StandardCharsets.UTF_8));
|
||||||
break;
|
break;
|
||||||
case NUMERIC_INT:
|
case NUMERIC_INT:
|
||||||
visitor.intField(info, in.readInt());
|
visitor.intField(info, in.readZInt());
|
||||||
break;
|
break;
|
||||||
case NUMERIC_FLOAT:
|
case NUMERIC_FLOAT:
|
||||||
visitor.floatField(info, Float.intBitsToFloat(in.readInt()));
|
visitor.floatField(info, readZFloat(in));
|
||||||
break;
|
break;
|
||||||
case NUMERIC_LONG:
|
case NUMERIC_LONG:
|
||||||
visitor.longField(info, in.readLong());
|
visitor.longField(info, readTLong(in));
|
||||||
break;
|
break;
|
||||||
case NUMERIC_DOUBLE:
|
case NUMERIC_DOUBLE:
|
||||||
visitor.doubleField(info, Double.longBitsToDouble(in.readLong()));
|
visitor.doubleField(info, readZDouble(in));
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
|
throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
|
||||||
|
@ -225,18 +232,98 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
in.skipBytes(length);
|
in.skipBytes(length);
|
||||||
break;
|
break;
|
||||||
case NUMERIC_INT:
|
case NUMERIC_INT:
|
||||||
|
in.readZInt();
|
||||||
|
break;
|
||||||
case NUMERIC_FLOAT:
|
case NUMERIC_FLOAT:
|
||||||
in.readInt();
|
readZFloat(in);
|
||||||
break;
|
break;
|
||||||
case NUMERIC_LONG:
|
case NUMERIC_LONG:
|
||||||
|
readTLong(in);
|
||||||
|
break;
|
||||||
case NUMERIC_DOUBLE:
|
case NUMERIC_DOUBLE:
|
||||||
in.readLong();
|
readZDouble(in);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
|
throw new AssertionError("Unknown type flag: " + Integer.toHexString(bits));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a float in a variable-length format. Reads between one and
|
||||||
|
* five bytes. Small integral values typically take fewer bytes.
|
||||||
|
*/
|
||||||
|
static float readZFloat(DataInput in) throws IOException {
|
||||||
|
int b = in.readByte() & 0xFF;
|
||||||
|
if (b == 0xFF) {
|
||||||
|
// negative value
|
||||||
|
return Float.intBitsToFloat(in.readInt());
|
||||||
|
} else if ((b & 0x80) != 0) {
|
||||||
|
// small integer [-1..125]
|
||||||
|
return (b & 0x7f) - 1;
|
||||||
|
} else {
|
||||||
|
// positive float
|
||||||
|
int bits = b << 24 | ((in.readShort() & 0xFFFF) << 8) | (in.readByte() & 0xFF);
|
||||||
|
return Float.intBitsToFloat(bits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a double in a variable-length format. Reads between one and
|
||||||
|
* nine bytes. Small integral values typically take fewer bytes.
|
||||||
|
*/
|
||||||
|
static double readZDouble(DataInput in) throws IOException {
|
||||||
|
int b = in.readByte() & 0xFF;
|
||||||
|
if (b == 0xFF) {
|
||||||
|
// negative value
|
||||||
|
return Double.longBitsToDouble(in.readLong());
|
||||||
|
} else if (b == 0xFE) {
|
||||||
|
// float
|
||||||
|
return Float.intBitsToFloat(in.readInt());
|
||||||
|
} else if ((b & 0x80) != 0) {
|
||||||
|
// small integer [-1..124]
|
||||||
|
return (b & 0x7f) - 1;
|
||||||
|
} else {
|
||||||
|
// positive double
|
||||||
|
long bits = ((long) b) << 56 | ((in.readInt() & 0xFFFFFFFFL) << 24) | ((in.readShort() & 0xFFFFL) << 8) | (in.readByte() & 0xFFL);
|
||||||
|
return Double.longBitsToDouble(bits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a long in a variable-length format. Reads between one and
|
||||||
|
* nine bytes. Small values typically take fewer bytes.
|
||||||
|
*/
|
||||||
|
static long readTLong(DataInput in) throws IOException {
|
||||||
|
int header = in.readByte() & 0xFF;
|
||||||
|
|
||||||
|
long bits = header & 0x1F;
|
||||||
|
if ((header & 0x20) != 0) {
|
||||||
|
// continuation bit
|
||||||
|
bits |= in.readVLong() << 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
long l = BitUtil.zigZagDecode(bits);
|
||||||
|
|
||||||
|
switch (header & DAY_ENCODING) {
|
||||||
|
case SECOND_ENCODING:
|
||||||
|
l *= SECOND;
|
||||||
|
break;
|
||||||
|
case HOUR_ENCODING:
|
||||||
|
l *= HOUR;
|
||||||
|
break;
|
||||||
|
case DAY_ENCODING:
|
||||||
|
l *= DAY;
|
||||||
|
break;
|
||||||
|
case 0:
|
||||||
|
// uncompressed
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visitDocument(int docID, StoredFieldVisitor visitor)
|
public void visitDocument(int docID, StoredFieldVisitor visitor)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.BitUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.GrowableByteArrayDataOutput;
|
import org.apache.lucene.util.GrowableByteArrayDataOutput;
|
||||||
|
@ -70,9 +71,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
static final int VERSION_START = 0;
|
static final int VERSION_START = 0;
|
||||||
static final int VERSION_CURRENT = VERSION_START;
|
static final int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
private final Directory directory;
|
|
||||||
private final String segment;
|
private final String segment;
|
||||||
private final String segmentSuffix;
|
|
||||||
private CompressingStoredFieldsIndexWriter indexWriter;
|
private CompressingStoredFieldsIndexWriter indexWriter;
|
||||||
private IndexOutput fieldsStream;
|
private IndexOutput fieldsStream;
|
||||||
|
|
||||||
|
@ -91,9 +90,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
|
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
|
||||||
String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) throws IOException {
|
String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) throws IOException {
|
||||||
assert directory != null;
|
assert directory != null;
|
||||||
this.directory = directory;
|
|
||||||
this.segment = si.name;
|
this.segment = si.name;
|
||||||
this.segmentSuffix = segmentSuffix;
|
|
||||||
this.compressionMode = compressionMode;
|
this.compressionMode = compressionMode;
|
||||||
this.compressor = compressionMode.newCompressor();
|
this.compressor = compressionMode.newCompressor();
|
||||||
this.chunkSize = chunkSize;
|
this.chunkSize = chunkSize;
|
||||||
|
@ -288,19 +285,170 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
|
||||||
bufferedDocs.writeString(field.stringValue());
|
bufferedDocs.writeString(field.stringValue());
|
||||||
} else {
|
} else {
|
||||||
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
|
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
|
||||||
bufferedDocs.writeInt(number.intValue());
|
bufferedDocs.writeZInt(number.intValue());
|
||||||
} else if (number instanceof Long) {
|
} else if (number instanceof Long) {
|
||||||
bufferedDocs.writeLong(number.longValue());
|
writeTLong(bufferedDocs, number.longValue());
|
||||||
} else if (number instanceof Float) {
|
} else if (number instanceof Float) {
|
||||||
bufferedDocs.writeInt(Float.floatToIntBits(number.floatValue()));
|
writeZFloat(bufferedDocs, number.floatValue());
|
||||||
} else if (number instanceof Double) {
|
} else if (number instanceof Double) {
|
||||||
bufferedDocs.writeLong(Double.doubleToLongBits(number.doubleValue()));
|
writeZDouble(bufferedDocs, number.doubleValue());
|
||||||
} else {
|
} else {
|
||||||
throw new AssertionError("Cannot get here");
|
throw new AssertionError("Cannot get here");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -0 isn't compressed.
|
||||||
|
static final int NEGATIVE_ZERO_FLOAT = Float.floatToIntBits(-0f);
|
||||||
|
static final long NEGATIVE_ZERO_DOUBLE = Double.doubleToLongBits(-0d);
|
||||||
|
|
||||||
|
// for compression of timestamps
|
||||||
|
static final long SECOND = 1000L;
|
||||||
|
static final long HOUR = 60 * 60 * SECOND;
|
||||||
|
static final long DAY = 24 * HOUR;
|
||||||
|
static final int SECOND_ENCODING = 0x40;
|
||||||
|
static final int HOUR_ENCODING = 0x80;
|
||||||
|
static final int DAY_ENCODING = 0xC0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a float in a variable-length format. Writes between one and
|
||||||
|
* five bytes. Small integral values typically take fewer bytes.
|
||||||
|
* <p>
|
||||||
|
* ZFloat --> Header, Bytes*?
|
||||||
|
* <ul>
|
||||||
|
* <li>Header --> {@link DataOutput#writeByte Uint8}. When it is
|
||||||
|
* equal to 0xFF then the value is negative and stored in the next
|
||||||
|
* 4 bytes. Otherwise if the first bit is set then the other bits
|
||||||
|
* in the header encode the value plus one and no other
|
||||||
|
* bytes are read. Otherwise, the value is a positive float value
|
||||||
|
* whose first byte is the header, and 3 bytes need to be read to
|
||||||
|
* complete it.
|
||||||
|
* <li>Bytes --> Potential additional bytes to read depending on the
|
||||||
|
* header.
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
*/
|
||||||
|
static void writeZFloat(DataOutput out, float f) throws IOException {
|
||||||
|
int intVal = (int) f;
|
||||||
|
final int floatBits = Float.floatToIntBits(f);
|
||||||
|
|
||||||
|
if (f == intVal
|
||||||
|
&& intVal >= -1
|
||||||
|
&& intVal <= 0x7D
|
||||||
|
&& floatBits != NEGATIVE_ZERO_FLOAT) {
|
||||||
|
// small integer value [-1..125]: single byte
|
||||||
|
out.writeByte((byte) (0x80 | (1 + intVal)));
|
||||||
|
} else if ((floatBits >>> 31) == 0) {
|
||||||
|
// other positive floats: 4 bytes
|
||||||
|
out.writeInt(floatBits);
|
||||||
|
} else {
|
||||||
|
// other negative float: 5 bytes
|
||||||
|
out.writeByte((byte) 0xFF);
|
||||||
|
out.writeInt(floatBits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a float in a variable-length format. Writes between one and
|
||||||
|
* five bytes. Small integral values typically take fewer bytes.
|
||||||
|
* <p>
|
||||||
|
* ZFloat --> Header, Bytes*?
|
||||||
|
* <ul>
|
||||||
|
* <li>Header --> {@link DataOutput#writeByte Uint8}. When it is
|
||||||
|
* equal to 0xFF then the value is negative and stored in the next
|
||||||
|
* 8 bytes. When it is equal to 0xFE then the value is stored as a
|
||||||
|
* float in the next 4 bytes. Otherwise if the first bit is set
|
||||||
|
* then the other bits in the header encode the value plus one and
|
||||||
|
* no other bytes are read. Otherwise, the value is a positive float
|
||||||
|
* value whose first byte is the header, and 7 bytes need to be read
|
||||||
|
* to complete it.
|
||||||
|
* <li>Bytes --> Potential additional bytes to read depending on the
|
||||||
|
* header.
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
*/
|
||||||
|
static void writeZDouble(DataOutput out, double d) throws IOException {
|
||||||
|
int intVal = (int) d;
|
||||||
|
final long doubleBits = Double.doubleToLongBits(d);
|
||||||
|
|
||||||
|
if (d == intVal &&
|
||||||
|
intVal >= -1 &&
|
||||||
|
intVal <= 0x7C &&
|
||||||
|
doubleBits != NEGATIVE_ZERO_DOUBLE) {
|
||||||
|
// small integer value [-1..124]: single byte
|
||||||
|
out.writeByte((byte) (0x80 | (intVal + 1)));
|
||||||
|
return;
|
||||||
|
} else if (d == (float) d) {
|
||||||
|
// d has an accurate float representation: 5 bytes
|
||||||
|
out.writeByte((byte) 0xFE);
|
||||||
|
out.writeInt(Float.floatToIntBits((float) d));
|
||||||
|
} else if ((doubleBits >>> 63) == 0) {
|
||||||
|
// other positive doubles: 8 bytes
|
||||||
|
out.writeLong(doubleBits);
|
||||||
|
} else {
|
||||||
|
// other negative doubles: 9 bytes
|
||||||
|
out.writeByte((byte) 0xFF);
|
||||||
|
out.writeLong(doubleBits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a long in a variable-length format. Writes between one and
|
||||||
|
* ten bytes. Small values or values representing timestamps with day,
|
||||||
|
* hour or second precision typically require fewer bytes.
|
||||||
|
* <p>
|
||||||
|
* ZLong --> Header, Bytes*?
|
||||||
|
* <ul>
|
||||||
|
* <li>Header --> The first two bits indicate the compression scheme:
|
||||||
|
* <ul>
|
||||||
|
* <li>00 - uncompressed
|
||||||
|
* <li>01 - multiple of 1000 (second)
|
||||||
|
* <li>10 - multiple of 3600000 (hour)
|
||||||
|
* <li>11 - multiple of 86400000 (day)
|
||||||
|
* </ul>
|
||||||
|
* Then the next bit is a continuation bit, indicating whether more
|
||||||
|
* bytes need to be read, and the last 5 bits are the lower bits of
|
||||||
|
* the encoded value. In order to reconstruct the value, you need to
|
||||||
|
* combine the 5 lower bits of the header with a vLong in the next
|
||||||
|
* bytes (if the continuation bit is set to 1). Then
|
||||||
|
* {@link BitUtil#zigZagDecode(int) zigzag-decode} it and finally
|
||||||
|
* multiply by the multiple corresponding to the compression scheme.
|
||||||
|
* <li>Bytes --> Potential additional bytes to read depending on the
|
||||||
|
* header.
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
*/
|
||||||
|
// T for "timestamp"
|
||||||
|
static void writeTLong(DataOutput out, long l) throws IOException {
|
||||||
|
int header;
|
||||||
|
if (l % SECOND != 0) {
|
||||||
|
header = 0;
|
||||||
|
} else if (l % DAY == 0) {
|
||||||
|
// timestamp with day precision
|
||||||
|
header = DAY_ENCODING;
|
||||||
|
l /= DAY;
|
||||||
|
} else if (l % HOUR == 0) {
|
||||||
|
// timestamp with hour precision, or day precision with a timezone
|
||||||
|
header = HOUR_ENCODING;
|
||||||
|
l /= HOUR;
|
||||||
|
} else {
|
||||||
|
// timestamp with second precision
|
||||||
|
header = SECOND_ENCODING;
|
||||||
|
l /= SECOND;
|
||||||
|
}
|
||||||
|
|
||||||
|
final long zigZagL = BitUtil.zigZagEncode(l);
|
||||||
|
header |= (zigZagL & 0x1F); // last 5 bits
|
||||||
|
final long upperBits = zigZagL >>> 5;
|
||||||
|
if (upperBits != 0) {
|
||||||
|
header |= 0x20;
|
||||||
|
}
|
||||||
|
out.writeByte((byte) header);
|
||||||
|
if (upperBits != 0) {
|
||||||
|
out.writeVLong(upperBits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
public void finish(FieldInfos fis, int numDocs) throws IOException {
|
||||||
if (numBufferedDocs > 0) {
|
if (numBufferedDocs > 0) {
|
||||||
|
|
|
@ -18,25 +18,32 @@ package org.apache.lucene.codecs.compressing;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field.Store;
|
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.IntField;
|
import org.apache.lucene.document.IntField;
|
||||||
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
|
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.ByteArrayDataInput;
|
||||||
|
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||||
|
|
||||||
public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
|
public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
|
||||||
|
|
||||||
|
static final long SECOND = 1000L;
|
||||||
|
static final long HOUR = 60 * 60 * SECOND;
|
||||||
|
static final long DAY = 24 * HOUR;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Codec getCodec() {
|
protected Codec getCodec() {
|
||||||
return CompressingCodec.randomInstance(random());
|
return CompressingCodec.randomInstance(random());
|
||||||
|
@ -88,4 +95,176 @@ public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTes
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testZFloat() throws Exception {
|
||||||
|
byte buffer[] = new byte[5]; // we never need more than 5 bytes
|
||||||
|
ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
|
||||||
|
ByteArrayDataInput in = new ByteArrayDataInput(buffer);
|
||||||
|
|
||||||
|
// round-trip small integer values
|
||||||
|
for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
|
||||||
|
float f = (float) i;
|
||||||
|
CompressingStoredFieldsWriter.writeZFloat(out, f);
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
float g = CompressingStoredFieldsReader.readZFloat(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));
|
||||||
|
|
||||||
|
// check that compression actually works
|
||||||
|
if (i >= -1 && i <= 123) {
|
||||||
|
assertEquals(1, out.getPosition()); // single byte compression
|
||||||
|
}
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// round-trip special values
|
||||||
|
float special[] = {
|
||||||
|
-0.0f,
|
||||||
|
+0.0f,
|
||||||
|
Float.NEGATIVE_INFINITY,
|
||||||
|
Float.POSITIVE_INFINITY,
|
||||||
|
Float.MIN_VALUE,
|
||||||
|
Float.MAX_VALUE,
|
||||||
|
Float.NaN,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (float f : special) {
|
||||||
|
CompressingStoredFieldsWriter.writeZFloat(out, f);
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
float g = CompressingStoredFieldsReader.readZFloat(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// round-trip random values
|
||||||
|
Random r = random();
|
||||||
|
for (int i = 0; i < 100000; i++) {
|
||||||
|
float f = r.nextFloat() * (random().nextInt(100) - 50);
|
||||||
|
CompressingStoredFieldsWriter.writeZFloat(out, f);
|
||||||
|
assertTrue("length=" + out.getPosition() + ", f=" + f, out.getPosition() <= ((Float.floatToIntBits(f) >>> 31) == 1 ? 5 : 4));
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
float g = CompressingStoredFieldsReader.readZFloat(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testZDouble() throws Exception {
|
||||||
|
byte buffer[] = new byte[9]; // we never need more than 9 bytes
|
||||||
|
ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
|
||||||
|
ByteArrayDataInput in = new ByteArrayDataInput(buffer);
|
||||||
|
|
||||||
|
// round-trip small integer values
|
||||||
|
for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
|
||||||
|
double x = (double) i;
|
||||||
|
CompressingStoredFieldsWriter.writeZDouble(out, x);
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
double y = CompressingStoredFieldsReader.readZDouble(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
|
||||||
|
|
||||||
|
// check that compression actually works
|
||||||
|
if (i >= -1 && i <= 124) {
|
||||||
|
assertEquals(1, out.getPosition()); // single byte compression
|
||||||
|
}
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// round-trip special values
|
||||||
|
double special[] = {
|
||||||
|
-0.0d,
|
||||||
|
+0.0d,
|
||||||
|
Double.NEGATIVE_INFINITY,
|
||||||
|
Double.POSITIVE_INFINITY,
|
||||||
|
Double.MIN_VALUE,
|
||||||
|
Double.MAX_VALUE,
|
||||||
|
Double.NaN
|
||||||
|
};
|
||||||
|
|
||||||
|
for (double x : special) {
|
||||||
|
CompressingStoredFieldsWriter.writeZDouble(out, x);
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
double y = CompressingStoredFieldsReader.readZDouble(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// round-trip random values
|
||||||
|
Random r = random();
|
||||||
|
for (int i = 0; i < 100000; i++) {
|
||||||
|
double x = r.nextDouble() * (random().nextInt(100) - 50);
|
||||||
|
CompressingStoredFieldsWriter.writeZDouble(out, x);
|
||||||
|
assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= (x < 0 ? 9 : 8));
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
double y = CompressingStoredFieldsReader.readZDouble(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// same with floats
|
||||||
|
for (int i = 0; i < 100000; i++) {
|
||||||
|
double x = (double) (r.nextFloat() * (random().nextInt(100) - 50));
|
||||||
|
CompressingStoredFieldsWriter.writeZDouble(out, x);
|
||||||
|
assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
double y = CompressingStoredFieldsReader.readZDouble(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTLong() throws Exception {
|
||||||
|
byte buffer[] = new byte[10]; // we never need more than 10 bytes
|
||||||
|
ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
|
||||||
|
ByteArrayDataInput in = new ByteArrayDataInput(buffer);
|
||||||
|
|
||||||
|
// round-trip small integer values
|
||||||
|
for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
|
||||||
|
for (long mul : new long[] {SECOND, HOUR, DAY}) {
|
||||||
|
long l1 = (long) i * mul;
|
||||||
|
CompressingStoredFieldsWriter.writeTLong(out, l1);
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
long l2 = CompressingStoredFieldsReader.readTLong(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(l1, l2);
|
||||||
|
|
||||||
|
// check that compression actually works
|
||||||
|
if (i >= -16 && i <= 15) {
|
||||||
|
assertEquals(1, out.getPosition()); // single byte compression
|
||||||
|
}
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// round-trip random values
|
||||||
|
Random r = random();
|
||||||
|
for (int i = 0; i < 100000; i++) {
|
||||||
|
final int numBits = r.nextInt(65);
|
||||||
|
long l1 = r.nextLong() & ((1L << numBits) - 1);
|
||||||
|
switch (r.nextInt(4)) {
|
||||||
|
case 0:
|
||||||
|
l1 *= SECOND;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
l1 *= HOUR;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
l1 *= DAY;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
CompressingStoredFieldsWriter.writeTLong(out, l1);
|
||||||
|
in.reset(buffer, 0, out.getPosition());
|
||||||
|
long l2 = CompressingStoredFieldsReader.readTLong(in);
|
||||||
|
assertTrue(in.eof());
|
||||||
|
assertEquals(l1, l2);
|
||||||
|
out.reset(buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue