mirror of https://github.com/apache/lucene.git
LUCENE-3681: use 2 bytes (unsigned short) to save label for FST.INPUT_TYPE.BYTE2 case
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1228928 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
03644a4a0a
commit
5dba63c719
|
@ -714,6 +714,11 @@ Changes in backwards compatibility policy
|
|||
contrib/queryparser. If you have used those classes in your code
|
||||
just add the lucene-queryparser.jar file to your classpath.
|
||||
(Uwe Schindler)
|
||||
|
||||
* LUCENE-3681: FST now stores labels for BYTE2 input type as 2 bytes
|
||||
instead of vInt; this can make FSTs smaller and faster, but it is a
|
||||
break in the binary format, so if you had built and saved any FSTs
|
||||
then you need to rebuild them. (Robert Muir, Mike McCandless)
|
||||
|
||||
Security fixes
|
||||
|
||||
|
|
|
@ -92,7 +92,10 @@ public class FST<T> {
|
|||
/** Changed numBytesPerArc for array'd case from byte to int. */
|
||||
private final static int VERSION_INT_NUM_BYTES_PER_ARC = 1;
|
||||
|
||||
private final static int VERSION_CURRENT = VERSION_INT_NUM_BYTES_PER_ARC;
|
||||
/** Write BYTE2 labels as 2-byte short, not vInt. */
|
||||
private final static int VERSION_SHORT_BYTE2_LABELS = 2;
|
||||
|
||||
private final static int VERSION_CURRENT = VERSION_SHORT_BYTE2_LABELS;
|
||||
|
||||
// Never serialized; just used to represent the virtual
|
||||
// final node w/ no arcs:
|
||||
|
@ -199,7 +202,9 @@ public class FST<T> {
|
|||
public FST(DataInput in, Outputs<T> outputs) throws IOException {
|
||||
this.outputs = outputs;
|
||||
writer = null;
|
||||
CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_INT_NUM_BYTES_PER_ARC, VERSION_INT_NUM_BYTES_PER_ARC);
|
||||
// NOTE: only reads the most recent format; we don't have a
|
||||
// back-compat promise for FSTs (they are experimental):
|
||||
CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_SHORT_BYTE2_LABELS, VERSION_SHORT_BYTE2_LABELS);
|
||||
if (in.readByte() == 1) {
|
||||
// accepts empty string
|
||||
int numBytes = in.readVInt();
|
||||
|
@ -389,7 +394,7 @@ public class FST<T> {
|
|||
writer.writeByte((byte) v);
|
||||
} else if (inputType == INPUT_TYPE.BYTE2) {
|
||||
assert v <= 65535: "v=" + v;
|
||||
writer.writeVInt(v);
|
||||
writer.writeShort((short) v);
|
||||
} else {
|
||||
//writeInt(v);
|
||||
writer.writeVInt(v);
|
||||
|
@ -399,7 +404,11 @@ public class FST<T> {
|
|||
int readLabel(DataInput in) throws IOException {
|
||||
final int v;
|
||||
if (inputType == INPUT_TYPE.BYTE1) {
|
||||
// Unsigned byte:
|
||||
v = in.readByte()&0xFF;
|
||||
} else if (inputType == INPUT_TYPE.BYTE2) {
|
||||
// Unsigned short:
|
||||
v = in.readShort()&0xFFFF;
|
||||
} else {
|
||||
v = in.readVInt();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue