LUCENE-4620: inline encoding/decoding

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1433926 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-01-16 13:25:13 +00:00
parent 48a5004fe6
commit 800d2f322b
22 changed files with 206 additions and 347 deletions

View File

@ -49,20 +49,20 @@ import org.apache.lucene.util.IntsRef;
* @lucene.experimental
*/
public abstract class ChunksIntEncoder extends IntEncoder {
/** Holds the values which must be encoded, outside the indicator. */
protected final IntsRef encodeQueue;
/** Represents bits flag byte. */
protected int indicator = 0;
/** Counts the current ordinal of the encoded value. */
protected byte ordinal = 0;
protected ChunksIntEncoder(int chunkSize) {
encodeQueue = new IntsRef(chunkSize);
}
/**
* Encodes the values of the current chunk. First it writes the indicator, and
* then it encodes the values outside the indicator.
@ -76,17 +76,40 @@ public abstract class ChunksIntEncoder extends IntEncoder {
buf.bytes[buf.length++] = ((byte) indicator);
for (int i = 0; i < encodeQueue.length; i++) {
VInt8.encode(encodeQueue.ints[i], buf);
// it is better if the encoding is inlined like so, and not e.g.
// in a utility method
int value = encodeQueue.ints[i];
if ((value & ~0x7F) == 0) {
buf.bytes[buf.length] = (byte) value;
buf.length++;
} else if ((value & ~0x3FFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
buf.length += 2;
} else if ((value & ~0x1FFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
buf.length += 3;
} else if ((value & ~0xFFFFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
buf.length += 4;
} else {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
buf.length += 5;
}
}
reset();
}
@Override
protected void reset() {
ordinal = 0;
indicator = 0;
encodeQueue.length = 0;
}
}

View File

@ -26,7 +26,7 @@ import org.apache.lucene.util.IntsRef;
*
* @lucene.experimental
*/
public class DGapIntDecoder extends IntDecoder {
public final class DGapIntDecoder extends IntDecoder {
private final IntDecoder decoder;
@ -35,13 +35,8 @@ public class DGapIntDecoder extends IntDecoder {
}
@Override
protected void reset() {
decoder.reset();
}
@Override
protected void doDecode(BytesRef buf, IntsRef values, int upto) {
decoder.doDecode(buf, values, upto);
public void decode(BytesRef buf, IntsRef values) {
decoder.decode(buf, values);
int prev = 0;
for (int i = 0; i < values.length; i++) {
values.ints[i] += prev;
@ -51,7 +46,7 @@ public class DGapIntDecoder extends IntDecoder {
@Override
public String toString() {
return "DGap (" + decoder.toString() + ")";
return "DGap(" + decoder.toString() + ")";
}
}

View File

@ -35,7 +35,7 @@ import org.apache.lucene.util.IntsRef;
*
* @lucene.experimental
*/
public class DGapIntEncoder extends IntEncoderFilter {
public final class DGapIntEncoder extends IntEncoderFilter {
/** Initializes with the given encoder. */
public DGapIntEncoder(IntEncoder encoder) {
@ -43,14 +43,15 @@ public class DGapIntEncoder extends IntEncoderFilter {
}
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
public void encode(IntsRef values, BytesRef buf) {
int prev = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int tmp = values.ints[i];
values.ints[i] -= prev;
prev = tmp;
}
encoder.doEncode(values, buf, upto);
encoder.encode(values, buf);
}
@Override
@ -60,7 +61,7 @@ public class DGapIntEncoder extends IntEncoderFilter {
@Override
public String toString() {
return "DGap (" + encoder.toString() + ")";
return "DGap(" + encoder.toString() + ")";
}
}

View File

@ -45,10 +45,13 @@ public class EightFlagsIntDecoder extends IntDecoder {
}
@Override
protected void doDecode(BytesRef buf, IntsRef values, int upto) {
while (buf.offset < upto) {
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
int upto = buf.offset + buf.length;
int offset = buf.offset;
while (offset < upto) {
// read indicator
int indicator = buf.bytes[buf.offset++] & 0xFF;
int indicator = buf.bytes[offset++] & 0xFF;
int ordinal = 0;
int capacityNeeded = values.length + 8;
@ -59,11 +62,21 @@ public class EightFlagsIntDecoder extends IntDecoder {
// process indicator, until we read 8 values, or end-of-buffer
while (ordinal != 8) {
if (DECODE_TABLE[indicator][ordinal++] == 0) {
if (buf.offset == upto) { // end of buffer
if (offset == upto) { // end of buffer
return;
}
// decode the value from the stream.
values.ints[values.length++] = VInt8.decode(buf) + 2;
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int value = 0;
while (true) {
byte b = buf.bytes[offset++];
if (b >= 0) {
values.ints[values.length++] = ((value << 7) | b) + 2;
break;
} else {
value = (value << 7) | (b & 0x7F);
}
}
} else {
values.ints[values.length++] = 1;
}
@ -73,7 +86,7 @@ public class EightFlagsIntDecoder extends IntDecoder {
@Override
public String toString() {
return "EightFlags (VInt8)";
return "EightFlags(VInt8)";
}
}

View File

@ -59,7 +59,9 @@ public class EightFlagsIntEncoder extends ChunksIntEncoder {
}
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
if (value == 1) {
@ -88,7 +90,7 @@ public class EightFlagsIntEncoder extends ChunksIntEncoder {
@Override
public String toString() {
return "EightFlags (VInt)";
return "EightFlags(VInt)";
}
}

View File

@ -45,10 +45,13 @@ public class FourFlagsIntDecoder extends IntDecoder {
}
@Override
protected void doDecode(BytesRef buf, IntsRef values, int upto) {
while (buf.offset < upto) {
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
int upto = buf.offset + buf.length;
int offset = buf.offset;
while (offset < upto) {
// read indicator
int indicator = buf.bytes[buf.offset++] & 0xFF;
int indicator = buf.bytes[offset++] & 0xFF;
int ordinal = 0;
int capacityNeeded = values.length + 4;
@ -59,11 +62,21 @@ public class FourFlagsIntDecoder extends IntDecoder {
while (ordinal != 4) {
byte decodeVal = DECODE_TABLE[indicator][ordinal++];
if (decodeVal == 0) {
if (buf.offset == upto) { // end of buffer
if (offset == upto) { // end of buffer
return;
}
// decode the value from the stream.
values.ints[values.length++] = VInt8.decode(buf) + 4;
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int value = 0;
while (true) {
byte b = buf.bytes[offset++];
if (b >= 0) {
values.ints[values.length++] = ((value << 7) | b) + 4;
break;
} else {
value = (value << 7) | (b & 0x7F);
}
}
} else {
values.ints[values.length++] = decodeVal;
}
@ -73,7 +86,7 @@ public class FourFlagsIntDecoder extends IntDecoder {
@Override
public String toString() {
return "FourFlags (VInt8)";
return "FourFlags(VInt8)";
}
}

View File

@ -65,7 +65,9 @@ public class FourFlagsIntEncoder extends ChunksIntEncoder {
}
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
if (value <= 3) {
@ -94,7 +96,7 @@ public class FourFlagsIntEncoder extends ChunksIntEncoder {
@Override
public String toString() {
return "FourFlags (VInt)";
return "FourFlags(VInt)";
}
}

View File

@ -27,44 +27,10 @@ import org.apache.lucene.util.IntsRef;
*/
public abstract class IntDecoder {
/**
* Performs the actual decoding. Values should be read from
* {@link BytesRef#offset} up to {@code upto}. Also, {@code values} offset and
* length are set to 0 and the decoder is expected to update
* {@link IntsRef#length}, but not {@link IntsRef#offset}.
*
* <p>
* <b>NOTE:</b> it is ok to use the buffer's offset as the current position in
* the buffer (and modify it), it will be reset by
* {@link #decode(BytesRef, IntsRef)}.
*/
protected abstract void doDecode(BytesRef buf, IntsRef values, int upto);
/**
* Called before {@link #doDecode(BytesRef, IntsRef, int)} so that decoders
* can reset their state.
*/
protected void reset() {
// do nothing by default
}
/**
* Decodes the values from the buffer into the given {@link IntsRef}. Note
* that {@code values.offset} and {@code values.length} are set to 0.
*/
public final void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0; // must do that because we cannot grow() them otherwise
// some decoders may use the buffer's offset as a position index, so save
// current offset.
int bufOffset = buf.offset;
reset();
doDecode(buf, values, buf.offset + buf.length);
assert values.offset == 0 : "offset should not have been modified by the decoder.";
// fix offset
buf.offset = bufOffset;
}
public abstract void decode(BytesRef buf, IntsRef values);
}

View File

@ -31,32 +31,11 @@ public abstract class IntEncoder {
public IntEncoder() {}
/**
* Performs the actual encoding. Values should be read from
* {@link IntsRef#offset} up to {@code upto}. Also, it is guaranteed that
* {@code buf's} offset and length are set to 0 and the encoder is expected to
* update {@link BytesRef#length}, but not {@link BytesRef#offset}.
*/
protected abstract void doEncode(IntsRef values, BytesRef buf, int upto);
/**
* Called before {@link #doEncode(IntsRef, BytesRef, int)} so that encoders
* can reset their state.
*/
protected void reset() {
// do nothing by default
}
/**
* Encodes the values to the given buffer. Note that the buffer's offset and
* length are set to 0.
*/
public final void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
reset();
doEncode(values, buf, values.offset + values.length);
assert buf.offset == 0 : "offset should not have been modified by the encoder.";
}
public abstract void encode(IntsRef values, BytesRef buf);
/**
* Returns an {@link IntDecoder} which can decode the values that were encoded

View File

@ -31,9 +31,4 @@ public abstract class IntEncoderFilter extends IntEncoder {
this.encoder = encoder;
}
@Override
public void reset() {
encoder.reset();
}
}

View File

@ -42,14 +42,10 @@ public class NOnesIntDecoder extends FourFlagsIntDecoder {
}
@Override
protected void reset() {
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
internalBuffer.length = 0;
super.reset();
}
@Override
protected void doDecode(BytesRef buf, IntsRef values, int upto) {
super.doDecode(buf, internalBuffer, upto);
super.decode(buf, internalBuffer);
if (values.ints.length < internalBuffer.length) {
// need space for internalBuffer.length to internalBuffer.length*N,
// grow mildly at first
@ -84,7 +80,7 @@ public class NOnesIntDecoder extends FourFlagsIntDecoder {
@Override
public String toString() {
return "NOnes (" + n + ") (" + super.toString() + ")";
return "NOnes(" + n + ") (" + super.toString() + ")";
}
}

View File

@ -65,19 +65,15 @@ public class NOnesIntEncoder extends FourFlagsIntEncoder {
}
@Override
protected void reset() {
public void encode(IntsRef values, BytesRef buf) {
internalBuffer.length = 0;
super.reset();
}
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
// make sure the internal buffer is large enough
if (values.length > internalBuffer.ints.length) {
internalBuffer.grow(values.length);
}
int onesCounter = 0;
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
if (value == 1) {
@ -102,7 +98,7 @@ public class NOnesIntEncoder extends FourFlagsIntEncoder {
--onesCounter;
internalBuffer.ints[internalBuffer.length++] = 1;
}
super.doEncode(internalBuffer, buf, internalBuffer.length);
super.encode(internalBuffer, buf);
}
@Override
@ -112,7 +108,7 @@ public class NOnesIntEncoder extends FourFlagsIntEncoder {
@Override
public String toString() {
return "NOnes (" + n + ") (" + super.toString() + ")";
return "NOnes(" + n + ") (" + super.toString() + ")";
}
}

View File

@ -1,7 +1,9 @@
package org.apache.lucene.util.encoding;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,19 +27,24 @@ import org.apache.lucene.util.IntsRef;
*
* @lucene.experimental
*/
public class SimpleIntDecoder extends IntDecoder {
public final class SimpleIntDecoder extends IntDecoder {
@Override
protected void doDecode(BytesRef buf, IntsRef values, int upto) {
while (buf.offset < upto) {
if (values.length == values.ints.length) {
values.grow(values.length + 10); // grow by few items, however not too many
}
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
int numValues = buf.length / 4; // every value is 4 bytes
if (values.ints.length < numValues) { // offset and length are 0
values.ints = new int[ArrayUtil.oversize(numValues, RamUsageEstimator.NUM_BYTES_INT)];
}
int offset = buf.offset;
int upto = buf.offset + buf.length;
while (offset < upto) {
values.ints[values.length++] =
((buf.bytes[buf.offset++] & 0xFF) << 24) |
((buf.bytes[buf.offset++] & 0xFF) << 16) |
((buf.bytes[buf.offset++] & 0xFF) << 8) |
(buf.bytes[buf.offset++] & 0xFF);
((buf.bytes[offset++] & 0xFF) << 24) |
((buf.bytes[offset++] & 0xFF) << 16) |
((buf.bytes[offset++] & 0xFF) << 8) |
(buf.bytes[offset++] & 0xFF);
}
}

View File

@ -25,16 +25,18 @@ import org.apache.lucene.util.IntsRef;
*
* @lucene.experimental
*/
public class SimpleIntEncoder extends IntEncoder {
public final class SimpleIntEncoder extends IntEncoder {
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
// ensure there's enough room in the buffer
int bytesNeeded = values.length * 4;
if (buf.bytes.length < bytesNeeded) {
buf.grow(bytesNeeded);
}
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
int value = values.ints[i];
buf.bytes[buf.length++] = (byte) (value >>> 24);

View File

@ -28,7 +28,7 @@ import org.apache.lucene.util.IntsRef;
*
* @lucene.experimental
*/
public class SortingIntEncoder extends IntEncoderFilter {
public final class SortingIntEncoder extends IntEncoderFilter {
/** Initializes with the given encoder. */
public SortingIntEncoder(IntEncoder encoder) {
@ -36,9 +36,9 @@ public class SortingIntEncoder extends IntEncoderFilter {
}
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
Arrays.sort(values.ints, values.offset, upto);
encoder.doEncode(values, buf, upto);
public void encode(IntsRef values, BytesRef buf) {
Arrays.sort(values.ints, values.offset, values.offset + values.length);
encoder.encode(values, buf);
}
@Override
@ -48,7 +48,7 @@ public class SortingIntEncoder extends IntEncoderFilter {
@Override
public String toString() {
return "Sorting (" + encoder.toString() + ")";
return "Sorting(" + encoder.toString() + ")";
}
}

View File

@ -36,9 +36,10 @@ public final class UniqueValuesIntEncoder extends IntEncoderFilter {
}
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
public void encode(IntsRef values, BytesRef buf) {
int prev = values.ints[values.offset];
int idx = values.offset + 1;
int upto = values.offset + values.length;
for (int i = idx; i < upto; i++) {
if (values.ints[i] != prev) {
values.ints[idx++] = values.ints[i];
@ -46,7 +47,7 @@ public final class UniqueValuesIntEncoder extends IntEncoderFilter {
}
}
values.length = idx - values.offset;
encoder.doEncode(values, buf, idx);
encoder.encode(values, buf);
}
@Override
@ -56,7 +57,7 @@ public final class UniqueValuesIntEncoder extends IntEncoderFilter {
@Override
public String toString() {
return "Unique (" + encoder.toString() + ")";
return "Unique(" + encoder.toString() + ")";
}
}

View File

@ -1,138 +0,0 @@
package org.apache.lucene.util.encoding;
import org.apache.lucene.util.BytesRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Variable-length encoding of 32-bit integers into 8-bit bytes. Every encoded
 * byte carries 7 bits of the original value in its low bits; the high bit is
 * set on every byte except the last one. Bytes are written most-significant
 * group first:
 * <ul>
 * <li>A value that fits in 7 bits (0 to 127 inclusive) is written as the
 * single byte 0bbbbbbb.
 * <li>Any other value is split into 7-bit groups. The first byte holds the
 * highest non-zero group, each following byte holds the next lower group, and
 * the last byte holds the 7 least significant bits.
 * </ul>
 * Examples:
 * <ol>
 * <li>n = 117 = (binary) 1110101 needs only 7 bits, so it is written as
 * 01110101 = 0x75.
 * <li>n = 100000 = (binary) 11000011010100000 has 17 significant bits and so
 * needs three bytes. Padded on the left to 21 bits and cut into groups of 7,
 * with the high bit set on all but the last byte, it becomes
 * 1|0000110 1|0001101 0|0100000 = 0x86 0x8D 0x20.
 * </ol>
 * {@link #encode(int, BytesRef)} and {@link #decode(BytesRef)} round-trip any
 * 32-bit integer. Negative numbers, and positive numbers with more than 28
 * significant bits, always take 5 bytes, so this scheme is not efficient for
 * large positive numbers or for any negative number.
 *
 * @lucene.experimental
 */
public class VInt8 {

  /** The maximum number of bytes needed to encode an integer. */
  public static final int MAXIMUM_BYTES_NEEDED = 5;

  /**
   * Decodes one int from the given bytes, starting at {@link BytesRef#offset}.
   * {@link BytesRef#offset} is advanced past every byte that was consumed.
   *
   * @param bytes the buffer to read from
   * @return the decoded int
   */
  public static int decode(BytesRef bytes) {
    // Deliberately written as five straight-line steps instead of a loop: a
    // Hotspot bug corrupted the equivalent for-loop in DataInput.readVInt()
    // (see LUCENE-2975), so the loop was unrolled here as well to be safe.
    final byte[] data = bytes.bytes;

    // 1st byte: a non-negative byte has no continuation bit and is the value.
    byte cur = data[bytes.offset++];
    if (cur >= 0) {
      return cur;
    }
    int result = cur & 0x7F;

    // 2nd byte
    cur = data[bytes.offset++];
    result = (result << 7) | (cur & 0x7F);
    if (cur >= 0) {
      return result;
    }

    // 3rd byte
    cur = data[bytes.offset++];
    result = (result << 7) | (cur & 0x7F);
    if (cur >= 0) {
      return result;
    }

    // 4th byte
    cur = data[bytes.offset++];
    result = (result << 7) | (cur & 0x7F);
    if (cur >= 0) {
      return result;
    }

    // 5th byte: always the last one, so its continuation bit is not checked.
    cur = data[bytes.offset++];
    return (result << 7) | (cur & 0x7F);
  }

  /**
   * Encodes the given number into bytes, starting at {@link BytesRef#length},
   * and advances {@link BytesRef#length} by the number of bytes written.
   * Assumes that the array is large enough.
   *
   * @param value the number to encode
   * @param bytes the buffer to append to
   */
  public static void encode(int value, BytesRef bytes) {
    final byte[] out = bytes.bytes;
    final int start = bytes.length;

    // fits in 7 bits: one byte
    if ((value & ~0x7F) == 0) {
      out[start] = (byte) value;
      bytes.length = start + 1;
      return;
    }

    // fits in 14 bits: two bytes
    if ((value & ~0x3FFF) == 0) {
      out[start] = (byte) (0x80 | ((value >> 7) & 0x7F));
      out[start + 1] = (byte) (value & 0x7F);
      bytes.length = start + 2;
      return;
    }

    // fits in 21 bits: three bytes
    if ((value & ~0x1FFFFF) == 0) {
      out[start] = (byte) (0x80 | ((value >> 14) & 0x7F));
      out[start + 1] = (byte) (0x80 | ((value >> 7) & 0x7F));
      out[start + 2] = (byte) (value & 0x7F);
      bytes.length = start + 3;
      return;
    }

    // fits in 28 bits: four bytes
    if ((value & ~0xFFFFFFF) == 0) {
      out[start] = (byte) (0x80 | ((value >> 21) & 0x7F));
      out[start + 1] = (byte) (0x80 | ((value >> 14) & 0x7F));
      out[start + 2] = (byte) (0x80 | ((value >> 7) & 0x7F));
      out[start + 3] = (byte) (value & 0x7F);
      bytes.length = start + 4;
      return;
    }

    // everything else, including all negative numbers: five bytes. The top
    // nibble is taken with a signed shift, so for negative values the extra
    // sign bits end up in the first byte; decode() shifts them out again.
    out[start] = (byte) (0x80 | (value >> 28));
    out[start + 1] = (byte) (0x80 | ((value >> 21) & 0x7F));
    out[start + 2] = (byte) (0x80 | ((value >> 14) & 0x7F));
    out[start + 3] = (byte) (0x80 | ((value >> 7) & 0x7F));
    out[start + 4] = (byte) (value & 0x7F);
    bytes.length = start + 5;
  }

  private VInt8() {
    // Static utility holder; never instantiated.
  }

}

View File

@ -1,7 +1,9 @@
package org.apache.lucene.util.encoding;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.RamUsageEstimator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,15 +27,32 @@ import org.apache.lucene.util.IntsRef;
*
* @lucene.experimental
*/
public class VInt8IntDecoder extends IntDecoder {
public final class VInt8IntDecoder extends IntDecoder {
@Override
protected void doDecode(BytesRef buf, IntsRef values, int upto) {
while (buf.offset < upto) {
if (values.length == values.ints.length) {
values.grow(values.length + 10); // grow by few items, however not too many
public void decode(BytesRef buf, IntsRef values) {
values.offset = values.length = 0;
// grow the buffer up front, even if by a large number of values (buf.length)
// that saves the need to check inside the loop for every decoded value if
// the buffer needs to grow.
if (values.ints.length < buf.length) {
values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)];
}
// it is better if the decoding is inlined like so, and not e.g.
// in a utility method
int upto = buf.offset + buf.length;
int value = 0;
int offset = buf.offset;
while (offset < upto) {
byte b = buf.bytes[offset++];
if (b >= 0) {
values.ints[values.length++] = (value << 7) | b;
value = 0;
} else {
value = (value << 7) | (b & 0x7F);
}
values.ints[values.length++] = VInt8.decode(buf);
}
}

View File

@ -47,17 +47,47 @@ import org.apache.lucene.util.IntsRef;
*
* @lucene.experimental
*/
public class VInt8IntEncoder extends IntEncoder {
public final class VInt8IntEncoder extends IntEncoder {
@Override
protected void doEncode(IntsRef values, BytesRef buf, int upto) {
public void encode(IntsRef values, BytesRef buf) {
buf.offset = buf.length = 0;
int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt
if (buf.bytes.length < maxBytesNeeded) {
buf.grow(maxBytesNeeded);
}
int upto = values.offset + values.length;
for (int i = values.offset; i < upto; i++) {
VInt8.encode(values.ints[i], buf);
// it is better if the encoding is inlined like so, and not e.g.
// in a utility method
int value = values.ints[i];
if ((value & ~0x7F) == 0) {
buf.bytes[buf.length] = (byte) value;
buf.length++;
} else if ((value & ~0x3FFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 1] = (byte) (value & 0x7F);
buf.length += 2;
} else if ((value & ~0x1FFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 2] = (byte) (value & 0x7F);
buf.length += 3;
} else if ((value & ~0xFFFFFFF) == 0) {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 3] = (byte) (value & 0x7F);
buf.length += 4;
} else {
buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28));
buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21));
buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14));
buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7));
buf.bytes[buf.length + 4] = (byte) (value & 0x7F);
buf.length += 5;
}
}
}

View File

@ -1,6 +1,12 @@
package org.apache.lucene.facet.index.params;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.encoding.DGapIntEncoder;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;
import org.apache.lucene.util.encoding.SortingIntEncoder;
import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
import org.apache.lucene.util.encoding.VInt8IntEncoder;
import org.junit.Test;
/*
@ -26,8 +32,10 @@ public class CategoryListParamsTest extends LuceneTestCase {
public void testDefaultSettings() {
CategoryListParams clp = new CategoryListParams();
assertEquals("wrong default field", "$facets", clp.field);
assertEquals("unexpected default encoder", "Sorting (Unique (DGap (VInt8)))", clp.createEncoder().toString());
assertEquals("unexpected default decoder", "DGap (VInt8)", clp.createEncoder().createMatchingDecoder().toString());
IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
IntDecoder decoder = encoder.createMatchingDecoder();
assertEquals("unexpected default encoder", encoder.toString(), clp.createEncoder().toString());
assertEquals("unexpected default decoder", decoder.toString(), clp.createEncoder().createMatchingDecoder().toString());
}
/**

View File

@ -64,9 +64,12 @@ public class EncodingTest extends LuceneTestCase {
BytesRef bytes = new BytesRef(100); // some initial capacity - encoders should grow the byte[]
IntsRef values = new IntsRef(100); // some initial capacity - decoders should grow the int[]
encoding(encoder, data, bytes);
decoding(bytes, values, encoder.createMatchingDecoder());
assertTrue(expected.intsEquals(values));
for (int i = 0; i < 2; i++) {
// run 2 iterations to catch encoders/decoders which don't reset properly
encoding(encoder, data, bytes);
decoding(bytes, values, encoder.createMatchingDecoder());
assertTrue(expected.intsEquals(values));
}
}
private static void encoding(IntEncoder encoder, IntsRef data, BytesRef bytes) throws IOException {

View File

@ -1,54 +0,0 @@
package org.apache.lucene.util.encoding;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Tests the {@link VInt8} class by encoding a fixed set of boundary values
 * into one buffer and decoding them back.
 */
public class Vint8Test extends LuceneTestCase {

  /**
   * Values to round-trip: negatives, zero, and the values on either side of
   * each 7-bit group boundary (2^7, 2^14, 2^21, 2^28).
   */
  private static final int[] TEST_VALUES = {
    -1000000000,
    -1, 0, (1 << 7) - 1, 1 << 7, (1 << 14) - 1, 1 << 14,
    (1 << 21) - 1, 1 << 21, (1 << 28) - 1, 1 << 28
  };

  /**
   * Expected encoded size, in bytes, of the entry at the same index in
   * {@link #TEST_VALUES}.
   */
  private static final int[] BYTES_NEEDED_TEST_VALUES = {
    5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5
  };

  /**
   * Encodes every test value back to back, checks the total encoded size,
   * then decodes them in order and checks that the whole buffer was consumed.
   */
  @Test
  public void testBytesRef() throws Exception {
    // the two tables are indexed in parallel, so they must stay the same size
    assertEquals("test tables are out of sync",
        TEST_VALUES.length, BYTES_NEEDED_TEST_VALUES.length);

    // NOTE(review): presumably new BytesRef(256) starts with offset == 0 and
    // length == 0 over a 256-byte array; the checks below rely on that.
    BytesRef bytes = new BytesRef(256);
    int expectedSize = 0;
    for (int j = 0; j < TEST_VALUES.length; j++) {
      VInt8.encode(TEST_VALUES[j], bytes);
      expectedSize += BYTES_NEEDED_TEST_VALUES[j];
    }
    assertEquals(expectedSize, bytes.length);

    for (int j = 0; j < TEST_VALUES.length; j++) {
      assertEquals(TEST_VALUES[j], VInt8.decode(bytes));
    }
    // decoding must have consumed exactly what was encoded
    assertEquals(bytes.length, bytes.offset);
  }

}