diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/ChunksIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/ChunksIntEncoder.java index a19550c29e5..8829f2b9e57 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/ChunksIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/ChunksIntEncoder.java @@ -49,20 +49,20 @@ import org.apache.lucene.util.IntsRef; * @lucene.experimental */ public abstract class ChunksIntEncoder extends IntEncoder { - + /** Holds the values which must be encoded, outside the indicator. */ protected final IntsRef encodeQueue; - + /** Represents bits flag byte. */ protected int indicator = 0; - + /** Counts the current ordinal of the encoded value. */ protected byte ordinal = 0; - + protected ChunksIntEncoder(int chunkSize) { encodeQueue = new IntsRef(chunkSize); } - + /** * Encodes the values of the current chunk. First it writes the indicator, and * then it encodes the values outside the indicator. @@ -76,17 +76,40 @@ public abstract class ChunksIntEncoder extends IntEncoder { buf.bytes[buf.length++] = ((byte) indicator); for (int i = 0; i < encodeQueue.length; i++) { - VInt8.encode(encodeQueue.ints[i], buf); + // it is better if the encoding is inlined like so, and not e.g. + // in a utility method + int value = encodeQueue.ints[i]; + if ((value & ~0x7F) == 0) { + buf.bytes[buf.length] = (byte) value; + buf.length++; + } else if ((value & ~0x3FFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 1] = (byte) (value & 0x7F); + buf.length += 2; + } else if ((value & ~0x1FFFFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 2] = (byte) (value & 0x7F); + buf.length += 3; + } else if ((value & ~0xFFFFFFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 3] = (byte) (value & 0x7F); + buf.length += 4; + } else { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); + buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 4] = (byte) (value & 0x7F); + buf.length += 5; + } } - reset(); - } - - @Override - protected void reset() { ordinal = 0; indicator = 0; encodeQueue.length = 0; } - + } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntDecoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntDecoder.java index a5b2fb3c28c..5e908206800 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntDecoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntDecoder.java @@ -26,7 +26,7 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class DGapIntDecoder extends IntDecoder { +public final class DGapIntDecoder extends IntDecoder { private final IntDecoder decoder; @@ -35,13 +35,8 @@ public class DGapIntDecoder extends IntDecoder { } @Override - protected void reset() { - decoder.reset(); - } - - @Override - protected void doDecode(BytesRef buf, IntsRef values, int upto) { - decoder.doDecode(buf, values, upto); + public void decode(BytesRef buf, IntsRef values) { + decoder.decode(buf, values); int prev = 0; for (int i = 0; i < values.length; i++) { values.ints[i] += prev; @@ -51,7 +46,7 @@ public class DGapIntDecoder extends IntDecoder { @Override public String toString() { - return "DGap (" + decoder.toString() + ")"; + return "DGap(" + decoder.toString() + ")"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntEncoder.java index 305f975c619..5e8ca5d293d 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntEncoder.java @@ -35,7 +35,7 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class DGapIntEncoder extends IntEncoderFilter { +public final class DGapIntEncoder extends IntEncoderFilter { /** Initializes with the given encoder. */ public DGapIntEncoder(IntEncoder encoder) { @@ -43,14 +43,15 @@ public class DGapIntEncoder extends IntEncoderFilter { } @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { + public void encode(IntsRef values, BytesRef buf) { int prev = 0; + int upto = values.offset + values.length; for (int i = values.offset; i < upto; i++) { int tmp = values.ints[i]; values.ints[i] -= prev; prev = tmp; } - encoder.doEncode(values, buf, upto); + encoder.encode(values, buf); } @Override @@ -60,7 +61,7 @@ public class DGapIntEncoder extends IntEncoderFilter { @Override public String toString() { - return "DGap (" + encoder.toString() + ")"; + return "DGap(" + encoder.toString() + ")"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntDecoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntDecoder.java index 270fcffca52..317185f44e3 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntDecoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntDecoder.java @@ -45,10 +45,13 @@ public class EightFlagsIntDecoder extends IntDecoder { } @Override - protected void doDecode(BytesRef buf, IntsRef values, int upto) { - while (buf.offset < upto) { + public void decode(BytesRef buf, IntsRef values) { + values.offset = values.length = 0; + int upto = buf.offset + buf.length; + int offset = buf.offset; + while (offset < upto) { // read indicator - int indicator = buf.bytes[buf.offset++] & 0xFF; + int indicator = buf.bytes[offset++] & 0xFF; int ordinal = 0; int capacityNeeded = values.length + 8; @@ -59,11 +62,21 @@ public class EightFlagsIntDecoder extends IntDecoder { // process indicator, until we read 8 values, or end-of-buffer while (ordinal != 8) { if (DECODE_TABLE[indicator][ordinal++] == 0) { - if (buf.offset == upto) { // end of buffer + if (offset == upto) { // end of buffer return; } - // decode the value from the stream. - values.ints[values.length++] = VInt8.decode(buf) + 2; + // it is better if the decoding is inlined like so, and not e.g. + // in a utility method + int value = 0; + while (true) { + byte b = buf.bytes[offset++]; + if (b >= 0) { + values.ints[values.length++] = ((value << 7) | b) + 2; + break; + } else { + value = (value << 7) | (b & 0x7F); + } + } } else { values.ints[values.length++] = 1; } @@ -73,7 +86,7 @@ public class EightFlagsIntDecoder extends IntDecoder { @Override public String toString() { - return "EightFlags (VInt8)"; + return "EightFlags(VInt8)"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntEncoder.java index 143660a4742..812d86d1003 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntEncoder.java @@ -59,7 +59,9 @@ public class EightFlagsIntEncoder extends ChunksIntEncoder { } @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { + public void encode(IntsRef values, BytesRef buf) { + buf.offset = buf.length = 0; + int upto = values.offset + values.length; for (int i = values.offset; i < upto; i++) { int value = values.ints[i]; if (value == 1) { @@ -88,7 +90,7 @@ public class EightFlagsIntEncoder extends ChunksIntEncoder { @Override public String toString() { - return "EightFlags (VInt)"; + return "EightFlags(VInt)"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntDecoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntDecoder.java index ebc161ac9cd..5d1f957335e 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntDecoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntDecoder.java @@ -45,10 +45,13 @@ public class FourFlagsIntDecoder extends IntDecoder { } @Override - protected void doDecode(BytesRef buf, IntsRef values, int upto) { - while (buf.offset < upto) { + public void decode(BytesRef buf, IntsRef values) { + values.offset = values.length = 0; + int upto = buf.offset + buf.length; + int offset = buf.offset; + while (offset < upto) { // read indicator - int indicator = buf.bytes[buf.offset++] & 0xFF; + int indicator = buf.bytes[offset++] & 0xFF; int ordinal = 0; int capacityNeeded = values.length + 4; @@ -59,11 +62,21 @@ public class FourFlagsIntDecoder extends IntDecoder { while (ordinal != 4) { byte decodeVal = DECODE_TABLE[indicator][ordinal++]; if (decodeVal == 0) { - if (buf.offset == upto) { // end of buffer + if (offset == upto) { // end of buffer return; } - // decode the value from the stream. - values.ints[values.length++] = VInt8.decode(buf) + 4; + // it is better if the decoding is inlined like so, and not e.g. + // in a utility method + int value = 0; + while (true) { + byte b = buf.bytes[offset++]; + if (b >= 0) { + values.ints[values.length++] = ((value << 7) | b) + 4; + break; + } else { + value = (value << 7) | (b & 0x7F); + } + } } else { values.ints[values.length++] = decodeVal; } @@ -73,7 +86,7 @@ public class FourFlagsIntDecoder extends IntDecoder { @Override public String toString() { - return "FourFlags (VInt8)"; + return "FourFlags(VInt8)"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntEncoder.java index 535a90fb60c..fd822651000 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntEncoder.java @@ -65,7 +65,9 @@ public class FourFlagsIntEncoder extends ChunksIntEncoder { } @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { + public void encode(IntsRef values, BytesRef buf) { + buf.offset = buf.length = 0; + int upto = values.offset + values.length; for (int i = values.offset; i < upto; i++) { int value = values.ints[i]; if (value <= 3) { @@ -94,7 +96,7 @@ public class FourFlagsIntEncoder extends ChunksIntEncoder { @Override public String toString() { - return "FourFlags (VInt)"; + return "FourFlags(VInt)"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/IntDecoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/IntDecoder.java index acf83cc6158..e89c7557039 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/IntDecoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/IntDecoder.java @@ -27,44 +27,10 @@ import org.apache.lucene.util.IntsRef; */ public abstract class IntDecoder { - /** - * Performs the actual decoding. Values should be read from - * {@link BytesRef#offset} up to {@code upto}. Also, {@code values} offset and - * length are set to 0 and the encoder is expected to update - * {@link IntsRef#length}, but not {@link IntsRef#offset}. - * - *

- * NOTE: it is ok to use the buffer's offset as the current position in - * the buffer (and modify it), it will be reset by - * {@link #decode(BytesRef, IntsRef)}. - */ - protected abstract void doDecode(BytesRef buf, IntsRef values, int upto); - - /** - * Called before {@link #doDecode(BytesRef, IntsRef, int)} so that decoders - * can reset their state. - */ - protected void reset() { - // do nothing by default - } - /** * Decodes the values from the buffer into the given {@link IntsRef}. Note * that {@code values.offset} and {@code values.length} are set to 0. */ - public final void decode(BytesRef buf, IntsRef values) { - values.offset = values.length = 0; // must do that because we cannot grow() them otherwise - - // some decoders may use the buffer's offset as a position index, so save - // current offset. - int bufOffset = buf.offset; - - reset(); - doDecode(buf, values, buf.offset + buf.length); - assert values.offset == 0 : "offset should not have been modified by the decoder."; - - // fix offset - buf.offset = bufOffset; - } + public abstract void decode(BytesRef buf, IntsRef values); } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoder.java index 0a3197d6c6b..64e2878cbf7 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoder.java @@ -31,32 +31,11 @@ public abstract class IntEncoder { public IntEncoder() {} - /** - * Performs the actual encoding. Values should be read from - * {@link IntsRef#offset} up to {@code upto}. Also, it is guaranteed that - * {@code buf's} offset and length are set to 0 and the encoder is expected to - * update {@link BytesRef#length}, but not {@link BytesRef#offset}. - */ - protected abstract void doEncode(IntsRef values, BytesRef buf, int upto); - - /** - * Called before {@link #doEncode(IntsRef, BytesRef, int)} so that encoders - * can reset their state. - */ - protected void reset() { - // do nothing by default - } - /** * Encodes the values to the given buffer. Note that the buffer's offset and * length are set to 0. */ - public final void encode(IntsRef values, BytesRef buf) { - buf.offset = buf.length = 0; - reset(); - doEncode(values, buf, values.offset + values.length); - assert buf.offset == 0 : "offset should not have been modified by the encoder."; - } + public abstract void encode(IntsRef values, BytesRef buf); /** * Returns an {@link IntDecoder} which can decode the values that were encoded diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoderFilter.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoderFilter.java index ee2e5db7e9e..c1fa04b5dc9 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoderFilter.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoderFilter.java @@ -31,9 +31,4 @@ public abstract class IntEncoderFilter extends IntEncoder { this.encoder = encoder; } - @Override - public void reset() { - encoder.reset(); - } - } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntDecoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntDecoder.java index 1cf33857280..37e52e9a815 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntDecoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntDecoder.java @@ -42,14 +42,10 @@ public class NOnesIntDecoder extends FourFlagsIntDecoder { } @Override - protected void reset() { + public void decode(BytesRef buf, IntsRef values) { + values.offset = values.length = 0; internalBuffer.length = 0; - super.reset(); - } - - @Override - protected void doDecode(BytesRef buf, IntsRef values, int upto) { - super.doDecode(buf, internalBuffer, upto); + super.decode(buf, internalBuffer); if (values.ints.length < internalBuffer.length) { // need space for internalBuffer.length to internalBuffer.length*N, // grow mildly at first @@ -84,7 +80,7 @@ public class NOnesIntDecoder extends FourFlagsIntDecoder { @Override public String toString() { - return "NOnes (" + n + ") (" + super.toString() + ")"; + return "NOnes(" + n + ") (" + super.toString() + ")"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntEncoder.java index 956eea253a3..5a705c5ecf7 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntEncoder.java @@ -65,19 +65,15 @@ public class NOnesIntEncoder extends FourFlagsIntEncoder { } @Override - protected void reset() { + public void encode(IntsRef values, BytesRef buf) { internalBuffer.length = 0; - super.reset(); - } - - @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { // make sure the internal buffer is large enough if (values.length > internalBuffer.ints.length) { internalBuffer.grow(values.length); } int onesCounter = 0; + int upto = values.offset + values.length; for (int i = values.offset; i < upto; i++) { int value = values.ints[i]; if (value == 1) { @@ -102,7 +98,7 @@ public class NOnesIntEncoder extends FourFlagsIntEncoder { --onesCounter; internalBuffer.ints[internalBuffer.length++] = 1; } - super.doEncode(internalBuffer, buf, internalBuffer.length); + super.encode(internalBuffer, buf); } @Override @@ -112,7 +108,7 @@ public class NOnesIntEncoder extends FourFlagsIntEncoder { @Override public String toString() { - return "NOnes (" + n + ") (" + super.toString() + ")"; + return "NOnes(" + n + ") (" + super.toString() + ")"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntDecoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntDecoder.java index af6fce26af7..fd7a79e38c4 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntDecoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntDecoder.java @@ -1,7 +1,9 @@ package org.apache.lucene.util.encoding; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.RamUsageEstimator; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -25,19 +27,24 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class SimpleIntDecoder extends IntDecoder { +public final class SimpleIntDecoder extends IntDecoder { @Override - protected void doDecode(BytesRef buf, IntsRef values, int upto) { - while (buf.offset < upto) { - if (values.length == values.ints.length) { - values.grow(values.length + 10); // grow by few items, however not too many - } + public void decode(BytesRef buf, IntsRef values) { + values.offset = values.length = 0; + int numValues = buf.length / 4; // every value is 4 bytes + if (values.ints.length < numValues) { // offset and length are 0 + values.ints = new int[ArrayUtil.oversize(numValues, RamUsageEstimator.NUM_BYTES_INT)]; + } + + int offset = buf.offset; + int upto = buf.offset + buf.length; + while (offset < upto) { values.ints[values.length++] = - ((buf.bytes[buf.offset++] & 0xFF) << 24) | - ((buf.bytes[buf.offset++] & 0xFF) << 16) | - ((buf.bytes[buf.offset++] & 0xFF) << 8) | - (buf.bytes[buf.offset++] & 0xFF); + ((buf.bytes[offset++] & 0xFF) << 24) | + ((buf.bytes[offset++] & 0xFF) << 16) | + ((buf.bytes[offset++] & 0xFF) << 8) | + (buf.bytes[offset++] & 0xFF); } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntEncoder.java index fd6a0206117..ae0b2958b30 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntEncoder.java @@ -25,16 +25,18 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class SimpleIntEncoder extends IntEncoder { +public final class SimpleIntEncoder extends IntEncoder { @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { + public void encode(IntsRef values, BytesRef buf) { + buf.offset = buf.length = 0; // ensure there's enough room in the buffer int bytesNeeded = values.length * 4; if (buf.bytes.length < bytesNeeded) { buf.grow(bytesNeeded); } + int upto = values.offset + values.length; for (int i = values.offset; i < upto; i++) { int value = values.ints[i]; buf.bytes[buf.length++] = (byte) (value >>> 24); diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/SortingIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/SortingIntEncoder.java index 0ebb06efa85..128542087b9 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/SortingIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/SortingIntEncoder.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class SortingIntEncoder extends IntEncoderFilter { +public final class SortingIntEncoder extends IntEncoderFilter { /** Initializes with the given encoder. */ public SortingIntEncoder(IntEncoder encoder) { @@ -36,9 +36,9 @@ public class SortingIntEncoder extends IntEncoderFilter { } @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { - Arrays.sort(values.ints, values.offset, upto); - encoder.doEncode(values, buf, upto); + public void encode(IntsRef values, BytesRef buf) { + Arrays.sort(values.ints, values.offset, values.offset + values.length); + encoder.encode(values, buf); } @Override @@ -48,7 +48,7 @@ public class SortingIntEncoder extends IntEncoderFilter { @Override public String toString() { - return "Sorting (" + encoder.toString() + ")"; + return "Sorting(" + encoder.toString() + ")"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/UniqueValuesIntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/UniqueValuesIntEncoder.java index c9a6be5c848..2612c234c70 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/UniqueValuesIntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/UniqueValuesIntEncoder.java @@ -36,9 +36,10 @@ public final class UniqueValuesIntEncoder extends IntEncoderFilter { } @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { + public void encode(IntsRef values, BytesRef buf) { int prev = values.ints[values.offset]; int idx = values.offset + 1; + int upto = values.offset + values.length; for (int i = idx; i < upto; i++) { if (values.ints[i] != prev) { values.ints[idx++] = values.ints[i]; @@ -46,7 +47,7 @@ public final class UniqueValuesIntEncoder extends IntEncoderFilter { } } values.length = idx - values.offset; - encoder.doEncode(values, buf, idx); + encoder.encode(values, buf); } @Override @@ -56,7 +57,7 @@ public final class UniqueValuesIntEncoder extends IntEncoderFilter { @Override public String toString() { - return "Unique (" + encoder.toString() + ")"; + return "Unique(" + encoder.toString() + ")"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8.java deleted file mode 100644 index 267d52bae96..00000000000 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8.java +++ /dev/null @@ -1,138 +0,0 @@ -package org.apache.lucene.util.encoding; - -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Variable-length encoding of 32-bit integers, into 8-bit bytes. A number is - * encoded as follows: - *

- * Examples: - *
    - *
  1. n = 117 = 1110101: This has fewer than 8 significant bits, and so is - * encoded as 01110101 = 0x75. - *
  2. n = 100000 = (binary) 11000011010100000. This has 17 significant bits, - * and so needs three Vint8 bytes. Left-zero-pad it to a multiple of 7 bits, - * then split it into chunks of 7 and add an MSB, 0 for the last byte, 1 for the - * others: 1|0000110 1|0001101 0|0100000 = 0x86 0x8D 0x20. - *
- * {@link #encode(int, BytesRef)} and {@link #decode(BytesRef)} will correctly - * handle any 32-bit integer, but for negative numbers, and positive numbers - * with more than 28 significant bits, encoding requires 5 bytes; this is not an - * efficient encoding scheme for large positive numbers or any negative number. - * - * @lucene.experimental - */ -public class VInt8 { - - /** The maximum number of bytes needed to encode an integer. */ - public static final int MAXIMUM_BYTES_NEEDED = 5; - - /** - * Decodes an int from the given bytes, starting at {@link BytesRef#offset}. - * Returns the decoded bytes and updates {@link BytesRef#offset}. - */ - public static int decode(BytesRef bytes) { - /* - This is the original code of this method, but a Hotspot bug - corrupted the for-loop of DataInput.readVInt() (see LUCENE-2975) - so the loop was unwounded here too, to be on the safe side - int value = 0; - while (true) { - byte first = bytes.bytes[bytes.offset++]; - value |= first & 0x7F; - if ((first & 0x80) == 0) { - return value; - } - value <<= 7; - } - */ - - // byte 1 - byte b = bytes.bytes[bytes.offset++]; - if (b >= 0) return b; - - // byte 2 - int value = b & 0x7F; - b = bytes.bytes[bytes.offset++]; - value = (value << 7) | b & 0x7F; - if (b >= 0) return value; - - // byte 3 - b = bytes.bytes[bytes.offset++]; - value = (value << 7) | b & 0x7F; - if (b >= 0) return value; - - // byte 4 - b = bytes.bytes[bytes.offset++]; - value = (value << 7) | b & 0x7F; - if (b >= 0) return value; - - // byte 5 - b = bytes.bytes[bytes.offset++]; - return (value << 7) | b & 0x7F; - } - - /** - * Encodes the given number into bytes, starting at {@link BytesRef#length}. - * Assumes that the array is large enough. - */ - public static void encode(int value, BytesRef bytes) { - if ((value & ~0x7F) == 0) { - bytes.bytes[bytes.length] = (byte) value; - bytes.length++; - } else if ((value & ~0x3FFF) == 0) { - bytes.bytes[bytes.length] = (byte) (0x80 | ((value & 0x3F80) >> 7)); - bytes.bytes[bytes.length + 1] = (byte) (value & 0x7F); - bytes.length += 2; - } else if ((value & ~0x1FFFFF) == 0) { - bytes.bytes[bytes.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); - bytes.bytes[bytes.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7)); - bytes.bytes[bytes.length + 2] = (byte) (value & 0x7F); - bytes.length += 3; - } else if ((value & ~0xFFFFFFF) == 0) { - bytes.bytes[bytes.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); - bytes.bytes[bytes.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); - bytes.bytes[bytes.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7)); - bytes.bytes[bytes.length + 3] = (byte) (value & 0x7F); - bytes.length += 4; - } else { - bytes.bytes[bytes.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28)); - bytes.bytes[bytes.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); - bytes.bytes[bytes.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); - bytes.bytes[bytes.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7)); - bytes.bytes[bytes.length + 4] = (byte) (value & 0x7F); - bytes.length += 5; - } - } - - private VInt8() { - // Just making it impossible to instantiate. - } - -} diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntDecoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntDecoder.java index e9fe5600c9a..65a122ac7de 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntDecoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntDecoder.java @@ -1,7 +1,9 @@ package org.apache.lucene.util.encoding; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.RamUsageEstimator; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -25,15 +27,32 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class VInt8IntDecoder extends IntDecoder { +public final class VInt8IntDecoder extends IntDecoder { @Override - protected void doDecode(BytesRef buf, IntsRef values, int upto) { - while (buf.offset < upto) { - if (values.length == values.ints.length) { - values.grow(values.length + 10); // grow by few items, however not too many + public void decode(BytesRef buf, IntsRef values) { + values.offset = values.length = 0; + + // grow the buffer up front, even if by a large number of values (buf.length) + // that saves the need to check inside the loop for every decoded value if + // the buffer needs to grow. + if (values.ints.length < buf.length) { + values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)]; + } + + // it is better if the decoding is inlined like so, and not e.g. + // in a utility method + int upto = buf.offset + buf.length; + int value = 0; + int offset = buf.offset; + while (offset < upto) { + byte b = buf.bytes[offset++]; + if (b >= 0) { + values.ints[values.length++] = (value << 7) | b; + value = 0; + } else { + value = (value << 7) | (b & 0x7F); } - values.ints[values.length++] = VInt8.decode(buf); } } diff --git a/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntEncoder.java b/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntEncoder.java index 7c62bf3e035..cc4bc1332af 100644 --- a/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntEncoder.java +++ b/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntEncoder.java @@ -47,17 +47,47 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class VInt8IntEncoder extends IntEncoder { +public final class VInt8IntEncoder extends IntEncoder { @Override - protected void doEncode(IntsRef values, BytesRef buf, int upto) { + public void encode(IntsRef values, BytesRef buf) { + buf.offset = buf.length = 0; int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt if (buf.bytes.length < maxBytesNeeded) { buf.grow(maxBytesNeeded); } + int upto = values.offset + values.length; for (int i = values.offset; i < upto; i++) { - VInt8.encode(values.ints[i], buf); + // it is better if the encoding is inlined like so, and not e.g. + // in a utility method + int value = values.ints[i]; + if ((value & ~0x7F) == 0) { + buf.bytes[buf.length] = (byte) value; + buf.length++; + } else if ((value & ~0x3FFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 1] = (byte) (value & 0x7F); + buf.length += 2; + } else if ((value & ~0x1FFFFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 2] = (byte) (value & 0x7F); + buf.length += 3; + } else if ((value & ~0xFFFFFFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 3] = (byte) (value & 0x7F); + buf.length += 4; + } else { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); + buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 4] = (byte) (value & 0x7F); + buf.length += 5; + } } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java b/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java index 49ca1b71fb5..0f97ab360ab 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java @@ -1,6 +1,12 @@ package org.apache.lucene.facet.index.params; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.encoding.DGapIntEncoder; +import org.apache.lucene.util.encoding.IntDecoder; +import org.apache.lucene.util.encoding.IntEncoder; +import org.apache.lucene.util.encoding.SortingIntEncoder; +import org.apache.lucene.util.encoding.UniqueValuesIntEncoder; +import org.apache.lucene.util.encoding.VInt8IntEncoder; import org.junit.Test; /* @@ -26,8 +32,10 @@ public class CategoryListParamsTest extends LuceneTestCase { public void testDefaultSettings() { CategoryListParams clp = new CategoryListParams(); assertEquals("wrong default field", "$facets", clp.field); - assertEquals("unexpected default encoder", "Sorting (Unique (DGap (VInt8)))", clp.createEncoder().toString()); - assertEquals("unexpected default decoder", "DGap (VInt8)", clp.createEncoder().createMatchingDecoder().toString()); + IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); + IntDecoder decoder = encoder.createMatchingDecoder(); + assertEquals("unexpected default encoder", encoder.toString(), clp.createEncoder().toString()); + assertEquals("unexpected default decoder", decoder.toString(), clp.createEncoder().createMatchingDecoder().toString()); } /** diff --git a/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java b/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java index 57689d5731c..f9831c3a9ff 100644 --- a/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java +++ b/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java @@ -64,9 +64,12 @@ public class EncodingTest extends LuceneTestCase { BytesRef bytes = new BytesRef(100); // some initial capacity - encoders should grow the byte[] IntsRef values = new IntsRef(100); // some initial capacity - decoders should grow the int[] - encoding(encoder, data, bytes); - decoding(bytes, values, encoder.createMatchingDecoder()); - assertTrue(expected.intsEquals(values)); + for (int i = 0; i < 2; i++) { + // run 2 iterations to catch encoders/decoders which don't reset properly + encoding(encoder, data, bytes); + decoding(bytes, values, encoder.createMatchingDecoder()); + assertTrue(expected.intsEquals(values)); + } } private static void encoding(IntEncoder encoder, IntsRef data, BytesRef bytes) throws IOException { diff --git a/lucene/facet/src/test/org/apache/lucene/util/encoding/Vint8Test.java b/lucene/facet/src/test/org/apache/lucene/util/encoding/Vint8Test.java deleted file mode 100644 index f0556f3c718..00000000000 --- a/lucene/facet/src/test/org/apache/lucene/util/encoding/Vint8Test.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.apache.lucene.util.encoding; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase; -import org.junit.Test; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Tests the {@link VInt8} class. - */ -public class Vint8Test extends LuceneTestCase { - - private static final int[] TEST_VALUES = { - -1000000000, - -1, 0, (1 << 7) - 1, 1 << 7, (1 << 14) - 1, 1 << 14, - (1 << 21) - 1, 1 << 21, (1 << 28) - 1, 1 << 28 - }; - private static int[] BYTES_NEEDED_TEST_VALUES = { - 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5 - }; - - @Test - public void testBytesRef() throws Exception { - BytesRef bytes = new BytesRef(256); - int expectedSize = 0; - for (int j = 0; j < TEST_VALUES.length; j++) { - VInt8.encode(TEST_VALUES[j], bytes); - expectedSize += BYTES_NEEDED_TEST_VALUES[j]; - } - assertEquals(expectedSize, bytes.length); - - for (int j = 0; j < TEST_VALUES.length; j++) { - assertEquals(TEST_VALUES[j], VInt8.decode(bytes)); - } - assertEquals(bytes.offset, bytes.length); - } - -}