mirror of https://github.com/apache/lucene.git
LUCENE-10193: Cut over more array access to VarHandles. (#402)
LZ4 is interesting because it used to read data in little-endian order even though Directory APIs were big-endian. So most calls to LZ4 in backward-codecs have been updated to reverse the endianness of their input/output.
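To illustrate the wrapping pattern this commit applies everywhere, here is a minimal sketch of an endianness-reversing DataInput. The real implementation is org.apache.lucene.backward_codecs.store.EndiannessReverserDataInput; the class name ReversingDataInput and the exact set of overridden methods below are assumptions for the example, not the patch itself.

    import java.io.IOException;
    import org.apache.lucene.store.DataInput;

    // Sketch: single bytes pass through untouched; multi-byte primitives are
    // byte-swapped, so a little-endian consumer such as LZ4 sees the order it
    // expects on top of a big-endian stream (and vice versa).
    class ReversingDataInput extends DataInput {
      private final DataInput in;

      ReversingDataInput(DataInput in) {
        this.in = in;
      }

      @Override
      public byte readByte() throws IOException {
        return in.readByte();
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) throws IOException {
        in.readBytes(b, offset, len);
      }

      @Override
      public void skipBytes(long numBytes) throws IOException {
        in.skipBytes(numBytes);
      }

      @Override
      public short readShort() throws IOException {
        return Short.reverseBytes(in.readShort());
      }

      @Override
      public int readInt() throws IOException {
        return Integer.reverseBytes(in.readInt());
      }

      @Override
      public long readLong() throws IOException {
        return Long.reverseBytes(in.readLong());
      }
    }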
This commit is contained in:
parent 6bb2bbcd6a
commit 0e1f9fcf31
@@ -17,6 +17,7 @@
 package org.apache.lucene.backward_codecs.lucene40.blocktree;

 import java.io.IOException;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.util.compress.LowercaseAsciiCompression;

@@ -42,7 +43,8 @@ enum CompressionAlgorithm {

     @Override
     void read(DataInput in, byte[] out, int len) throws IOException {
-      org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0);
+      org.apache.lucene.util.compress.LZ4.decompress(
+          EndiannessReverserUtil.wrapDataInput(in), len, out, 0);
     }
   };

@@ -20,15 +20,24 @@
 import java.util.Objects;
 import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50CompressingStoredFieldsFormat;
 import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicWriter;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.compress.LZ4;

 /**
  * Lucene 5.0 stored fields format.

@@ -148,7 +157,7 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
     switch (mode) {
       case BEST_SPEED:
         return new Lucene50CompressingStoredFieldsFormat(
-            "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10);
+            "Lucene50StoredFieldsFastData", FAST_MODE, 1 << 14, 128, 10);
       case BEST_COMPRESSION:
         return new Lucene50CompressingStoredFieldsFormat(
             "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10);

@@ -156,4 +165,71 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
         throw new AssertionError();
     }
   }
+
+  static final CompressionMode FAST_MODE =
+      new CompressionMode() {
+
+        @Override
+        public Compressor newCompressor() {
+          return new LZ4FastCompressor();
+        }
+
+        @Override
+        public Decompressor newDecompressor() {
+          return LZ4_DECOMPRESSOR;
+        }
+
+        @Override
+        public String toString() {
+          return "FAST";
+        }
+      };
+
+  private static final class LZ4FastCompressor extends Compressor {
+
+    private final LZ4.FastCompressionHashTable ht;
+
+    LZ4FastCompressor() {
+      ht = new LZ4.FastCompressionHashTable();
+    }
+
+    @Override
+    public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+      LZ4.compress(bytes, off, len, EndiannessReverserUtil.wrapDataOutput(out), ht);
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+  }
+
+  private static final Decompressor LZ4_DECOMPRESSOR =
+      new Decompressor() {
+
+        @Override
+        public void decompress(
+            DataInput in, int originalLength, int offset, int length, BytesRef bytes)
+            throws IOException {
+          assert offset + length <= originalLength;
+          // add 7 padding bytes, this is not necessary but can help decompression run faster
+          if (bytes.bytes.length < originalLength + 7) {
+            bytes.bytes = new byte[ArrayUtil.oversize(originalLength + 7, 1)];
+          }
+          final int decompressedLength =
+              LZ4.decompress(
+                  EndiannessReverserUtil.wrapDataInput(in), offset + length, bytes.bytes, 0);
+          if (decompressedLength > originalLength) {
+            throw new CorruptIndexException(
+                "Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength, in);
+          }
+          bytes.offset = offset;
+          bytes.length = length;
+        }
+
+        @Override
+        public Decompressor clone() {
+          return this;
+        }
+      };
 }

@@ -20,7 +20,6 @@ import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50Compressin
 import org.apache.lucene.backward_codecs.lucene87.Lucene87StoredFieldsFormat;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.compressing.CompressionMode;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;

@@ -154,6 +153,6 @@ public final class Lucene50TermVectorsFormat extends Lucene50CompressingTermVect

   /** Sole constructor. */
   public Lucene50TermVectorsFormat() {
-    super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10);
+    super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10);
   }
 }

@@ -476,7 +476,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
       }
     }
     maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength);
-    LZ4.compress(block, 0, uncompressedBlockLength, data, ht);
+    LZ4.compress(
+        block, 0, uncompressedBlockLength, EndiannessReverserUtil.wrapDataOutput(data), ht);
     numDocsInCurrentBlock = 0;
     // Ensure initialized with zeroes because full array is always written
     Arrays.fill(docLengths, 0);

@@ -847,7 +848,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
     int uncompressedLength = bufferedOutput.getPosition();
     data.writeVInt(uncompressedLength);
     long before = data.getFilePointer();
-    LZ4.compress(termsDictBuffer, 0, uncompressedLength, data, ht);
+    LZ4.compress(
+        termsDictBuffer, 0, uncompressedLength, EndiannessReverserUtil.wrapDataOutput(data), ht);
     int compressedLength = (int) (data.getFilePointer() - before);
     // Block length will be used for creating buffer for decompression, one corner case is that
     // compressed length might be bigger than un-compressed length, so just return the bigger one.

@@ -898,7 +898,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
       }

       assert uncompressedBlockLength <= uncompressedBlock.length;
-      LZ4.decompress(compressedData, uncompressedBlockLength, uncompressedBlock, 0);
+      LZ4.decompress(
+          EndiannessReverserUtil.wrapDataInput(compressedData),
+          uncompressedBlockLength,
+          uncompressedBlock,
+          0);
     }

     uncompressedBytesRef.offset = uncompressedDocStarts[docInBlockId];

@@ -1355,7 +1359,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
     if (currentCompressedBlockStart != offset) {
       int decompressLength = bytes.readVInt();
       // Decompress the remaining of current block
-      LZ4.decompress(bytes, decompressLength, blockBuffer.bytes, 0);
+      LZ4.decompress(
+          EndiannessReverserUtil.wrapDataInput(bytes), decompressLength, blockBuffer.bytes, 0);
       currentCompressedBlockStart = offset;
       currentCompressedBlockEnd = bytes.getFilePointer();
     } else {

@@ -17,6 +17,7 @@
 package org.apache.lucene.backward_codecs.lucene87;

 import java.io.IOException;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.compressing.CompressionMode;
 import org.apache.lucene.codecs.compressing.Compressor;
 import org.apache.lucene.codecs.compressing.Decompressor;

@@ -100,7 +101,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       buffer = ArrayUtil.grow(buffer, dictLength + blockLength);
       bytes.length = 0;
       // Read the dictionary
-      if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) {
+      if (LZ4.decompress(EndiannessReverserUtil.wrapDataInput(in), dictLength, buffer, 0)
+          != dictLength) {
         throw new CorruptIndexException("Illegal dict length", in);
       }

@@ -128,7 +130,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       // Read blocks that intersect with the interval we need
       while (offsetInBlock < offset + length) {
         final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
-        LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
+        LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(in), bytesToDecompress, buffer, dictLength);
         bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
         System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
         bytes.length += bytesToDecompress;

@@ -21,7 +21,7 @@ import org.apache.lucene.store.DataInput;

 class EndiannessReverserDataInput extends DataInput {

-  private final DataInput in;
+  final DataInput in;

   EndiannessReverserDataInput(DataInput in) {
     this.in = in;

@@ -24,7 +24,7 @@ import org.apache.lucene.store.DataOutput;

 class EndiannessReverserDataOutput extends DataOutput {

-  private final DataOutput out;
+  final DataOutput out;

   EndiannessReverserDataOutput(DataOutput out) {
     this.out = out;

@@ -25,7 +25,7 @@ import org.apache.lucene.store.RandomAccessInput;
 /** A {@link IndexInput} wrapper that changes the endianness of the provided index input. */
 final class EndiannessReverserIndexInput extends IndexInput {

-  private final IndexInput in;
+  final IndexInput in;

   EndiannessReverserIndexInput(IndexInput in) {
     super("Endianness reverser Index Input wrapper");

@@ -25,7 +25,7 @@ import org.apache.lucene.store.IndexOutput;
 /** A {@link IndexOutput} wrapper that changes the endianness of the provided index output. */
 final class EndiannessReverserIndexOutput extends IndexOutput {

-  private final IndexOutput out;
+  final IndexOutput out;

   EndiannessReverserIndexOutput(IndexOutput out) {
     super("Endianness reverser Index Output wrapper", out.getName());

@@ -62,11 +62,23 @@ public final class EndiannessReverserUtil {

   /** wraps a data output */
   public static DataOutput wrapDataOutput(DataOutput dataOutput) {
+    if (dataOutput instanceof EndiannessReverserDataOutput) {
+      return ((EndiannessReverserDataOutput) dataOutput).out;
+    }
+    if (dataOutput instanceof EndiannessReverserIndexOutput) {
+      return ((EndiannessReverserIndexOutput) dataOutput).out;
+    }
     return new EndiannessReverserDataOutput(dataOutput);
   }

   /** wraps a data input */
   public static DataInput wrapDataInput(DataInput dataInput) {
+    if (dataInput instanceof EndiannessReverserDataInput) {
+      return ((EndiannessReverserDataInput) dataInput).in;
+    }
+    if (dataInput instanceof EndiannessReverserIndexInput) {
+      return ((EndiannessReverserIndexInput) dataInput).in;
+    }
     return new EndiannessReverserDataInput(dataInput);
   }
 }

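The instanceof checks added above make wrapping self-inverse: wrapping an already-wrapped stream returns the raw delegate (which is why the `in`/`out` fields were made package-visible) instead of stacking a second byte swap. A hedged sketch of the invariant this buys; the class and method names here are only for illustration:

    import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
    import org.apache.lucene.store.DataInput;

    class WrapTwice {
      // Wrapping twice round-trips to the original input: the second wrap
      // unwraps rather than composing two reversals (a double swap hidden
      // behind two layers of virtual calls).
      static DataInput roundTrip(DataInput raw) {
        DataInput reversed = EndiannessReverserUtil.wrapDataInput(raw);
        DataInput back = EndiannessReverserUtil.wrapDataInput(reversed);
        assert back == raw;
        return back;
      }
    }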
@@ -61,7 +61,7 @@ public final class Lucene50RWStoredFieldsFormat extends Lucene50StoredFieldsForm
     switch (mode) {
       case BEST_SPEED:
         return new Lucene50RWCompressingStoredFieldsFormat(
-            "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10);
+            "Lucene50StoredFieldsFastData", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 14, 128, 10);
       case BEST_COMPRESSION:
         return new Lucene50RWCompressingStoredFieldsFormat(
             "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10);

@@ -17,13 +17,12 @@
 package org.apache.lucene.backward_codecs.lucene50;

 import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50RWCompressingTermVectorsFormat;
-import org.apache.lucene.codecs.compressing.CompressionMode;

 /** RW impersonation of Lucene50StoredFieldsFormat. */
 public final class Lucene50RWTermVectorsFormat extends Lucene50RWCompressingTermVectorsFormat {

   /** Sole constructor. */
   public Lucene50RWTermVectorsFormat() {
-    super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10);
+    super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10);
   }
 }

@@ -95,7 +95,7 @@ final class ByteSliceReader extends DataInput {
   public void nextSlice() {

     // Skip to our next slice
-    final int nextIndex = (int) BitUtil.VH_BE_INT.get(buffer, limit);
+    final int nextIndex = (int) BitUtil.VH_LE_INT.get(buffer, limit);

     level = ByteBlockPool.NEXT_LEVEL_ARRAY[level];
     final int newSize = ByteBlockPool.LEVEL_SIZE_ARRAY[level];

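ByteSliceReader now assembles the forwarding address with a little-endian VarHandle read, matching the little-endian write in ByteBlockPool.allocSlice below. A self-contained sketch of what BitUtil.VH_LE_INT is assumed to be (a byte-array view VarHandle with an explicit byte order, created once and reused):

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;
    import java.nio.ByteOrder;

    class LittleEndianIntView {
      static final VarHandle VH_LE_INT =
          MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);

      // Reads b[off..off+3] as a little-endian int, like the patched ByteSliceReader.
      static int readLeInt(byte[] b, int off) {
        return (int) VH_LE_INT.get(b, off);
      }

      public static void main(String[] args) {
        byte[] b = {0x04, 0x03, 0x02, 0x01};
        // little-endian: least-significant byte first -> 0x01020304
        System.out.println(Integer.toHexString(readLeInt(b, 0)));
      }
    }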
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
-
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.util.ByteBlockPool;
-
-/**
- * Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold
- * the posting list for many terms in RAM.
- */
-final class ByteSliceWriter extends DataOutput {
-
-  /* Initial chunks size of the shared byte[] blocks used to
-  store postings data */
-  private static final int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
-
-  private byte[] slice;
-  private int upto;
-  private final ByteBlockPool pool;
-
-  int offset0;
-
-  public ByteSliceWriter(ByteBlockPool pool) {
-    this.pool = pool;
-  }
-
-  /** Set up the writer to write at address. */
-  public void init(int address) {
-    slice = pool.buffers[address >> ByteBlockPool.BYTE_BLOCK_SHIFT];
-    assert slice != null;
-    upto = address & ByteBlockPool.BYTE_BLOCK_MASK;
-    offset0 = address;
-    assert upto < slice.length;
-  }
-
-  /** Write byte into byte slice stream */
-  @Override
-  public void writeByte(byte b) {
-    assert slice != null;
-    if (slice[upto] != 0) {
-      upto = pool.allocSlice(slice, upto);
-      slice = pool.buffer;
-      offset0 = pool.byteOffset;
-      assert slice != null;
-    }
-    slice[upto++] = b;
-    assert upto != slice.length;
-  }
-
-  @Override
-  public void writeBytes(final byte[] b, int offset, final int len) {
-    final int offsetEnd = offset + len;
-    while (offset < offsetEnd) {
-      if (slice[upto] != 0) {
-        // End marker
-        upto = pool.allocSlice(slice, upto);
-        slice = pool.buffer;
-        offset0 = pool.byteOffset;
-      }
-
-      slice[upto++] = b[offset++];
-      assert upto != slice.length;
-    }
-  }
-
-  public int getAddress() {
-    return upto + (offset0 & BYTE_BLOCK_NOT_MASK);
-  }
-}

@@ -254,17 +254,16 @@ public final class ByteBlockPool implements Accountable {
     final int offset = newUpto + byteOffset;
     byteUpto += newSize;

-    // Copy forward the past 3 bytes (which we are about
-    // to overwrite with the forwarding address):
-    buffer[newUpto] = slice[upto - 3];
-    buffer[newUpto + 1] = slice[upto - 2];
-    buffer[newUpto + 2] = slice[upto - 1];
+    // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address).
+    // We actually copy 4 bytes at once since VarHandles make it cheap.
+    int past3Bytes = ((int) BitUtil.VH_LE_INT.get(slice, upto - 3)) & 0xFFFFFF;
+    // Ensure we're not changing the content of `buffer` by setting 4 bytes instead of 3. This
+    // should never happen since the next `newSize` bytes must be equal to 0.
+    assert buffer[newUpto + 3] == 0;
+    BitUtil.VH_LE_INT.set(buffer, newUpto, past3Bytes);

     // Write forwarding address at end of last slice:
-    slice[upto - 3] = (byte) (offset >>> 24);
-    slice[upto - 2] = (byte) (offset >>> 16);
-    slice[upto - 1] = (byte) (offset >>> 8);
-    slice[upto] = (byte) offset;
+    BitUtil.VH_LE_INT.set(slice, upto - 3, offset);

     // Write new level:
     buffer[byteUpto - 1] = (byte) (16 | newLevel);

@@ -308,7 +307,7 @@ public final class ByteBlockPool implements Accountable {
       term.offset = pos + 1;
     } else {
       // length is 2 bytes
-      term.length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
+      term.length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF;
       term.offset = pos + 2;
     }
     assert term.length >= 0;

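The rewritten allocSlice moves 3 logical bytes with a single 4-byte little-endian access: the mask keeps the low 3 bytes on the read, and the write only clobbers a fourth byte that is known to be zero. A standalone sketch of that masking, where VH_LE_INT stands in for BitUtil.VH_LE_INT:

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;
    import java.nio.ByteOrder;

    class ThreeByteCopy {
      static final VarHandle VH_LE_INT =
          MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);

      public static void main(String[] args) {
        byte[] slice = {0x11, 0x22, 0x33, 0x00};
        // Read 4 bytes, keep the low 3: in little-endian order the 4th byte
        // lands in the high bits, which the mask discards.
        int past3 = ((int) VH_LE_INT.get(slice, 0)) & 0xFFFFFF;
        byte[] buffer = new byte[8]; // destination is zero-initialized, like a fresh slice
        // Writing 4 bytes only overwrites a byte that is already zero.
        VH_LE_INT.set(buffer, 0, past3);
        System.out.printf("%02x %02x %02x %02x%n", buffer[0], buffer[1], buffer[2], buffer[3]);
        // prints: 11 22 33 00
      }
    }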
@@ -181,7 +181,7 @@ public final class BytesRefHash implements Accountable {
       offset = pos + 1;
     } else {
       // length is 2 bytes
-      length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
+      length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF;
       offset = pos + 2;
     }
     return Arrays.equals(bytes, offset, offset + length, b.bytes, b.offset, b.offset + b.length);

@@ -282,8 +282,7 @@ public final class BytesRefHash implements Accountable {
       System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1, length);
     } else {
       // 2 byte to store length
-      buffer[bufferUpto] = (byte) (0x80 | (length & 0x7f));
-      buffer[bufferUpto + 1] = (byte) ((length >> 7) & 0xff);
+      BitUtil.VH_BE_SHORT.set(buffer, bufferUpto, (short) (length | 0x8000));
       pool.byteUpto += length + 2;
       System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2, length);
     }

@@ -392,7 +391,7 @@ public final class BytesRefHash implements Accountable {
       len = bytes[start];
       pos = start + 1;
     } else {
-      len = (bytes[start] & 0x7f) + ((bytes[start + 1] & 0xff) << 7);
+      len = ((short) BitUtil.VH_BE_SHORT.get(bytes, start)) & 0x7FFF;
      pos = start + 2;
     }
     code = doHash(bytes, pos, len);

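BytesRefHash (and ByteBlockPool above) now encode the 2-byte length prefix as a big-endian short whose top bit marks the 2-byte form; PagedBytes below already used this byte layout and now reads it through the same VarHandle. A standalone sketch of the round trip, where VH_BE_SHORT stands in for BitUtil.VH_BE_SHORT:

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;
    import java.nio.ByteOrder;

    class TwoByteLength {
      static final VarHandle VH_BE_SHORT =
          MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.BIG_ENDIAN);

      public static void main(String[] args) {
        int length = 300; // >= 128, so it needs the 2-byte form
        byte[] buffer = new byte[2];
        // encode: set the high bit so readers can tell 1-byte from 2-byte lengths
        VH_BE_SHORT.set(buffer, 0, (short) (length | 0x8000));
        // decode: read big-endian and strip the marker bit
        int decoded = ((short) VH_BE_SHORT.get(buffer, 0)) & 0x7FFF;
        System.out.println(decoded == length); // true
      }
    }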
@@ -128,7 +128,7 @@ public final class PagedBytes implements Accountable {
       b.length = block[offset];
       b.offset = offset + 1;
     } else {
-      b.length = ((block[offset] & 0x7f) << 8) | (block[1 + offset] & 0xff);
+      b.length = ((short) BitUtil.VH_BE_SHORT.get(block, offset)) & 0x7FFF;
       b.offset = offset + 2;
       assert b.length > 0;
     }

@@ -285,8 +285,8 @@ public final class PagedBytes implements Accountable {
     if (bytes.length < 128) {
       currentBlock[upto++] = (byte) bytes.length;
     } else {
-      currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8));
-      currentBlock[upto++] = (byte) (bytes.length & 0xff);
+      BitUtil.VH_BE_SHORT.set(currentBlock, upto, (short) (bytes.length | 0x8000));
+      upto += 2;
     }
     System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
     upto += bytes.length;

@@ -64,11 +64,8 @@ public final class OfflinePointWriter implements PointWriter {
             + "]";

     out.writeBytes(packedValue, 0, packedValue.length);
-    // write bytes for comparing in lexicographically order
-    out.writeByte((byte) (docID >> 24));
-    out.writeByte((byte) (docID >> 16));
-    out.writeByte((byte) (docID >> 8));
-    out.writeByte((byte) docID);
+    // write bytes in big-endian order for comparing in lexicographically order
+    out.writeInt(Integer.reverseBytes(docID));
     count++;
     assert expectedCount == 0 || count <= expectedCount
         : "expectedCount=" + expectedCount + " vs count=" + count;

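With DataOutput.writeInt now writing little-endian in Lucene 9, writing Integer.reverseBytes(docID) lays the bytes down in big-endian order, which keeps byte-wise (lexicographic) comparison consistent with numeric order for non-negative values. The same trick appears in FSTCompletionLookup at the end of this diff. A hedged standalone sketch, using ByteBuffer to model the two byte orders:

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;
    import java.util.Arrays;

    class BigEndianViaReverse {
      public static void main(String[] args) {
        int docID = 0x01020304;
        // little-endian write of the reversed value...
        byte[] le = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN)
            .putInt(Integer.reverseBytes(docID)).array();
        // ...produces exactly the big-endian byte sequence
        byte[] be = ByteBuffer.allocate(4).order(ByteOrder.BIG_ENDIAN)
            .putInt(docID).array();
        System.out.println(Arrays.equals(le, be)); // true
      }
    }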
@@ -107,7 +107,7 @@ public final class LZ4 {
       }

       // matchs
-      final int matchDec = (compressed.readByte() & 0xFF) | ((compressed.readByte() & 0xFF) << 8);
+      final int matchDec = compressed.readShort() & 0xFFFF;
       assert matchDec > 0;

       int matchLen = token & 0x0F;

@@ -176,8 +176,7 @@ public final class LZ4 {
     // encode match dec
     final int matchDec = matchOff - matchRef;
     assert matchDec > 0 && matchDec < 1 << 16;
-    out.writeByte((byte) matchDec);
-    out.writeByte((byte) (matchDec >>> 8));
+    out.writeShort((short) matchDec);

     // encode match len
     if (matchLen >= MIN_MATCH + 0x0F) {

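LZ4's on-disk format stores the 2-byte match offset little-endian, so with a little-endian readShort/writeShort the hand-rolled byte pair collapses to one call. A small sketch of the equivalence, with plain arrays standing in for Lucene's DataInput/DataOutput:

    class MatchDec {
      public static void main(String[] args) {
        int matchDec = 0xABCD;
        // encode little-endian, as out.writeShort((short) matchDec) now does
        byte[] b = {(byte) matchDec, (byte) (matchDec >>> 8)};
        // old decode: two byte reads assembled by hand
        int manual = (b[0] & 0xFF) | ((b[1] & 0xFF) << 8);
        // new decode: one little-endian short read, zero-extended with & 0xFFFF
        int viaShort = (short) ((b[0] & 0xFF) | (b[1] << 8)) & 0xFFFF;
        System.out.println(manual == matchDec && viaShort == matchDec); // true
      }
    }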
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.util.Random;
-import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.RecyclingByteBlockAllocator;
-
-public class TestByteSlices extends LuceneTestCase {
-
-  public void testBasic() throws Throwable {
-    Random random = random();
-    ByteBlockPool pool =
-        new ByteBlockPool(
-            new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random().nextInt(100)));
-
-    final int NUM_STREAM = atLeast(random, 100);
-
-    ByteSliceWriter writer = new ByteSliceWriter(pool);
-
-    int[] starts = new int[NUM_STREAM];
-    int[] uptos = new int[NUM_STREAM];
-    int[] counters = new int[NUM_STREAM];
-
-    ByteSliceReader reader = new ByteSliceReader();
-
-    for (int ti = 0; ti < 100; ti++) {
-
-      for (int stream = 0; stream < NUM_STREAM; stream++) {
-        starts[stream] = -1;
-        counters[stream] = 0;
-      }
-
-      int num = atLeast(random, 3000);
-      for (int iter = 0; iter < num; iter++) {
-        int stream;
-        if (random.nextBoolean()) {
-          stream = random.nextInt(3);
-        } else {
-          stream = random.nextInt(NUM_STREAM);
-        }
-
-        if (VERBOSE) {
-          System.out.println("write stream=" + stream);
-        }
-
-        if (starts[stream] == -1) {
-          final int spot = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
-          starts[stream] = uptos[stream] = spot + pool.byteOffset;
-          if (VERBOSE) {
-            System.out.println("  init to " + starts[stream]);
-          }
-        }
-
-        writer.init(uptos[stream]);
-        int numValue;
-        if (random.nextInt(10) == 3) {
-          numValue = random.nextInt(100);
-        } else if (random.nextInt(5) == 3) {
-          numValue = random.nextInt(3);
-        } else {
-          numValue = random.nextInt(20);
-        }
-
-        for (int j = 0; j < numValue; j++) {
-          if (VERBOSE) {
-            System.out.println("    write " + (counters[stream] + j));
-          }
-          // write some large (incl. negative) ints:
-          writer.writeVInt(random.nextInt());
-          writer.writeVInt(counters[stream] + j);
-        }
-        counters[stream] += numValue;
-        uptos[stream] = writer.getAddress();
-        if (VERBOSE) System.out.println("    addr now " + uptos[stream]);
-      }
-
-      for (int stream = 0; stream < NUM_STREAM; stream++) {
-        if (VERBOSE) System.out.println("  stream=" + stream + " count=" + counters[stream]);
-
-        if (starts[stream] != -1 && starts[stream] != uptos[stream]) {
-          reader.init(pool, starts[stream], uptos[stream]);
-          for (int j = 0; j < counters[stream]; j++) {
-            reader.readVInt();
-            assertEquals(j, reader.readVInt());
-          }
-        }
-      }
-
-      pool.reset();
-    }
-  }
-}

@@ -185,10 +185,7 @@ public class FSTCompletionLookup extends Lookup {
       output.reset(buffer);
       final int encodedWeight = encodeWeight(iterator.weight());
       // write bytes for comparing in lexicographically order
-      output.writeByte((byte) (encodedWeight >> 24));
-      output.writeByte((byte) (encodedWeight >> 16));
-      output.writeByte((byte) (encodedWeight >> 8));
-      output.writeByte((byte) encodedWeight);
+      output.writeInt(Integer.reverseBytes(encodedWeight));
       output.writeBytes(spare.bytes, spare.offset, spare.length);
       writer.write(buffer, 0, output.getPosition());
       inputLineCount++;