LUCENE-10193: Cut over more array access to VarHandles. (#402)

LZ4 is interesting because it used to read data in little-endian order even
though the Directory APIs were big-endian. So most calls to LZ4 in backward-codecs
have been changed to reverse the endianness of their input/output.
Adrien Grand committed on 2021-10-20 19:04:01 +02:00 (via GitHub)
commit 0e1f9fcf31, parent 6bb2bbcd6a
22 changed files with 136 additions and 241 deletions
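The endianness-reversing wrapper that the commit message refers to works, in
outline, like the following sketch (illustrative only; the real class is
org.apache.lucene.backward_codecs.store.EndiannessReverserDataInput, and the
name ReversingDataInput below is made up): every multi-byte primitive read is
delegated to the wrapped input and then byte-swapped, while single bytes and
raw byte arrays pass through untouched.

    import java.io.IOException;
    import org.apache.lucene.store.DataInput;

    // Sketch of an endianness-reversing DataInput (hypothetical class name).
    final class ReversingDataInput extends DataInput {
      private final DataInput in;

      ReversingDataInput(DataInput in) {
        this.in = in;
      }

      @Override
      public byte readByte() throws IOException {
        return in.readByte(); // single bytes have no byte order
      }

      @Override
      public void readBytes(byte[] b, int offset, int len) throws IOException {
        in.readBytes(b, offset, len); // raw copies are order-agnostic too
      }

      @Override
      public short readShort() throws IOException {
        return Short.reverseBytes(in.readShort()); // swap 2 bytes
      }

      @Override
      public int readInt() throws IOException {
        return Integer.reverseBytes(in.readInt()); // swap 4 bytes
      }

      @Override
      public long readLong() throws IOException {
        return Long.reverseBytes(in.readLong()); // swap 8 bytes
      }

      @Override
      public void skipBytes(long numBytes) throws IOException {
        in.skipBytes(numBytes);
      }
    }

With such a wrapper in front of an input of the other byte order, the
readShort()-based LZ4 decoder below sees the layout it expects without knowing
anything about the stream it is reading from.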

CompressionAlgorithm.java

@@ -17,6 +17,7 @@
 package org.apache.lucene.backward_codecs.lucene40.blocktree;

 import java.io.IOException;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.util.compress.LowercaseAsciiCompression;
@@ -42,7 +43,8 @@ enum CompressionAlgorithm {
       @Override
       void read(DataInput in, byte[] out, int len) throws IOException {
-        org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0);
+        org.apache.lucene.util.compress.LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(in), len, out, 0);
       }
     };

Lucene50StoredFieldsFormat.java

@@ -20,15 +20,24 @@ import java.io.IOException;
 import java.util.Objects;
 import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50CompressingStoredFieldsFormat;
 import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicWriter;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.compress.LZ4;

 /**
  * Lucene 5.0 stored fields format.
@@ -148,7 +157,7 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
     switch (mode) {
       case BEST_SPEED:
         return new Lucene50CompressingStoredFieldsFormat(
-            "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10);
+            "Lucene50StoredFieldsFastData", FAST_MODE, 1 << 14, 128, 10);
       case BEST_COMPRESSION:
         return new Lucene50CompressingStoredFieldsFormat(
             "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10);
@@ -156,4 +165,71 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
         throw new AssertionError();
     }
   }
+
+  static final CompressionMode FAST_MODE =
+      new CompressionMode() {
+
+        @Override
+        public Compressor newCompressor() {
+          return new LZ4FastCompressor();
+        }
+
+        @Override
+        public Decompressor newDecompressor() {
+          return LZ4_DECOMPRESSOR;
+        }
+
+        @Override
+        public String toString() {
+          return "FAST";
+        }
+      };
+
+  private static final class LZ4FastCompressor extends Compressor {
+
+    private final LZ4.FastCompressionHashTable ht;
+
+    LZ4FastCompressor() {
+      ht = new LZ4.FastCompressionHashTable();
+    }
+
+    @Override
+    public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+      LZ4.compress(bytes, off, len, EndiannessReverserUtil.wrapDataOutput(out), ht);
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+  }
+
+  private static final Decompressor LZ4_DECOMPRESSOR =
+      new Decompressor() {
+
+        @Override
+        public void decompress(
+            DataInput in, int originalLength, int offset, int length, BytesRef bytes)
+            throws IOException {
+          assert offset + length <= originalLength;
+          // add 7 padding bytes, this is not necessary but can help decompression run faster
+          if (bytes.bytes.length < originalLength + 7) {
+            bytes.bytes = new byte[ArrayUtil.oversize(originalLength + 7, 1)];
+          }
+          final int decompressedLength =
+              LZ4.decompress(
+                  EndiannessReverserUtil.wrapDataInput(in), offset + length, bytes.bytes, 0);
+          if (decompressedLength > originalLength) {
+            throw new CorruptIndexException(
+                "Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength, in);
+          }
+          bytes.offset = offset;
+          bytes.length = length;
+        }
+
+        @Override
+        public Decompressor clone() {
+          return this;
+        }
+      };
 }

Lucene50TermVectorsFormat.java

@@ -20,7 +20,6 @@ import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50Compressin
 import org.apache.lucene.backward_codecs.lucene87.Lucene87StoredFieldsFormat;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.compressing.CompressionMode;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
@@ -154,6 +153,6 @@ public final class Lucene50TermVectorsFormat extends Lucene50CompressingTermVect

   /** Sole constructor. */
   public Lucene50TermVectorsFormat() {
-    super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10);
+    super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10);
   }
 }

Lucene80DocValuesConsumer.java

@@ -476,7 +476,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
       }
     }
     maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength);
-    LZ4.compress(block, 0, uncompressedBlockLength, data, ht);
+    LZ4.compress(
+        block, 0, uncompressedBlockLength, EndiannessReverserUtil.wrapDataOutput(data), ht);
     numDocsInCurrentBlock = 0;
     // Ensure initialized with zeroes because full array is always written
     Arrays.fill(docLengths, 0);
@@ -847,7 +848,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
     int uncompressedLength = bufferedOutput.getPosition();
     data.writeVInt(uncompressedLength);
     long before = data.getFilePointer();
-    LZ4.compress(termsDictBuffer, 0, uncompressedLength, data, ht);
+    LZ4.compress(
+        termsDictBuffer, 0, uncompressedLength, EndiannessReverserUtil.wrapDataOutput(data), ht);
     int compressedLength = (int) (data.getFilePointer() - before);
     // Block length will be used for creating buffer for decompression, one corner case is that
     // compressed length might be bigger than un-compressed length, so just return the bigger one.

Lucene80DocValuesProducer.java

@@ -898,7 +898,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
         }
         assert uncompressedBlockLength <= uncompressedBlock.length;
-        LZ4.decompress(compressedData, uncompressedBlockLength, uncompressedBlock, 0);
+        LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(compressedData),
+            uncompressedBlockLength,
+            uncompressedBlock,
+            0);
       }
       uncompressedBytesRef.offset = uncompressedDocStarts[docInBlockId];
@@ -1355,7 +1359,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
       if (currentCompressedBlockStart != offset) {
         int decompressLength = bytes.readVInt();
         // Decompress the remaining of current block
-        LZ4.decompress(bytes, decompressLength, blockBuffer.bytes, 0);
+        LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(bytes), decompressLength, blockBuffer.bytes, 0);
         currentCompressedBlockStart = offset;
         currentCompressedBlockEnd = bytes.getFilePointer();
       } else {

LZ4WithPresetDictCompressionMode.java

@@ -17,6 +17,7 @@
 package org.apache.lucene.backward_codecs.lucene87;

 import java.io.IOException;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.compressing.CompressionMode;
 import org.apache.lucene.codecs.compressing.Compressor;
 import org.apache.lucene.codecs.compressing.Decompressor;
@@ -100,7 +101,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       buffer = ArrayUtil.grow(buffer, dictLength + blockLength);
       bytes.length = 0;
       // Read the dictionary
-      if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) {
+      if (LZ4.decompress(EndiannessReverserUtil.wrapDataInput(in), dictLength, buffer, 0)
+          != dictLength) {
         throw new CorruptIndexException("Illegal dict length", in);
       }
@@ -128,7 +130,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       // Read blocks that intersect with the interval we need
       while (offsetInBlock < offset + length) {
         final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
-        LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
+        LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(in), bytesToDecompress, buffer, dictLength);
         bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
         System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
         bytes.length += bytesToDecompress;

EndiannessReverserDataInput.java

@@ -21,7 +21,7 @@ import org.apache.lucene.store.DataInput;
 class EndiannessReverserDataInput extends DataInput {

-  private final DataInput in;
+  final DataInput in;

   EndiannessReverserDataInput(DataInput in) {
     this.in = in;

EndiannessReverserDataOutput.java

@@ -24,7 +24,7 @@ import org.apache.lucene.store.DataOutput;
 class EndiannessReverserDataOutput extends DataOutput {

-  private final DataOutput out;
+  final DataOutput out;

   EndiannessReverserDataOutput(DataOutput out) {
     this.out = out;

EndiannessReverserIndexInput.java

@@ -25,7 +25,7 @@ import org.apache.lucene.store.RandomAccessInput;
 /** A {@link IndexInput} wrapper that changes the endianness of the provided index input. */
 final class EndiannessReverserIndexInput extends IndexInput {

-  private final IndexInput in;
+  final IndexInput in;

   EndiannessReverserIndexInput(IndexInput in) {
     super("Endianness reverser Index Input wrapper");

EndiannessReverserIndexOutput.java

@@ -25,7 +25,7 @@ import org.apache.lucene.store.IndexOutput;
 /** A {@link IndexOutput} wrapper that changes the endianness of the provided index output. */
 final class EndiannessReverserIndexOutput extends IndexOutput {

-  private final IndexOutput out;
+  final IndexOutput out;

   EndiannessReverserIndexOutput(IndexOutput out) {
     super("Endianness reverser Index Output wrapper", out.getName());

EndiannessReverserUtil.java

@@ -62,11 +62,23 @@ public final class EndiannessReverserUtil {

   /** wraps a data output */
   public static DataOutput wrapDataOutput(DataOutput dataOutput) {
+    if (dataOutput instanceof EndiannessReverserDataOutput) {
+      return ((EndiannessReverserDataOutput) dataOutput).out;
+    }
+    if (dataOutput instanceof EndiannessReverserIndexOutput) {
+      return ((EndiannessReverserIndexOutput) dataOutput).out;
+    }
     return new EndiannessReverserDataOutput(dataOutput);
   }

   /** wraps a data input */
   public static DataInput wrapDataInput(DataInput dataInput) {
+    if (dataInput instanceof EndiannessReverserDataInput) {
+      return ((EndiannessReverserDataInput) dataInput).in;
+    }
+    if (dataInput instanceof EndiannessReverserIndexInput) {
+      return ((EndiannessReverserIndexInput) dataInput).in;
+    }
     return new EndiannessReverserDataInput(dataInput);
   }
 }
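The new instanceof checks make wrapDataInput/wrapDataOutput self-inverse:
reversing an already-reversed stream is a no-op, so the util hands back the
inner delegate instead of stacking a second wrapper that would byte-swap every
value twice. A usage sketch (hypothetical demo class, assuming Lucene's
ByteArrayDataInput):

    import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
    import org.apache.lucene.store.ByteArrayDataInput;
    import org.apache.lucene.store.DataInput;

    public class WrapUnwrapDemo {
      public static void main(String[] args) {
        DataInput raw = new ByteArrayDataInput(new byte[] {1, 2, 3, 4});
        DataInput reversed = EndiannessReverserUtil.wrapDataInput(raw);
        // Wrapping the wrapper returns the original delegate rather than
        // a second, self-cancelling byte-swapping layer:
        assert EndiannessReverserUtil.wrapDataInput(reversed) == raw;
      }
    }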

Lucene50RWStoredFieldsFormat.java

@@ -61,7 +61,7 @@ public final class Lucene50RWStoredFieldsFormat extends Lucene50StoredFieldsForm
     switch (mode) {
       case BEST_SPEED:
         return new Lucene50RWCompressingStoredFieldsFormat(
-            "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10);
+            "Lucene50StoredFieldsFastData", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 14, 128, 10);
       case BEST_COMPRESSION:
         return new Lucene50RWCompressingStoredFieldsFormat(
             "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10);

Lucene50RWTermVectorsFormat.java

@@ -17,13 +17,12 @@
 package org.apache.lucene.backward_codecs.lucene50;

 import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50RWCompressingTermVectorsFormat;
-import org.apache.lucene.codecs.compressing.CompressionMode;

 /** RW impersonation of Lucene50StoredFieldsFormat. */
 public final class Lucene50RWTermVectorsFormat extends Lucene50RWCompressingTermVectorsFormat {

   /** Sole constructor. */
   public Lucene50RWTermVectorsFormat() {
-    super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10);
+    super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10);
   }
 }

ByteSliceReader.java

@@ -95,7 +95,7 @@ final class ByteSliceReader extends DataInput {
   public void nextSlice() {

     // Skip to our next slice
-    final int nextIndex = (int) BitUtil.VH_BE_INT.get(buffer, limit);
+    final int nextIndex = (int) BitUtil.VH_LE_INT.get(buffer, limit);

     level = ByteBlockPool.NEXT_LEVEL_ARRAY[level];
     final int newSize = ByteBlockPool.LEVEL_SIZE_ARRAY[level];

ByteSliceWriter.java (entire file deleted)

@@ -1,86 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;

import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ByteBlockPool;

/**
 * Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold
 * the posting list for many terms in RAM.
 */
final class ByteSliceWriter extends DataOutput {

  /* Initial chunks size of the shared byte[] blocks used to
  store postings data */
  private static final int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;

  private byte[] slice;
  private int upto;
  private final ByteBlockPool pool;

  int offset0;

  public ByteSliceWriter(ByteBlockPool pool) {
    this.pool = pool;
  }

  /** Set up the writer to write at address. */
  public void init(int address) {
    slice = pool.buffers[address >> ByteBlockPool.BYTE_BLOCK_SHIFT];
    assert slice != null;
    upto = address & ByteBlockPool.BYTE_BLOCK_MASK;
    offset0 = address;
    assert upto < slice.length;
  }

  /** Write byte into byte slice stream */
  @Override
  public void writeByte(byte b) {
    assert slice != null;
    if (slice[upto] != 0) {
      upto = pool.allocSlice(slice, upto);
      slice = pool.buffer;
      offset0 = pool.byteOffset;
      assert slice != null;
    }
    slice[upto++] = b;
    assert upto != slice.length;
  }

  @Override
  public void writeBytes(final byte[] b, int offset, final int len) {
    final int offsetEnd = offset + len;
    while (offset < offsetEnd) {
      if (slice[upto] != 0) {
        // End marker
        upto = pool.allocSlice(slice, upto);
        slice = pool.buffer;
        offset0 = pool.byteOffset;
      }
      slice[upto++] = b[offset++];
      assert upto != slice.length;
    }
  }

  public int getAddress() {
    return upto + (offset0 & BYTE_BLOCK_NOT_MASK);
  }
}

ByteBlockPool.java

@@ -254,17 +254,16 @@ public final class ByteBlockPool implements Accountable {
     final int offset = newUpto + byteOffset;
     byteUpto += newSize;

-    // Copy forward the past 3 bytes (which we are about
-    // to overwrite with the forwarding address):
-    buffer[newUpto] = slice[upto - 3];
-    buffer[newUpto + 1] = slice[upto - 2];
-    buffer[newUpto + 2] = slice[upto - 1];
+    // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address).
+    // We actually copy 4 bytes at once since VarHandles make it cheap.
+    int past3Bytes = ((int) BitUtil.VH_LE_INT.get(slice, upto - 3)) & 0xFFFFFF;
+    // Ensure we're not changing the content of `buffer` by setting 4 bytes instead of 3. This
+    // should never happen since the next `newSize` bytes must be equal to 0.
+    assert buffer[newUpto + 3] == 0;
+    BitUtil.VH_LE_INT.set(buffer, newUpto, past3Bytes);

     // Write forwarding address at end of last slice:
-    slice[upto - 3] = (byte) (offset >>> 24);
-    slice[upto - 2] = (byte) (offset >>> 16);
-    slice[upto - 1] = (byte) (offset >>> 8);
-    slice[upto] = (byte) offset;
+    BitUtil.VH_LE_INT.set(slice, upto - 3, offset);

     // Write new level:
     buffer[byteUpto - 1] = (byte) (16 | newLevel);
@@ -308,7 +307,7 @@ public final class ByteBlockPool implements Accountable {
       term.offset = pos + 1;
     } else {
       // length is 2 bytes
-      term.length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
+      term.length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF;
       term.offset = pos + 2;
     }
     assert term.length >= 0;
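The allocSlice change above is the commit's theme in miniature: three
byte-at-a-time copies collapse into one masked 32-bit VarHandle read and write.
A standalone sketch of the trick using java.lang.invoke directly (Lucene's
BitUtil.VH_LE_INT is the equivalent handle):

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;
    import java.nio.ByteOrder;

    public class CopyThreeBytesDemo { // illustrative only
      static final VarHandle LE_INT =
          MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);

      public static void main(String[] args) {
        byte[] src = {10, 20, 30, 40};
        byte[] dst = new byte[4]; // dst[3] == 0, as the assert in allocSlice checks

        // One little-endian read picks up src[0..2] in the low 24 bits ...
        int past3Bytes = ((int) LE_INT.get(src, 0)) & 0xFFFFFF;
        // ... and one write stores them, leaving the zero high byte untouched.
        LE_INT.set(dst, 0, past3Bytes);

        assert dst[0] == 10 && dst[1] == 20 && dst[2] == 30 && dst[3] == 0;
      }
    }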

BytesRefHash.java

@@ -181,7 +181,7 @@ public final class BytesRefHash implements Accountable {
       offset = pos + 1;
     } else {
       // length is 2 bytes
-      length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
+      length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF;
       offset = pos + 2;
     }
     return Arrays.equals(bytes, offset, offset + length, b.bytes, b.offset, b.offset + b.length);
@@ -282,8 +282,7 @@ public final class BytesRefHash implements Accountable {
       System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1, length);
     } else {
       // 2 byte to store length
-      buffer[bufferUpto] = (byte) (0x80 | (length & 0x7f));
-      buffer[bufferUpto + 1] = (byte) ((length >> 7) & 0xff);
+      BitUtil.VH_BE_SHORT.set(buffer, bufferUpto, (short) (length | 0x8000));
       pool.byteUpto += length + 2;
       System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2, length);
     }
@@ -392,7 +391,7 @@ public final class BytesRefHash implements Accountable {
       len = bytes[start];
       pos = start + 1;
     } else {
-      len = (bytes[start] & 0x7f) + ((bytes[start + 1] & 0xff) << 7);
+      len = ((short) BitUtil.VH_BE_SHORT.get(bytes, start)) & 0x7FFF;
      pos = start + 2;
    }
    code = doHash(bytes, pos, len);

PagedBytes.java

@@ -128,7 +128,7 @@ public final class PagedBytes implements Accountable {
       b.length = block[offset];
       b.offset = offset + 1;
     } else {
-      b.length = ((block[offset] & 0x7f) << 8) | (block[1 + offset] & 0xff);
+      b.length = ((short) BitUtil.VH_BE_SHORT.get(block, offset)) & 0x7FFF;
       b.offset = offset + 2;
       assert b.length > 0;
     }
@@ -285,8 +285,8 @@ public final class PagedBytes implements Accountable {
     if (bytes.length < 128) {
       currentBlock[upto++] = (byte) bytes.length;
     } else {
-      currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8));
-      currentBlock[upto++] = (byte) (bytes.length & 0xff);
+      BitUtil.VH_BE_SHORT.set(currentBlock, upto, (short) (bytes.length | 0x8000));
+      upto += 2;
     }
     System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
     upto += bytes.length;
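BytesRefHash, PagedBytes and ByteBlockPool all share the 1-or-2-byte length
prefix these hunks rewrite: lengths below 128 take a single byte; longer
lengths take a big-endian 15-bit value with the high bit set as a marker.
(BytesRefHash previously split the length 7/8 bits across the two bytes; since
these buffers live only in RAM, writer and reader merely have to agree.) A
sketch of the scheme with a plain java.lang.invoke handle standing in for
BitUtil.VH_BE_SHORT:

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;
    import java.nio.ByteOrder;

    final class LengthPrefix { // illustrative only, not a Lucene class
      private static final VarHandle BE_SHORT =
          MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.BIG_ENDIAN);

      /** Writes the length prefix at pos and returns the position after it. */
      static int writeLength(byte[] buf, int pos, int length) {
        if (length < 128) {
          buf[pos] = (byte) length; // 1 byte, high bit clear
          return pos + 1;
        }
        BE_SHORT.set(buf, pos, (short) (length | 0x8000)); // 2 bytes, marker set
        return pos + 2;
      }

      /** Reads back a length written by writeLength. */
      static int readLength(byte[] buf, int pos) {
        if ((buf[pos] & 0x80) == 0) {
          return buf[pos];
        }
        // One VarHandle read replaces two byte loads; 0x7FFF strips the marker.
        return ((short) BE_SHORT.get(buf, pos)) & 0x7FFF;
      }
    }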

OfflinePointWriter.java

@@ -64,11 +64,8 @@ public final class OfflinePointWriter implements PointWriter {
             + "]";
     out.writeBytes(packedValue, 0, packedValue.length);

-    // write bytes for comparing in lexicographically order
-    out.writeByte((byte) (docID >> 24));
-    out.writeByte((byte) (docID >> 16));
-    out.writeByte((byte) (docID >> 8));
-    out.writeByte((byte) docID);
+    // write bytes in big-endian order for comparing in lexicographically order
+    out.writeInt(Integer.reverseBytes(docID));
     count++;
     assert expectedCount == 0 || count <= expectedCount
         : "expectedCount=" + expectedCount + " vs count=" + count;

LZ4.java

@@ -107,7 +107,7 @@ public final class LZ4 {
       }

       // matchs
-      final int matchDec = (compressed.readByte() & 0xFF) | ((compressed.readByte() & 0xFF) << 8);
+      final int matchDec = compressed.readShort() & 0xFFFF;
       assert matchDec > 0;

       int matchLen = token & 0x0F;
@@ -176,8 +176,7 @@ public final class LZ4 {
       // encode match dec
       final int matchDec = matchOff - matchRef;
       assert matchDec > 0 && matchDec < 1 << 16;
-      out.writeByte((byte) matchDec);
-      out.writeByte((byte) (matchDec >>> 8));
+      out.writeShort((short) matchDec);

       // encode match len
       if (matchLen >= MIN_MATCH + 0x0F) {

TestByteSlices.java (entire file deleted)

@@ -1,108 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.util.Random;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RecyclingByteBlockAllocator;

public class TestByteSlices extends LuceneTestCase {

  public void testBasic() throws Throwable {
    Random random = random();

    ByteBlockPool pool =
        new ByteBlockPool(
            new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random().nextInt(100)));

    final int NUM_STREAM = atLeast(random, 100);

    ByteSliceWriter writer = new ByteSliceWriter(pool);

    int[] starts = new int[NUM_STREAM];
    int[] uptos = new int[NUM_STREAM];
    int[] counters = new int[NUM_STREAM];

    ByteSliceReader reader = new ByteSliceReader();

    for (int ti = 0; ti < 100; ti++) {

      for (int stream = 0; stream < NUM_STREAM; stream++) {
        starts[stream] = -1;
        counters[stream] = 0;
      }

      int num = atLeast(random, 3000);
      for (int iter = 0; iter < num; iter++) {
        int stream;
        if (random.nextBoolean()) {
          stream = random.nextInt(3);
        } else {
          stream = random.nextInt(NUM_STREAM);
        }

        if (VERBOSE) {
          System.out.println("write stream=" + stream);
        }

        if (starts[stream] == -1) {
          final int spot = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
          starts[stream] = uptos[stream] = spot + pool.byteOffset;
          if (VERBOSE) {
            System.out.println("  init to " + starts[stream]);
          }
        }

        writer.init(uptos[stream]);
        int numValue;
        if (random.nextInt(10) == 3) {
          numValue = random.nextInt(100);
        } else if (random.nextInt(5) == 3) {
          numValue = random.nextInt(3);
        } else {
          numValue = random.nextInt(20);
        }

        for (int j = 0; j < numValue; j++) {
          if (VERBOSE) {
            System.out.println("    write " + (counters[stream] + j));
          }
          // write some large (incl. negative) ints:
          writer.writeVInt(random.nextInt());
          writer.writeVInt(counters[stream] + j);
        }
        counters[stream] += numValue;
        uptos[stream] = writer.getAddress();
        if (VERBOSE) System.out.println("    addr now " + uptos[stream]);
      }

      for (int stream = 0; stream < NUM_STREAM; stream++) {
        if (VERBOSE) System.out.println("  stream=" + stream + " count=" + counters[stream]);

        if (starts[stream] != -1 && starts[stream] != uptos[stream]) {
          reader.init(pool, starts[stream], uptos[stream]);
          for (int j = 0; j < counters[stream]; j++) {
            reader.readVInt();
            assertEquals(j, reader.readVInt());
          }
        }
      }

      pool.reset();
    }
  }
}

FSTCompletionLookup.java

@@ -185,10 +185,7 @@ public class FSTCompletionLookup extends Lookup {
       output.reset(buffer);
       final int encodedWeight = encodeWeight(iterator.weight());
       // write bytes for comparing in lexicographically order
-      output.writeByte((byte) (encodedWeight >> 24));
-      output.writeByte((byte) (encodedWeight >> 16));
-      output.writeByte((byte) (encodedWeight >> 8));
-      output.writeByte((byte) encodedWeight);
+      output.writeInt(Integer.reverseBytes(encodedWeight));
       output.writeBytes(spare.bytes, spare.offset, spare.length);
       writer.write(buffer, 0, output.getPosition());
       inputLineCount++;