From 0e1f9fcf31b11419bd495378642b8aefad344049 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 20 Oct 2021 19:04:01 +0200 Subject: [PATCH] LUCENE-10193: Cut over more array access to VarHandles. (#402) LZ4 is interesting because it used to read data in little-endian order even though Directory APIs were big endian. So most calls to LZ4 in backward-codecs now wrap their input/output to reverse its endianness. --- .../blocktree/CompressionAlgorithm.java | 4 +- .../lucene50/Lucene50StoredFieldsFormat.java | 78 ++++++++++++- .../lucene50/Lucene50TermVectorsFormat.java | 3 +- .../lucene80/Lucene80DocValuesConsumer.java | 6 +- .../lucene80/Lucene80DocValuesProducer.java | 9 +- .../LZ4WithPresetDictCompressionMode.java | 7 +- .../store/EndiannessReverserDataInput.java | 2 +- .../store/EndiannessReverserDataOutput.java | 2 +- .../store/EndiannessReverserIndexInput.java | 2 +- .../store/EndiannessReverserIndexOutput.java | 2 +- .../store/EndiannessReverserUtil.java | 12 ++ .../Lucene50RWStoredFieldsFormat.java | 2 +- .../lucene50/Lucene50RWTermVectorsFormat.java | 3 +- .../apache/lucene/index/ByteSliceReader.java | 2 +- .../apache/lucene/index/ByteSliceWriter.java | 86 -------------- .../org/apache/lucene/util/ByteBlockPool.java | 19 ++- .../org/apache/lucene/util/BytesRefHash.java | 7 +- .../org/apache/lucene/util/PagedBytes.java | 6 +- .../lucene/util/bkd/OfflinePointWriter.java | 7 +- .../org/apache/lucene/util/compress/LZ4.java | 5 +- .../apache/lucene/index/TestByteSlices.java | 108 ------------------ .../suggest/fst/FSTCompletionLookup.java | 5 +- 22 files changed, 136 insertions(+), 241 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/index/ByteSliceWriter.java delete mode 100644 lucene/core/src/test/org/apache/lucene/index/TestByteSlices.java diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/CompressionAlgorithm.java 
b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/CompressionAlgorithm.java index ceb1838dd9d..469e773e233 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/CompressionAlgorithm.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene40/blocktree/CompressionAlgorithm.java @@ -17,6 +17,7 @@ package org.apache.lucene.backward_codecs.lucene40.blocktree; import java.io.IOException; +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; import org.apache.lucene.store.DataInput; import org.apache.lucene.util.compress.LowercaseAsciiCompression; @@ -42,7 +43,8 @@ enum CompressionAlgorithm { @Override void read(DataInput in, byte[] out, int len) throws IOException { - org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0); + org.apache.lucene.util.compress.LZ4.decompress( + EndiannessReverserUtil.wrapDataInput(in), len, out, 0); } }; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50StoredFieldsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50StoredFieldsFormat.java index 05b6324dab5..e6341284d54 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50StoredFieldsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50StoredFieldsFormat.java @@ -20,15 +20,24 @@ import java.io.IOException; import java.util.Objects; import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50CompressingStoredFieldsFormat; import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicWriter; +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsWriter; import 
org.apache.lucene.codecs.compressing.CompressionMode; +import org.apache.lucene.codecs.compressing.Compressor; +import org.apache.lucene.codecs.compressing.Decompressor; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.compress.LZ4; /** * Lucene 5.0 stored fields format. @@ -148,7 +157,7 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat { switch (mode) { case BEST_SPEED: return new Lucene50CompressingStoredFieldsFormat( - "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10); + "Lucene50StoredFieldsFastData", FAST_MODE, 1 << 14, 128, 10); case BEST_COMPRESSION: return new Lucene50CompressingStoredFieldsFormat( "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10); @@ -156,4 +165,71 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat { throw new AssertionError(); } } + + static final CompressionMode FAST_MODE = + new CompressionMode() { + + @Override + public Compressor newCompressor() { + return new LZ4FastCompressor(); + } + + @Override + public Decompressor newDecompressor() { + return LZ4_DECOMPRESSOR; + } + + @Override + public String toString() { + return "FAST"; + } + }; + + private static final class LZ4FastCompressor extends Compressor { + + private final LZ4.FastCompressionHashTable ht; + + LZ4FastCompressor() { + ht = new LZ4.FastCompressionHashTable(); + } + + @Override + public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException { + LZ4.compress(bytes, off, len, EndiannessReverserUtil.wrapDataOutput(out), ht); + } + + 
@Override + public void close() throws IOException { + // no-op + } + } + + private static final Decompressor LZ4_DECOMPRESSOR = + new Decompressor() { + + @Override + public void decompress( + DataInput in, int originalLength, int offset, int length, BytesRef bytes) + throws IOException { + assert offset + length <= originalLength; + // add 7 padding bytes, this is not necessary but can help decompression run faster + if (bytes.bytes.length < originalLength + 7) { + bytes.bytes = new byte[ArrayUtil.oversize(originalLength + 7, 1)]; + } + final int decompressedLength = + LZ4.decompress( + EndiannessReverserUtil.wrapDataInput(in), offset + length, bytes.bytes, 0); + if (decompressedLength > originalLength) { + throw new CorruptIndexException( + "Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength, in); + } + bytes.offset = offset; + bytes.length = length; + } + + @Override + public Decompressor clone() { + return this; + } + }; } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50TermVectorsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50TermVectorsFormat.java index 7db9fad0702..5a37dc450af 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50TermVectorsFormat.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50TermVectorsFormat.java @@ -20,7 +20,6 @@ import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50Compressin import org.apache.lucene.backward_codecs.lucene87.Lucene87StoredFieldsFormat; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.TermVectorsFormat; -import org.apache.lucene.codecs.compressing.CompressionMode; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.packed.BlockPackedWriter; import org.apache.lucene.util.packed.PackedInts; @@ -154,6 +153,6 @@ public final class 
Lucene50TermVectorsFormat extends Lucene50CompressingTermVect /** Sole constructor. */ public Lucene50TermVectorsFormat() { - super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10); + super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10); } } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java index 12d5b770f7d..f9a53311189 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesConsumer.java @@ -476,7 +476,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer { } } maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength); - LZ4.compress(block, 0, uncompressedBlockLength, data, ht); + LZ4.compress( + block, 0, uncompressedBlockLength, EndiannessReverserUtil.wrapDataOutput(data), ht); numDocsInCurrentBlock = 0; // Ensure initialized with zeroes because full array is always written Arrays.fill(docLengths, 0); @@ -847,7 +848,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer { int uncompressedLength = bufferedOutput.getPosition(); data.writeVInt(uncompressedLength); long before = data.getFilePointer(); - LZ4.compress(termsDictBuffer, 0, uncompressedLength, data, ht); + LZ4.compress( + termsDictBuffer, 0, uncompressedLength, EndiannessReverserUtil.wrapDataOutput(data), ht); int compressedLength = (int) (data.getFilePointer() - before); // Block length will be used for creating buffer for decompression, one corner case is that // compressed length might be bigger than un-compressed length, so just return the bigger one. 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java index 98b4a8a08ad..118b91a3011 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java @@ -898,7 +898,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { } assert uncompressedBlockLength <= uncompressedBlock.length; - LZ4.decompress(compressedData, uncompressedBlockLength, uncompressedBlock, 0); + LZ4.decompress( + EndiannessReverserUtil.wrapDataInput(compressedData), + uncompressedBlockLength, + uncompressedBlock, + 0); } uncompressedBytesRef.offset = uncompressedDocStarts[docInBlockId]; @@ -1355,7 +1359,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer { if (currentCompressedBlockStart != offset) { int decompressLength = bytes.readVInt(); // Decompress the remaining of current block - LZ4.decompress(bytes, decompressLength, blockBuffer.bytes, 0); + LZ4.decompress( + EndiannessReverserUtil.wrapDataInput(bytes), decompressLength, blockBuffer.bytes, 0); currentCompressedBlockStart = offset; currentCompressedBlockEnd = bytes.getFilePointer(); } else { diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java index 8a16bd736af..70f803c7f2e 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java @@ -17,6 +17,7 @@ package org.apache.lucene.backward_codecs.lucene87; import 
java.io.IOException; +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; import org.apache.lucene.codecs.compressing.CompressionMode; import org.apache.lucene.codecs.compressing.Compressor; import org.apache.lucene.codecs.compressing.Decompressor; @@ -100,7 +101,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode { buffer = ArrayUtil.grow(buffer, dictLength + blockLength); bytes.length = 0; // Read the dictionary - if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) { + if (LZ4.decompress(EndiannessReverserUtil.wrapDataInput(in), dictLength, buffer, 0) + != dictLength) { throw new CorruptIndexException("Illegal dict length", in); } @@ -128,7 +130,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode { // Read blocks that intersect with the interval we need while (offsetInBlock < offset + length) { final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock); - LZ4.decompress(in, bytesToDecompress, buffer, dictLength); + LZ4.decompress( + EndiannessReverserUtil.wrapDataInput(in), bytesToDecompress, buffer, dictLength); bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress); System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress); bytes.length += bytesToDecompress; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataInput.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataInput.java index 0d9ccd345fc..c957e826596 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataInput.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataInput.java @@ -21,7 +21,7 @@ import org.apache.lucene.store.DataInput; class EndiannessReverserDataInput extends DataInput { - private final DataInput in; + final DataInput in; 
EndiannessReverserDataInput(DataInput in) { this.in = in; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataOutput.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataOutput.java index c042ed7a8ad..c0cf3206b11 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataOutput.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserDataOutput.java @@ -24,7 +24,7 @@ import org.apache.lucene.store.DataOutput; class EndiannessReverserDataOutput extends DataOutput { - private final DataOutput out; + final DataOutput out; EndiannessReverserDataOutput(DataOutput out) { this.out = out; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java index 1bfc3c08a9b..b02232115c9 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java @@ -25,7 +25,7 @@ import org.apache.lucene.store.RandomAccessInput; /** A {@link IndexInput} wrapper that changes the endianness of the provided index input. 
*/ final class EndiannessReverserIndexInput extends IndexInput { - private final IndexInput in; + final IndexInput in; EndiannessReverserIndexInput(IndexInput in) { super("Endianness reverser Index Input wrapper"); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexOutput.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexOutput.java index 53b65a1aff4..7da9909da70 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexOutput.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexOutput.java @@ -25,7 +25,7 @@ import org.apache.lucene.store.IndexOutput; /** A {@link IndexOutput} wrapper that changes the endianness of the provided index output. */ final class EndiannessReverserIndexOutput extends IndexOutput { - private final IndexOutput out; + final IndexOutput out; EndiannessReverserIndexOutput(IndexOutput out) { super("Endianness reverser Index Output wrapper", out.getName()); diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserUtil.java index 4a4829a1b99..44e3c217ccd 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserUtil.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserUtil.java @@ -62,11 +62,23 @@ public final class EndiannessReverserUtil { /** wraps a data output */ public static DataOutput wrapDataOutput(DataOutput dataOutput) { + if (dataOutput instanceof EndiannessReverserDataOutput) { + return ((EndiannessReverserDataOutput) dataOutput).out; + } + if (dataOutput instanceof EndiannessReverserIndexOutput) { + return ((EndiannessReverserIndexOutput) dataOutput).out; + } return new 
EndiannessReverserDataOutput(dataOutput); } /** wraps a data input */ public static DataInput wrapDataInput(DataInput dataInput) { + if (dataInput instanceof EndiannessReverserDataInput) { + return ((EndiannessReverserDataInput) dataInput).in; + } + if (dataInput instanceof EndiannessReverserIndexInput) { + return ((EndiannessReverserIndexInput) dataInput).in; + } return new EndiannessReverserDataInput(dataInput); } } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java index 9f8fdae1534..21f864ad3cf 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWStoredFieldsFormat.java @@ -61,7 +61,7 @@ public final class Lucene50RWStoredFieldsFormat extends Lucene50StoredFieldsForm switch (mode) { case BEST_SPEED: return new Lucene50RWCompressingStoredFieldsFormat( - "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10); + "Lucene50StoredFieldsFastData", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 14, 128, 10); case BEST_COMPRESSION: return new Lucene50RWCompressingStoredFieldsFormat( "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10); diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWTermVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWTermVectorsFormat.java index 5155489479d..d711a3f267c 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWTermVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/Lucene50RWTermVectorsFormat.java @@ -17,13 +17,12 @@ package org.apache.lucene.backward_codecs.lucene50; 
import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50RWCompressingTermVectorsFormat; -import org.apache.lucene.codecs.compressing.CompressionMode; /** RW impersonation of Lucene50StoredFieldsFormat. */ public final class Lucene50RWTermVectorsFormat extends Lucene50RWCompressingTermVectorsFormat { /** Sole constructor. */ public Lucene50RWTermVectorsFormat() { - super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10); + super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java b/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java index 857388a0a8e..cf0d6fc211c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java @@ -95,7 +95,7 @@ final class ByteSliceReader extends DataInput { public void nextSlice() { // Skip to our next slice - final int nextIndex = (int) BitUtil.VH_BE_INT.get(buffer, limit); + final int nextIndex = (int) BitUtil.VH_LE_INT.get(buffer, limit); level = ByteBlockPool.NEXT_LEVEL_ARRAY[level]; final int newSize = ByteBlockPool.LEVEL_SIZE_ARRAY[level]; diff --git a/lucene/core/src/java/org/apache/lucene/index/ByteSliceWriter.java b/lucene/core/src/java/org/apache/lucene/index/ByteSliceWriter.java deleted file mode 100644 index 10b370cfc55..00000000000 --- a/lucene/core/src/java/org/apache/lucene/index/ByteSliceWriter.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.index; - -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; - -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.util.ByteBlockPool; - -/** - * Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold - * the posting list for many terms in RAM. - */ -final class ByteSliceWriter extends DataOutput { - - /* Initial chunks size of the shared byte[] blocks used to - store postings data */ - private static final int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; - - private byte[] slice; - private int upto; - private final ByteBlockPool pool; - - int offset0; - - public ByteSliceWriter(ByteBlockPool pool) { - this.pool = pool; - } - - /** Set up the writer to write at address. 
*/ - public void init(int address) { - slice = pool.buffers[address >> ByteBlockPool.BYTE_BLOCK_SHIFT]; - assert slice != null; - upto = address & ByteBlockPool.BYTE_BLOCK_MASK; - offset0 = address; - assert upto < slice.length; - } - - /** Write byte into byte slice stream */ - @Override - public void writeByte(byte b) { - assert slice != null; - if (slice[upto] != 0) { - upto = pool.allocSlice(slice, upto); - slice = pool.buffer; - offset0 = pool.byteOffset; - assert slice != null; - } - slice[upto++] = b; - assert upto != slice.length; - } - - @Override - public void writeBytes(final byte[] b, int offset, final int len) { - final int offsetEnd = offset + len; - while (offset < offsetEnd) { - if (slice[upto] != 0) { - // End marker - upto = pool.allocSlice(slice, upto); - slice = pool.buffer; - offset0 = pool.byteOffset; - } - - slice[upto++] = b[offset++]; - assert upto != slice.length; - } - } - - public int getAddress() { - return upto + (offset0 & BYTE_BLOCK_NOT_MASK); - } -} diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 4b8c14635b6..5bf5ffc9a79 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -254,17 +254,16 @@ public final class ByteBlockPool implements Accountable { final int offset = newUpto + byteOffset; byteUpto += newSize; - // Copy forward the past 3 bytes (which we are about - // to overwrite with the forwarding address): - buffer[newUpto] = slice[upto - 3]; - buffer[newUpto + 1] = slice[upto - 2]; - buffer[newUpto + 2] = slice[upto - 1]; + // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address). + // We actually copy 4 bytes at once since VarHandles make it cheap. 
+ int past3Bytes = ((int) BitUtil.VH_LE_INT.get(slice, upto - 3)) & 0xFFFFFF; + // Ensure we're not changing the content of `buffer` by setting 4 bytes instead of 3. This + // should never happen since the next `newSize` bytes must be equal to 0. + assert buffer[newUpto + 3] == 0; + BitUtil.VH_LE_INT.set(buffer, newUpto, past3Bytes); // Write forwarding address at end of last slice: - slice[upto - 3] = (byte) (offset >>> 24); - slice[upto - 2] = (byte) (offset >>> 16); - slice[upto - 1] = (byte) (offset >>> 8); - slice[upto] = (byte) offset; + BitUtil.VH_LE_INT.set(slice, upto - 3, offset); // Write new level: buffer[byteUpto - 1] = (byte) (16 | newLevel); @@ -308,7 +307,7 @@ public final class ByteBlockPool implements Accountable { term.offset = pos + 1; } else { // length is 2 bytes - term.length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7); + term.length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF; term.offset = pos + 2; } assert term.length >= 0; diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java index e82bf130d8f..f70f79e8a4d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java @@ -181,7 +181,7 @@ public final class BytesRefHash implements Accountable { offset = pos + 1; } else { // length is 2 bytes - length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7); + length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF; offset = pos + 2; } return Arrays.equals(bytes, offset, offset + length, b.bytes, b.offset, b.offset + b.length); @@ -282,8 +282,7 @@ public final class BytesRefHash implements Accountable { System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1, length); } else { // 2 byte to store length - buffer[bufferUpto] = (byte) (0x80 | (length & 0x7f)); - buffer[bufferUpto + 1] = (byte) ((length >> 7) & 0xff); + 
BitUtil.VH_BE_SHORT.set(buffer, bufferUpto, (short) (length | 0x8000)); pool.byteUpto += length + 2; System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2, length); } @@ -392,7 +391,7 @@ public final class BytesRefHash implements Accountable { len = bytes[start]; pos = start + 1; } else { - len = (bytes[start] & 0x7f) + ((bytes[start + 1] & 0xff) << 7); + len = ((short) BitUtil.VH_BE_SHORT.get(bytes, start)) & 0x7FFF; pos = start + 2; } code = doHash(bytes, pos, len); diff --git a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java index d67c9168edb..134fa11560a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java +++ b/lucene/core/src/java/org/apache/lucene/util/PagedBytes.java @@ -128,7 +128,7 @@ public final class PagedBytes implements Accountable { b.length = block[offset]; b.offset = offset + 1; } else { - b.length = ((block[offset] & 0x7f) << 8) | (block[1 + offset] & 0xff); + b.length = ((short) BitUtil.VH_BE_SHORT.get(block, offset)) & 0x7FFF; b.offset = offset + 2; assert b.length > 0; } @@ -285,8 +285,8 @@ public final class PagedBytes implements Accountable { if (bytes.length < 128) { currentBlock[upto++] = (byte) bytes.length; } else { - currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8)); - currentBlock[upto++] = (byte) (bytes.length & 0xff); + BitUtil.VH_BE_SHORT.set(currentBlock, upto, (short) (bytes.length | 0x8000)); + upto += 2; } System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length); upto += bytes.length; diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java index fe26d37029b..84dc99c79a6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java @@ -64,11 +64,8 @@ public final class OfflinePointWriter 
implements PointWriter { + "]"; out.writeBytes(packedValue, 0, packedValue.length); - // write bytes for comparing in lexicographically order - out.writeByte((byte) (docID >> 24)); - out.writeByte((byte) (docID >> 16)); - out.writeByte((byte) (docID >> 8)); - out.writeByte((byte) docID); + // write bytes in big-endian order for comparing in lexicographically order + out.writeInt(Integer.reverseBytes(docID)); count++; assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count; diff --git a/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java b/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java index a4ff902d660..0deb228356d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java +++ b/lucene/core/src/java/org/apache/lucene/util/compress/LZ4.java @@ -107,7 +107,7 @@ public final class LZ4 { } // matchs - final int matchDec = (compressed.readByte() & 0xFF) | ((compressed.readByte() & 0xFF) << 8); + final int matchDec = compressed.readShort() & 0xFFFF; assert matchDec > 0; int matchLen = token & 0x0F; @@ -176,8 +176,7 @@ public final class LZ4 { // encode match dec final int matchDec = matchOff - matchRef; assert matchDec > 0 && matchDec < 1 << 16; - out.writeByte((byte) matchDec); - out.writeByte((byte) (matchDec >>> 8)); + out.writeShort((short) matchDec); // encode match len if (matchLen >= MIN_MATCH + 0x0F) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestByteSlices.java b/lucene/core/src/test/org/apache/lucene/index/TestByteSlices.java deleted file mode 100644 index 3488387b1b5..00000000000 --- a/lucene/core/src/test/org/apache/lucene/index/TestByteSlices.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.index; - -import java.util.Random; -import org.apache.lucene.util.ByteBlockPool; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.RecyclingByteBlockAllocator; - -public class TestByteSlices extends LuceneTestCase { - - public void testBasic() throws Throwable { - Random random = random(); - ByteBlockPool pool = - new ByteBlockPool( - new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random().nextInt(100))); - - final int NUM_STREAM = atLeast(random, 100); - - ByteSliceWriter writer = new ByteSliceWriter(pool); - - int[] starts = new int[NUM_STREAM]; - int[] uptos = new int[NUM_STREAM]; - int[] counters = new int[NUM_STREAM]; - - ByteSliceReader reader = new ByteSliceReader(); - - for (int ti = 0; ti < 100; ti++) { - - for (int stream = 0; stream < NUM_STREAM; stream++) { - starts[stream] = -1; - counters[stream] = 0; - } - - int num = atLeast(random, 3000); - for (int iter = 0; iter < num; iter++) { - int stream; - if (random.nextBoolean()) { - stream = random.nextInt(3); - } else { - stream = random.nextInt(NUM_STREAM); - } - - if (VERBOSE) { - System.out.println("write stream=" + stream); - } - - if (starts[stream] == -1) { - final int spot = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE); - starts[stream] = uptos[stream] = spot + pool.byteOffset; - if (VERBOSE) { - System.out.println(" init to " + starts[stream]); 
- } - } - - writer.init(uptos[stream]); - int numValue; - if (random.nextInt(10) == 3) { - numValue = random.nextInt(100); - } else if (random.nextInt(5) == 3) { - numValue = random.nextInt(3); - } else { - numValue = random.nextInt(20); - } - - for (int j = 0; j < numValue; j++) { - if (VERBOSE) { - System.out.println(" write " + (counters[stream] + j)); - } - // write some large (incl. negative) ints: - writer.writeVInt(random.nextInt()); - writer.writeVInt(counters[stream] + j); - } - counters[stream] += numValue; - uptos[stream] = writer.getAddress(); - if (VERBOSE) System.out.println(" addr now " + uptos[stream]); - } - - for (int stream = 0; stream < NUM_STREAM; stream++) { - if (VERBOSE) System.out.println(" stream=" + stream + " count=" + counters[stream]); - - if (starts[stream] != -1 && starts[stream] != uptos[stream]) { - reader.init(pool, starts[stream], uptos[stream]); - for (int j = 0; j < counters[stream]; j++) { - reader.readVInt(); - assertEquals(j, reader.readVInt()); - } - } - } - - pool.reset(); - } - } -} diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 9cf9db3f854..c76824bc92a 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -185,10 +185,7 @@ public class FSTCompletionLookup extends Lookup { output.reset(buffer); final int encodedWeight = encodeWeight(iterator.weight()); // write bytes for comparing in lexicographically order - output.writeByte((byte) (encodedWeight >> 24)); - output.writeByte((byte) (encodedWeight >> 16)); - output.writeByte((byte) (encodedWeight >> 8)); - output.writeByte((byte) encodedWeight); + output.writeInt(Integer.reverseBytes(encodedWeight)); output.writeBytes(spare.bytes, spare.offset, spare.length); writer.write(buffer, 0, 
output.getPosition()); inputLineCount++;