mirror of https://github.com/apache/lucene.git
LUCENE-10193: Cut over more array access to VarHandles. (#402)
LZ4 is interesting because it used to read data in little-endian order even though the Directory APIs were big-endian. So most calls to LZ4 in backward-codecs now reverse the endianness of their input/output.
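For context, the VarHandles in question are byte-array views like Lucene's `BitUtil.VH_LE_INT` and `BitUtil.VH_BE_SHORT`, which the hunks below switch to. A minimal standalone sketch of the JDK mechanism such constants are presumably built on (class and field names here are illustrative, not Lucene's):

```java
import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;

public class VarHandleDemo {
  // Byte-array view VarHandles in the same spirit as BitUtil.VH_LE_INT /
  // BitUtil.VH_BE_SHORT (the exact construction in Lucene may differ).
  static final VarHandle LE_INT =
      MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);
  static final VarHandle BE_SHORT =
      MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.BIG_ENDIAN);

  public static void main(String[] args) {
    byte[] buf = new byte[8];
    LE_INT.set(buf, 0, 0x04030201);         // stores bytes 01 02 03 04
    int v = (int) LE_INT.get(buf, 0);       // reads them back as one int
    short s = (short) BE_SHORT.get(buf, 0); // same bytes, big-endian: 0x0102
    System.out.println(Integer.toHexString(v) + " " + Integer.toHexString(s & 0xFFFF));
  }
}
```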
parent 6bb2bbcd6a
commit 0e1f9fcf31
@@ -17,6 +17,7 @@
 package org.apache.lucene.backward_codecs.lucene40.blocktree;
 
 import java.io.IOException;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.util.compress.LowercaseAsciiCompression;
 
@@ -42,7 +43,8 @@ enum CompressionAlgorithm {
 
     @Override
     void read(DataInput in, byte[] out, int len) throws IOException {
-      org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0);
+      org.apache.lucene.util.compress.LZ4.decompress(
+          EndiannessReverserUtil.wrapDataInput(in), len, out, 0);
     }
   };
 
@@ -20,15 +20,24 @@ import java.io.IOException;
 import java.util.Objects;
 import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50CompressingStoredFieldsFormat;
 import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicWriter;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.compress.LZ4;
 
 /**
  * Lucene 5.0 stored fields format.
@@ -148,7 +157,7 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
     switch (mode) {
       case BEST_SPEED:
         return new Lucene50CompressingStoredFieldsFormat(
-            "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10);
+            "Lucene50StoredFieldsFastData", FAST_MODE, 1 << 14, 128, 10);
       case BEST_COMPRESSION:
         return new Lucene50CompressingStoredFieldsFormat(
             "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10);
@@ -156,4 +165,71 @@ public class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
         throw new AssertionError();
     }
   }
+
+  static final CompressionMode FAST_MODE =
+      new CompressionMode() {
+
+        @Override
+        public Compressor newCompressor() {
+          return new LZ4FastCompressor();
+        }
+
+        @Override
+        public Decompressor newDecompressor() {
+          return LZ4_DECOMPRESSOR;
+        }
+
+        @Override
+        public String toString() {
+          return "FAST";
+        }
+      };
+
+  private static final class LZ4FastCompressor extends Compressor {
+
+    private final LZ4.FastCompressionHashTable ht;
+
+    LZ4FastCompressor() {
+      ht = new LZ4.FastCompressionHashTable();
+    }
+
+    @Override
+    public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+      LZ4.compress(bytes, off, len, EndiannessReverserUtil.wrapDataOutput(out), ht);
+    }
+
+    @Override
+    public void close() throws IOException {
+      // no-op
+    }
+  }
+
+  private static final Decompressor LZ4_DECOMPRESSOR =
+      new Decompressor() {
+
+        @Override
+        public void decompress(
+            DataInput in, int originalLength, int offset, int length, BytesRef bytes)
+            throws IOException {
+          assert offset + length <= originalLength;
+          // add 7 padding bytes, this is not necessary but can help decompression run faster
+          if (bytes.bytes.length < originalLength + 7) {
+            bytes.bytes = new byte[ArrayUtil.oversize(originalLength + 7, 1)];
+          }
+          final int decompressedLength =
+              LZ4.decompress(
+                  EndiannessReverserUtil.wrapDataInput(in), offset + length, bytes.bytes, 0);
+          if (decompressedLength > originalLength) {
+            throw new CorruptIndexException(
+                "Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength, in);
+          }
+          bytes.offset = offset;
+          bytes.length = length;
+        }
+
+        @Override
+        public Decompressor clone() {
+          return this;
+        }
+      };
 }
@@ -20,7 +20,6 @@ import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50Compressin
 import org.apache.lucene.backward_codecs.lucene87.Lucene87StoredFieldsFormat;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.compressing.CompressionMode;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.packed.BlockPackedWriter;
 import org.apache.lucene.util.packed.PackedInts;
@@ -154,6 +153,6 @@ public final class Lucene50TermVectorsFormat extends Lucene50CompressingTermVect
 
   /** Sole constructor. */
   public Lucene50TermVectorsFormat() {
-    super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10);
+    super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10);
   }
 }
@@ -476,7 +476,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
         }
       }
       maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength);
-      LZ4.compress(block, 0, uncompressedBlockLength, data, ht);
+      LZ4.compress(
+          block, 0, uncompressedBlockLength, EndiannessReverserUtil.wrapDataOutput(data), ht);
       numDocsInCurrentBlock = 0;
       // Ensure initialized with zeroes because full array is always written
       Arrays.fill(docLengths, 0);
@@ -847,7 +848,8 @@ final class Lucene80DocValuesConsumer extends DocValuesConsumer {
       int uncompressedLength = bufferedOutput.getPosition();
       data.writeVInt(uncompressedLength);
       long before = data.getFilePointer();
-      LZ4.compress(termsDictBuffer, 0, uncompressedLength, data, ht);
+      LZ4.compress(
+          termsDictBuffer, 0, uncompressedLength, EndiannessReverserUtil.wrapDataOutput(data), ht);
       int compressedLength = (int) (data.getFilePointer() - before);
       // Block length will be used for creating buffer for decompression, one corner case is that
       // compressed length might be bigger than un-compressed length, so just return the bigger one.
@@ -898,7 +898,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
         }
 
         assert uncompressedBlockLength <= uncompressedBlock.length;
-        LZ4.decompress(compressedData, uncompressedBlockLength, uncompressedBlock, 0);
+        LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(compressedData),
+            uncompressedBlockLength,
+            uncompressedBlock,
+            0);
       }
 
       uncompressedBytesRef.offset = uncompressedDocStarts[docInBlockId];
@@ -1355,7 +1359,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
       if (currentCompressedBlockStart != offset) {
         int decompressLength = bytes.readVInt();
         // Decompress the remaining of current block
-        LZ4.decompress(bytes, decompressLength, blockBuffer.bytes, 0);
+        LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(bytes), decompressLength, blockBuffer.bytes, 0);
         currentCompressedBlockStart = offset;
         currentCompressedBlockEnd = bytes.getFilePointer();
       } else {
@@ -17,6 +17,7 @@
 package org.apache.lucene.backward_codecs.lucene87;
 
 import java.io.IOException;
+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.compressing.CompressionMode;
 import org.apache.lucene.codecs.compressing.Compressor;
 import org.apache.lucene.codecs.compressing.Decompressor;
@@ -100,7 +101,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       buffer = ArrayUtil.grow(buffer, dictLength + blockLength);
       bytes.length = 0;
       // Read the dictionary
-      if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) {
+      if (LZ4.decompress(EndiannessReverserUtil.wrapDataInput(in), dictLength, buffer, 0)
+          != dictLength) {
         throw new CorruptIndexException("Illegal dict length", in);
       }
 
@@ -128,7 +130,8 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
       // Read blocks that intersect with the interval we need
       while (offsetInBlock < offset + length) {
         final int bytesToDecompress = Math.min(blockLength, offset + length - offsetInBlock);
-        LZ4.decompress(in, bytesToDecompress, buffer, dictLength);
+        LZ4.decompress(
+            EndiannessReverserUtil.wrapDataInput(in), bytesToDecompress, buffer, dictLength);
         bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + bytesToDecompress);
         System.arraycopy(buffer, dictLength, bytes.bytes, bytes.length, bytesToDecompress);
         bytes.length += bytesToDecompress;
@@ -21,7 +21,7 @@ import org.apache.lucene.store.DataInput;
 
 class EndiannessReverserDataInput extends DataInput {
 
-  private final DataInput in;
+  final DataInput in;
 
   EndiannessReverserDataInput(DataInput in) {
     this.in = in;
@@ -24,7 +24,7 @@ import org.apache.lucene.store.DataOutput;
 
 class EndiannessReverserDataOutput extends DataOutput {
 
-  private final DataOutput out;
+  final DataOutput out;
 
   EndiannessReverserDataOutput(DataOutput out) {
     this.out = out;
@@ -25,7 +25,7 @@ import org.apache.lucene.store.RandomAccessInput;
 
 /** A {@link IndexInput} wrapper that changes the endianness of the provided index input. */
 final class EndiannessReverserIndexInput extends IndexInput {
 
-  private final IndexInput in;
+  final IndexInput in;
 
   EndiannessReverserIndexInput(IndexInput in) {
     super("Endianness reverser Index Input wrapper");
@@ -25,7 +25,7 @@ import org.apache.lucene.store.IndexOutput;
 
 /** A {@link IndexOutput} wrapper that changes the endianness of the provided index output. */
 final class EndiannessReverserIndexOutput extends IndexOutput {
 
-  private final IndexOutput out;
+  final IndexOutput out;
 
   EndiannessReverserIndexOutput(IndexOutput out) {
     super("Endianness reverser Index Output wrapper", out.getName());
@@ -62,11 +62,23 @@ public final class EndiannessReverserUtil {
 
   /** wraps a data output */
   public static DataOutput wrapDataOutput(DataOutput dataOutput) {
+    if (dataOutput instanceof EndiannessReverserDataOutput) {
+      return ((EndiannessReverserDataOutput) dataOutput).out;
+    }
+    if (dataOutput instanceof EndiannessReverserIndexOutput) {
+      return ((EndiannessReverserIndexOutput) dataOutput).out;
+    }
     return new EndiannessReverserDataOutput(dataOutput);
   }
 
   /** wraps a data input */
   public static DataInput wrapDataInput(DataInput dataInput) {
+    if (dataInput instanceof EndiannessReverserDataInput) {
+      return ((EndiannessReverserDataInput) dataInput).in;
+    }
+    if (dataInput instanceof EndiannessReverserIndexInput) {
+      return ((EndiannessReverserIndexInput) dataInput).in;
+    }
    return new EndiannessReverserDataInput(dataInput);
  }
 }
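The unwrap-before-wrap logic added to EndiannessReverserUtil above (enabled by making the delegate fields package-private) means wrapping an already-wrapped stream returns the raw delegate instead of stacking two byte swaps, which would cancel out but cost an extra virtual call per read. A minimal standalone sketch of the pattern, with illustrative names rather than Lucene's:

```java
// Wrapping twice yields the original object, so byte order is flipped at
// most once no matter how many layers of code call wrap().
interface ByteSource {}

final class Reversed implements ByteSource {
  final ByteSource delegate;

  Reversed(ByteSource delegate) {
    this.delegate = delegate;
  }
}

public class WrapDemo {
  static ByteSource wrap(ByteSource s) {
    if (s instanceof Reversed) {
      return ((Reversed) s).delegate; // unwrap instead of double-wrapping
    }
    return new Reversed(s);
  }

  public static void main(String[] args) {
    ByteSource original = new ByteSource() {};
    System.out.println(wrap(wrap(original)) == original); // true
  }
}
```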
@@ -61,7 +61,7 @@ public final class Lucene50RWStoredFieldsFormat extends Lucene50StoredFieldsForm
     switch (mode) {
       case BEST_SPEED:
         return new Lucene50RWCompressingStoredFieldsFormat(
-            "Lucene50StoredFieldsFastData", CompressionMode.FAST, 1 << 14, 128, 10);
+            "Lucene50StoredFieldsFastData", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 14, 128, 10);
       case BEST_COMPRESSION:
         return new Lucene50RWCompressingStoredFieldsFormat(
             "Lucene50StoredFieldsHighData", CompressionMode.HIGH_COMPRESSION, 61440, 512, 10);
@@ -17,13 +17,12 @@
 package org.apache.lucene.backward_codecs.lucene50;
 
 import org.apache.lucene.backward_codecs.lucene50.compressing.Lucene50RWCompressingTermVectorsFormat;
-import org.apache.lucene.codecs.compressing.CompressionMode;
 
 /** RW impersonation of Lucene50StoredFieldsFormat. */
 public final class Lucene50RWTermVectorsFormat extends Lucene50RWCompressingTermVectorsFormat {
 
   /** Sole constructor. */
   public Lucene50RWTermVectorsFormat() {
-    super("Lucene50TermVectorsData", "", CompressionMode.FAST, 1 << 12, 128, 10);
+    super("Lucene50TermVectorsData", "", Lucene50StoredFieldsFormat.FAST_MODE, 1 << 12, 128, 10);
   }
 }
@@ -95,7 +95,7 @@ final class ByteSliceReader extends DataInput {
   public void nextSlice() {
 
     // Skip to our next slice
-    final int nextIndex = (int) BitUtil.VH_BE_INT.get(buffer, limit);
+    final int nextIndex = (int) BitUtil.VH_LE_INT.get(buffer, limit);
 
     level = ByteBlockPool.NEXT_LEVEL_ARRAY[level];
     final int newSize = ByteBlockPool.LEVEL_SIZE_ARRAY[level];
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
-
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.util.ByteBlockPool;
-
-/**
- * Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold
- * the posting list for many terms in RAM.
- */
-final class ByteSliceWriter extends DataOutput {
-
-  /* Initial chunks size of the shared byte[] blocks used to
-  store postings data */
-  private static final int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
-
-  private byte[] slice;
-  private int upto;
-  private final ByteBlockPool pool;
-
-  int offset0;
-
-  public ByteSliceWriter(ByteBlockPool pool) {
-    this.pool = pool;
-  }
-
-  /** Set up the writer to write at address. */
-  public void init(int address) {
-    slice = pool.buffers[address >> ByteBlockPool.BYTE_BLOCK_SHIFT];
-    assert slice != null;
-    upto = address & ByteBlockPool.BYTE_BLOCK_MASK;
-    offset0 = address;
-    assert upto < slice.length;
-  }
-
-  /** Write byte into byte slice stream */
-  @Override
-  public void writeByte(byte b) {
-    assert slice != null;
-    if (slice[upto] != 0) {
-      upto = pool.allocSlice(slice, upto);
-      slice = pool.buffer;
-      offset0 = pool.byteOffset;
-      assert slice != null;
-    }
-    slice[upto++] = b;
-    assert upto != slice.length;
-  }
-
-  @Override
-  public void writeBytes(final byte[] b, int offset, final int len) {
-    final int offsetEnd = offset + len;
-    while (offset < offsetEnd) {
-      if (slice[upto] != 0) {
-        // End marker
-        upto = pool.allocSlice(slice, upto);
-        slice = pool.buffer;
-        offset0 = pool.byteOffset;
-      }
-
-      slice[upto++] = b[offset++];
-      assert upto != slice.length;
-    }
-  }
-
-  public int getAddress() {
-    return upto + (offset0 & BYTE_BLOCK_NOT_MASK);
-  }
-}
@@ -254,17 +254,16 @@ public final class ByteBlockPool implements Accountable {
     final int offset = newUpto + byteOffset;
     byteUpto += newSize;
 
-    // Copy forward the past 3 bytes (which we are about
-    // to overwrite with the forwarding address):
-    buffer[newUpto] = slice[upto - 3];
-    buffer[newUpto + 1] = slice[upto - 2];
-    buffer[newUpto + 2] = slice[upto - 1];
+    // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address).
+    // We actually copy 4 bytes at once since VarHandles make it cheap.
+    int past3Bytes = ((int) BitUtil.VH_LE_INT.get(slice, upto - 3)) & 0xFFFFFF;
+    // Ensure we're not changing the content of `buffer` by setting 4 bytes instead of 3. This
+    // should never happen since the next `newSize` bytes must be equal to 0.
+    assert buffer[newUpto + 3] == 0;
+    BitUtil.VH_LE_INT.set(buffer, newUpto, past3Bytes);
 
     // Write forwarding address at end of last slice:
-    slice[upto - 3] = (byte) (offset >>> 24);
-    slice[upto - 2] = (byte) (offset >>> 16);
-    slice[upto - 1] = (byte) (offset >>> 8);
-    slice[upto] = (byte) offset;
+    BitUtil.VH_LE_INT.set(slice, upto - 3, offset);
 
     // Write new level:
     buffer[byteUpto - 1] = (byte) (16 | newLevel);
@@ -308,7 +307,7 @@ public final class ByteBlockPool implements Accountable {
       term.offset = pos + 1;
     } else {
       // length is 2 bytes
-      term.length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
+      term.length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF;
      term.offset = pos + 2;
    }
    assert term.length >= 0;
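The allocSlice rewrite above replaces three single-byte copies with one masked 4-byte VarHandle read/write; the fourth byte written is zero, which is safe only because the destination byte is asserted to be zero. A standalone sketch of that trick (the class name is illustrative):

```java
import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;

public class ThreeByteCopyDemo {
  static final VarHandle LE_INT =
      MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN);

  public static void main(String[] args) {
    byte[] src = {1, 2, 3, 99}; // the 4th byte (99) must not leak into dst
    byte[] dst = new byte[4];   // dst[3] is zero, so writing 4 bytes is safe
    int past3 = ((int) LE_INT.get(src, 0)) & 0xFFFFFF; // keep low 3 bytes only
    LE_INT.set(dst, 0, past3);  // writes 01 02 03 00
    System.out.println(dst[0] + " " + dst[1] + " " + dst[2] + " " + dst[3]); // 1 2 3 0
  }
}
```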
@@ -181,7 +181,7 @@ public final class BytesRefHash implements Accountable {
       offset = pos + 1;
     } else {
       // length is 2 bytes
-      length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
+      length = ((short) BitUtil.VH_BE_SHORT.get(bytes, pos)) & 0x7FFF;
       offset = pos + 2;
     }
     return Arrays.equals(bytes, offset, offset + length, b.bytes, b.offset, b.offset + b.length);
@@ -282,8 +282,7 @@ public final class BytesRefHash implements Accountable {
       System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1, length);
     } else {
       // 2 byte to store length
-      buffer[bufferUpto] = (byte) (0x80 | (length & 0x7f));
-      buffer[bufferUpto + 1] = (byte) ((length >> 7) & 0xff);
+      BitUtil.VH_BE_SHORT.set(buffer, bufferUpto, (short) (length | 0x8000));
       pool.byteUpto += length + 2;
       System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2, length);
     }
@@ -392,7 +391,7 @@ public final class BytesRefHash implements Accountable {
       len = bytes[start];
       pos = start + 1;
     } else {
-      len = (bytes[start] & 0x7f) + ((bytes[start + 1] & 0xff) << 7);
+      len = ((short) BitUtil.VH_BE_SHORT.get(bytes, start)) & 0x7FFF;
       pos = start + 2;
     }
     code = doHash(bytes, pos, len);
@@ -128,7 +128,7 @@ public final class PagedBytes implements Accountable {
       b.length = block[offset];
       b.offset = offset + 1;
     } else {
-      b.length = ((block[offset] & 0x7f) << 8) | (block[1 + offset] & 0xff);
+      b.length = ((short) BitUtil.VH_BE_SHORT.get(block, offset)) & 0x7FFF;
       b.offset = offset + 2;
       assert b.length > 0;
     }
@@ -285,8 +285,8 @@ public final class PagedBytes implements Accountable {
     if (bytes.length < 128) {
       currentBlock[upto++] = (byte) bytes.length;
     } else {
-      currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8));
-      currentBlock[upto++] = (byte) (bytes.length & 0xff);
+      BitUtil.VH_BE_SHORT.set(currentBlock, upto, (short) (bytes.length | 0x8000));
+      upto += 2;
     }
     System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
     upto += bytes.length;
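The BytesRefHash, ByteBlockPool, and PagedBytes hunks above converge on one two-byte length encoding: a big-endian short whose top bit marks the two-byte form, written and read with a single VarHandle access. A standalone sketch of that encoding as implied by the masks in the diff (valid for lengths below 0x8000; the class name is illustrative):

```java
import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;

public class LengthEncodingDemo {
  static final VarHandle BE_SHORT =
      MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.BIG_ENDIAN);

  public static void main(String[] args) {
    byte[] buf = new byte[2];
    int length = 300; // must be < 0x8000 for this encoding
    BE_SHORT.set(buf, 0, (short) (length | 0x8000)); // marker bit + length
    // Big-endian order puts the 0x80 marker in the first byte, so a reader
    // can distinguish the 1-byte and 2-byte forms by looking at buf[0].
    System.out.println((buf[0] & 0x80) != 0); // true
    int decoded = ((short) BE_SHORT.get(buf, 0)) & 0x7FFF;
    System.out.println(decoded); // 300
  }
}
```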
@@ -64,11 +64,8 @@ public final class OfflinePointWriter implements PointWriter {
             + "]";
 
     out.writeBytes(packedValue, 0, packedValue.length);
-    // write bytes for comparing in lexicographically order
-    out.writeByte((byte) (docID >> 24));
-    out.writeByte((byte) (docID >> 16));
-    out.writeByte((byte) (docID >> 8));
-    out.writeByte((byte) docID);
+    // write bytes in big-endian order for comparing in lexicographically order
+    out.writeInt(Integer.reverseBytes(docID));
     count++;
     assert expectedCount == 0 || count <= expectedCount
         : "expectedCount=" + expectedCount + " vs count=" + count;
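Since the Lucene 9 DataOutput APIs write little-endian, `writeInt(Integer.reverseBytes(docID))` in the hunk above emits the int's bytes in big-endian order, so unsigned lexicographic comparison of the serialized bytes matches numeric order for non-negative docIDs. A standalone check of that property, with ByteBuffer standing in for the little-endian DataOutput (class name illustrative):

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;

public class BigEndianSortDemo {
  static byte[] encode(int docID) {
    // little-endian putInt of reverseBytes(docID) == big-endian byte layout
    return ByteBuffer.allocate(4)
        .order(ByteOrder.LITTLE_ENDIAN)
        .putInt(Integer.reverseBytes(docID))
        .array();
  }

  public static void main(String[] args) {
    byte[] a = encode(5), b = encode(300);
    // Unsigned lexicographic byte order agrees with numeric order: 5 < 300.
    System.out.println(Arrays.compareUnsigned(a, b) < 0); // true
  }
}
```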
@@ -107,7 +107,7 @@ public final class LZ4 {
     }
 
     // matchs
-    final int matchDec = (compressed.readByte() & 0xFF) | ((compressed.readByte() & 0xFF) << 8);
+    final int matchDec = compressed.readShort() & 0xFFFF;
     assert matchDec > 0;
 
     int matchLen = token & 0x0F;
@@ -176,8 +176,7 @@ public final class LZ4 {
     // encode match dec
     final int matchDec = matchOff - matchRef;
     assert matchDec > 0 && matchDec < 1 << 16;
-    out.writeByte((byte) matchDec);
-    out.writeByte((byte) (matchDec >>> 8));
+    out.writeShort((short) matchDec);
 
     // encode match len
     if (matchLen >= MIN_MATCH + 0x0F) {
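LZ4 stores its 16-bit match offset little-endian, so with the now little-endian readShort/writeShort the manual two-byte assembly above collapses into a single short access. A standalone check that the two computations agree (class name illustrative):

```java
public class MatchDecDemo {
  public static void main(String[] args) {
    byte[] buf = {0x34, 0x12}; // little-endian encoding of 0x1234
    int manual = (buf[0] & 0xFF) | ((buf[1] & 0xFF) << 8); // old byte-by-byte read
    short le = (short) ((buf[0] & 0xFF) | ((buf[1] & 0xFF) << 8)); // what a LE readShort returns
    int viaShort = le & 0xFFFF; // new readShort() & 0xFFFF form
    System.out.println(manual == viaShort && manual == 0x1234); // true
  }
}
```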
@@ -1,108 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-import java.util.Random;
-import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.RecyclingByteBlockAllocator;
-
-public class TestByteSlices extends LuceneTestCase {
-
-  public void testBasic() throws Throwable {
-    Random random = random();
-    ByteBlockPool pool =
-        new ByteBlockPool(
-            new RecyclingByteBlockAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, random().nextInt(100)));
-
-    final int NUM_STREAM = atLeast(random, 100);
-
-    ByteSliceWriter writer = new ByteSliceWriter(pool);
-
-    int[] starts = new int[NUM_STREAM];
-    int[] uptos = new int[NUM_STREAM];
-    int[] counters = new int[NUM_STREAM];
-
-    ByteSliceReader reader = new ByteSliceReader();
-
-    for (int ti = 0; ti < 100; ti++) {
-
-      for (int stream = 0; stream < NUM_STREAM; stream++) {
-        starts[stream] = -1;
-        counters[stream] = 0;
-      }
-
-      int num = atLeast(random, 3000);
-      for (int iter = 0; iter < num; iter++) {
-        int stream;
-        if (random.nextBoolean()) {
-          stream = random.nextInt(3);
-        } else {
-          stream = random.nextInt(NUM_STREAM);
-        }
-
-        if (VERBOSE) {
-          System.out.println("write stream=" + stream);
-        }
-
-        if (starts[stream] == -1) {
-          final int spot = pool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
-          starts[stream] = uptos[stream] = spot + pool.byteOffset;
-          if (VERBOSE) {
-            System.out.println("  init to " + starts[stream]);
-          }
-        }
-
-        writer.init(uptos[stream]);
-        int numValue;
-        if (random.nextInt(10) == 3) {
-          numValue = random.nextInt(100);
-        } else if (random.nextInt(5) == 3) {
-          numValue = random.nextInt(3);
-        } else {
-          numValue = random.nextInt(20);
-        }
-
-        for (int j = 0; j < numValue; j++) {
-          if (VERBOSE) {
-            System.out.println("    write " + (counters[stream] + j));
-          }
-          // write some large (incl. negative) ints:
-          writer.writeVInt(random.nextInt());
-          writer.writeVInt(counters[stream] + j);
-        }
-        counters[stream] += numValue;
-        uptos[stream] = writer.getAddress();
-        if (VERBOSE) System.out.println("    addr now " + uptos[stream]);
-      }
-
-      for (int stream = 0; stream < NUM_STREAM; stream++) {
-        if (VERBOSE) System.out.println("  stream=" + stream + " count=" + counters[stream]);
-
-        if (starts[stream] != -1 && starts[stream] != uptos[stream]) {
-          reader.init(pool, starts[stream], uptos[stream]);
-          for (int j = 0; j < counters[stream]; j++) {
-            reader.readVInt();
-            assertEquals(j, reader.readVInt());
-          }
-        }
-      }
-
-      pool.reset();
-    }
-  }
-}
@@ -185,10 +185,7 @@ public class FSTCompletionLookup extends Lookup {
       output.reset(buffer);
       final int encodedWeight = encodeWeight(iterator.weight());
       // write bytes for comparing in lexicographically order
-      output.writeByte((byte) (encodedWeight >> 24));
-      output.writeByte((byte) (encodedWeight >> 16));
-      output.writeByte((byte) (encodedWeight >> 8));
-      output.writeByte((byte) encodedWeight);
+      output.writeInt(Integer.reverseBytes(encodedWeight));
       output.writeBytes(spare.bytes, spare.offset, spare.length);
       writer.write(buffer, 0, output.getPosition());
       inputLineCount++;