LUCENE-9907: Remove packedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)

This commit is contained in:
Ignacio Vera 2021-04-15 16:04:13 +02:00 committed by GitHub
parent d03662c48b
commit 873ac5f162
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 57 deletions

View File

@ -30,6 +30,8 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
@ -50,15 +52,21 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.BlockPackedReaderIterator;
import org.apache.lucene.util.packed.DirectReader;
import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
@ -295,6 +303,13 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
return new Lucene90CompressingTermVectorsReader(this);
}
// Reads one length-prefixed byte block from the stream into heap memory and
// exposes it as a RandomAccessInput, suitable for DirectReader.getInstance.
// NOTE(review): the length prefix is read with readVInt here, but the writer
// side uses writeVLong in flushFields and writeVInt in flushFlags/flushNumTerms;
// the two encodings agree for values < 2^31, which this relies on — confirm
// the writer is made consistent (or that block sizes can never exceed vInt range).
private static RandomAccessInput slice(IndexInput in) throws IOException {
final int length = in.readVInt();
final byte[] bytes = new byte[length];
in.readBytes(bytes, 0, length);
// ByteBuffersDataInput over a single heap buffer provides the random-access view.
return new ByteBuffersDataInput(Collections.singletonList(ByteBuffer.wrap(bytes)));
}
@Override
public Fields get(int doc) throws IOException {
ensureOpen();
@ -368,38 +383,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
// read field numbers and flags
final int[] fieldNumOffs = new int[numFields];
final PackedInts.Reader flags;
final LongValues flags;
{
final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
final PackedInts.Reader allFieldNumOffs =
PackedInts.getReaderNoHeader(
vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
final int bitsPerOff = DirectWriter.bitsRequired(fieldNums.length - 1);
final LongValues allFieldNumOffs = DirectReader.getInstance(slice(vectorsStream), bitsPerOff);
switch (vectorsStream.readVInt()) {
case 0:
final PackedInts.Reader fieldFlags =
PackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,
fieldNums.length,
FLAGS_BITS);
PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
final LongValues fieldFlags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
final ByteBuffersDataOutput out = new ByteBuffersDataOutput();
final DirectWriter writer = DirectWriter.getInstance(out, totalFields, FLAGS_BITS);
for (int i = 0; i < totalFields; ++i) {
final int fieldNumOff = (int) allFieldNumOffs.get(i);
assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
final int fgs = (int) fieldFlags.get(fieldNumOff);
f.set(i, fgs);
writer.add(fieldFlags.get(fieldNumOff));
}
flags = f;
writer.finish();
flags = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS);
break;
case 1:
flags =
PackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,
totalFields,
FLAGS_BITS);
flags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
break;
default:
throw new AssertionError();
@ -410,17 +412,11 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
}
// number of terms per field for all fields
final PackedInts.Reader numTerms;
final LongValues numTerms;
final int totalTerms;
{
final int bitsRequired = vectorsStream.readVInt();
numTerms =
PackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,
totalFields,
bitsRequired);
numTerms = DirectReader.getInstance(slice(vectorsStream), bitsRequired);
int sum = 0;
for (int i = 0; i < totalFields; ++i) {
sum += numTerms.get(i);
@ -711,8 +707,7 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
}
// field -> term index -> position index
private int[][] positionIndex(
int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
private int[][] positionIndex(int skip, int numFields, LongValues numTerms, int[] termFreqs) {
final int[][] positionIndex = new int[numFields][];
int termIndex = 0;
for (int i = 0; i < skip; ++i) {
@ -734,8 +729,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
private int[][] readPositions(
int skip,
int numFields,
PackedInts.Reader flags,
PackedInts.Reader numTerms,
LongValues flags,
LongValues numTerms,
int[] termFreqs,
int flag,
final int totalPositions,

View File

@ -51,6 +51,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.PackedInts;
/**
@ -74,7 +75,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
static final int POSITIONS = 0x01;
static final int OFFSETS = 0x02;
static final int PAYLOADS = 0x04;
static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
static final int FLAGS_BITS = DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
private final String segment;
private FieldsIndexWriter indexWriter;
@ -223,6 +224,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
private final ByteBuffersDataOutput payloadBytes; // buffered term payloads
private final BlockPackedWriter writer;
private final int maxDocsPerChunk; // hard limit on number of docs per chunk
private final ByteBuffersDataOutput scratchBuffer = ByteBuffersDataOutput.newResettableInstance();
/** Sole constructor. */
Lucene90CompressingTermVectorsWriter(
@ -478,13 +480,10 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
}
private void flushFields(int totalFields, int[] fieldNums) throws IOException {
final PackedInts.Writer writer =
PackedInts.getWriterNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
totalFields,
PackedInts.bitsRequired(fieldNums.length - 1),
1);
scratchBuffer.reset();
final DirectWriter writer =
DirectWriter.getInstance(
scratchBuffer, totalFields, DirectWriter.bitsRequired(fieldNums.length - 1));
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
@ -493,6 +492,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
}
}
writer.finish();
vectorsStream.writeVLong(scratchBuffer.size());
scratchBuffer.copyTo(vectorsStream);
}
private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
@ -517,28 +518,29 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
if (nonChangingFlags) {
// write one flag per field num
vectorsStream.writeVInt(0);
final PackedInts.Writer writer =
PackedInts.getWriterNoHeader(
vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1);
scratchBuffer.reset();
final DirectWriter writer =
DirectWriter.getInstance(scratchBuffer, fieldFlags.length, FLAGS_BITS);
for (int flags : fieldFlags) {
assert flags >= 0;
writer.add(flags);
}
assert writer.ord() == fieldFlags.length - 1;
writer.finish();
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
scratchBuffer.copyTo(vectorsStream);
} else {
// write one flag for every field instance
vectorsStream.writeVInt(1);
final PackedInts.Writer writer =
PackedInts.getWriterNoHeader(
vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
scratchBuffer.reset();
final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, FLAGS_BITS);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
writer.add(fd.flags);
}
}
assert writer.ord() == totalFields - 1;
writer.finish();
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
scratchBuffer.copyTo(vectorsStream);
}
}
@ -549,18 +551,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
maxNumTerms |= fd.numTerms;
}
}
final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
final int bitsRequired = DirectWriter.bitsRequired(maxNumTerms);
vectorsStream.writeVInt(bitsRequired);
final PackedInts.Writer writer =
PackedInts.getWriterNoHeader(
vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
scratchBuffer.reset();
final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, bitsRequired);
for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) {
writer.add(fd.numTerms);
}
}
assert writer.ord() == totalFields - 1;
writer.finish();
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
scratchBuffer.copyTo(vectorsStream);
}
private void flushTermLengths() throws IOException {
@ -954,7 +956,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
+ payloadLengthsBuf.length
+ termSuffixes.ramBytesUsed()
+ payloadBytes.ramBytesUsed()
+ lastTerm.bytes.length;
+ lastTerm.bytes.length
+ scratchBuffer.ramBytesUsed();
}
@Override