mirror of https://github.com/apache/lucene.git
LUCENE-9907: Remove PackedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)
This commit is contained in: parent d03662c48b, commit 873ac5f162

Lucene90CompressingTermVectorsReader.java
@@ -30,6 +30,8 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
 import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 import org.apache.lucene.codecs.CodecUtil;
@@ -50,15 +52,21 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteBuffersDataInput;
+import org.apache.lucene.store.ByteBuffersDataOutput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LongValues;
 import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.packed.BlockPackedReaderIterator;
+import org.apache.lucene.util.packed.DirectReader;
+import org.apache.lucene.util.packed.DirectWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**
@@ -295,6 +303,13 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
     return new Lucene90CompressingTermVectorsReader(this);
   }
 
+  private static RandomAccessInput slice(IndexInput in) throws IOException {
+    final int length = in.readVInt();
+    final byte[] bytes = new byte[length];
+    in.readBytes(bytes, 0, length);
+    return new ByteBuffersDataInput(Collections.singletonList(ByteBuffer.wrap(bytes)));
+  }
+
   @Override
   public Fields get(int doc) throws IOException {
     ensureOpen();
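
Note: the new slice() helper assumes every packed block is now preceded by its byte length on disk. A minimal sketch of the pairing, where `out` and `in` are hypothetical stand-ins for vectorsStream on the write and read side:

// Writer side (mirrors the flush methods below): pack into a scratch buffer,
// then emit the byte length so the reader knows how much to bulk-load.
ByteBuffersDataOutput scratch = new ByteBuffersDataOutput();
DirectWriter w = DirectWriter.getInstance(scratch, 3, 4); // 3 values, 4 bits each
w.add(1);
w.add(7);
w.add(2);
w.finish();
out.writeVInt(Math.toIntExact(scratch.size())); // length prefix consumed by slice()
scratch.copyTo(out);

// Reader side: slice() pulls exactly that many bytes into memory and exposes
// them as RandomAccessInput, which DirectReader requires for positional reads.
LongValues values = DirectReader.getInstance(slice(in), 4);
long second = values.get(1); // == 7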
@@ -368,38 +383,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
 
     // read field numbers and flags
     final int[] fieldNumOffs = new int[numFields];
-    final PackedInts.Reader flags;
+    final LongValues flags;
     {
-      final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
-      final PackedInts.Reader allFieldNumOffs =
-          PackedInts.getReaderNoHeader(
-              vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
+      final int bitsPerOff = DirectWriter.bitsRequired(fieldNums.length - 1);
+      final LongValues allFieldNumOffs = DirectReader.getInstance(slice(vectorsStream), bitsPerOff);
       switch (vectorsStream.readVInt()) {
         case 0:
-          final PackedInts.Reader fieldFlags =
-              PackedInts.getReaderNoHeader(
-                  vectorsStream,
-                  PackedInts.Format.PACKED,
-                  packedIntsVersion,
-                  fieldNums.length,
-                  FLAGS_BITS);
-          PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
+          final LongValues fieldFlags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
+          final ByteBuffersDataOutput out = new ByteBuffersDataOutput();
+          final DirectWriter writer = DirectWriter.getInstance(out, totalFields, FLAGS_BITS);
           for (int i = 0; i < totalFields; ++i) {
             final int fieldNumOff = (int) allFieldNumOffs.get(i);
             assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
-            final int fgs = (int) fieldFlags.get(fieldNumOff);
-            f.set(i, fgs);
+            writer.add(fieldFlags.get(fieldNumOff));
           }
-          flags = f;
+          writer.finish();
+          flags = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS);
           break;
         case 1:
-          flags =
-              PackedInts.getReaderNoHeader(
-                  vectorsStream,
-                  PackedInts.Format.PACKED,
-                  packedIntsVersion,
-                  totalFields,
-                  FLAGS_BITS);
+          flags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
           break;
         default:
           throw new AssertionError();
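
Note: the case 0 branch buffers the expanded per-field flags in memory because DirectReader needs random access rather than a forward stream. A self-contained sketch of that buffer-then-reread pattern (values made up):

ByteBuffersDataOutput out = new ByteBuffersDataOutput();
DirectWriter writer = DirectWriter.getInstance(out, 4, FLAGS_BITS);
for (long flag : new long[] {1, 3, 0, 5}) { // e.g. POSITIONS, POSITIONS|OFFSETS, ...
  writer.add(flag);
}
writer.finish();
// toDataInput() exposes the buffered bytes as a RandomAccessInput.
LongValues reread = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS);
assert reread.get(3) == 5;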
@@ -410,17 +412,11 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
     }
 
     // number of terms per field for all fields
-    final PackedInts.Reader numTerms;
+    final LongValues numTerms;
     final int totalTerms;
     {
       final int bitsRequired = vectorsStream.readVInt();
-      numTerms =
-          PackedInts.getReaderNoHeader(
-              vectorsStream,
-              PackedInts.Format.PACKED,
-              packedIntsVersion,
-              totalFields,
-              bitsRequired);
+      numTerms = DirectReader.getInstance(slice(vectorsStream), bitsRequired);
       int sum = 0;
       for (int i = 0; i < totalFields; ++i) {
         sum += numTerms.get(i);
@@ -711,8 +707,7 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
   }
 
   // field -> term index -> position index
-  private int[][] positionIndex(
-      int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
+  private int[][] positionIndex(int skip, int numFields, LongValues numTerms, int[] termFreqs) {
     final int[][] positionIndex = new int[numFields][];
     int termIndex = 0;
     for (int i = 0; i < skip; ++i) {
@@ -734,8 +729,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
   private int[][] readPositions(
       int skip,
       int numFields,
-      PackedInts.Reader flags,
-      PackedInts.Reader numTerms,
+      LongValues flags,
+      LongValues numTerms,
       int[] termFreqs,
       int flag,
       final int totalPositions,
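
Note: both reader-side signature changes swap PackedInts.Reader for org.apache.lucene.util.LongValues, which promises nothing beyond positional access, so any long supplier fits. For illustration only:

LongValues squares = new LongValues() {
  @Override
  public long get(long index) {
    return index * index;
  }
};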

Lucene90CompressingTermVectorsWriter.java
@@ -51,6 +51,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.packed.BlockPackedWriter;
+import org.apache.lucene.util.packed.DirectWriter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**
@@ -74,7 +75,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
   static final int POSITIONS = 0x01;
   static final int OFFSETS = 0x02;
   static final int PAYLOADS = 0x04;
-  static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
+  static final int FLAGS_BITS = DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
 
   private final String segment;
   private FieldsIndexWriter indexWriter;
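
Note: DirectWriter only supports a fixed set of bit widths (1, 2, 4, 8, 12, ...), so this change widens FLAGS_BITS from 3 to 4 bits per value:

PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);   // 3 bits for 0x07
DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS); // 4, the next supported width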
@@ -223,6 +224,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
   private final ByteBuffersDataOutput payloadBytes; // buffered term payloads
   private final BlockPackedWriter writer;
   private final int maxDocsPerChunk; // hard limit on number of docs per chunk
+  private final ByteBuffersDataOutput scratchBuffer = ByteBuffersDataOutput.newResettableInstance();
 
   /** Sole constructor. */
   Lucene90CompressingTermVectorsWriter(
@@ -478,13 +480,10 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
   }
 
   private void flushFields(int totalFields, int[] fieldNums) throws IOException {
-    final PackedInts.Writer writer =
-        PackedInts.getWriterNoHeader(
-            vectorsStream,
-            PackedInts.Format.PACKED,
-            totalFields,
-            PackedInts.bitsRequired(fieldNums.length - 1),
-            1);
+    scratchBuffer.reset();
+    final DirectWriter writer =
+        DirectWriter.getInstance(
+            scratchBuffer, totalFields, DirectWriter.bitsRequired(fieldNums.length - 1));
     for (DocData dd : pendingDocs) {
       for (FieldData fd : dd.fields) {
         final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
@@ -493,6 +492,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
       }
     }
     writer.finish();
+    vectorsStream.writeVLong(scratchBuffer.size());
+    scratchBuffer.copyTo(vectorsStream);
   }
 
   private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
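
Note: flushFields and the flush methods below now share one shape, since DirectWriter needs the value count up front and slice() needs the byte length: reset the scratch buffer, pack, then write a length prefix followed by the payload. Schematically, with `count`, `bits` and `values` as placeholders:

scratchBuffer.reset();
DirectWriter writer = DirectWriter.getInstance(scratchBuffer, count, bits);
for (long v : values) {
  writer.add(v);
}
writer.finish();
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size())); // read back via slice()
scratchBuffer.copyTo(vectorsStream);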
@@ -517,28 +518,29 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
     if (nonChangingFlags) {
       // write one flag per field num
       vectorsStream.writeVInt(0);
-      final PackedInts.Writer writer =
-          PackedInts.getWriterNoHeader(
-              vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1);
+      scratchBuffer.reset();
+      final DirectWriter writer =
+          DirectWriter.getInstance(scratchBuffer, fieldFlags.length, FLAGS_BITS);
       for (int flags : fieldFlags) {
         assert flags >= 0;
         writer.add(flags);
       }
-      assert writer.ord() == fieldFlags.length - 1;
       writer.finish();
+      vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+      scratchBuffer.copyTo(vectorsStream);
     } else {
       // write one flag for every field instance
       vectorsStream.writeVInt(1);
-      final PackedInts.Writer writer =
-          PackedInts.getWriterNoHeader(
-              vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
+      scratchBuffer.reset();
+      final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, FLAGS_BITS);
       for (DocData dd : pendingDocs) {
         for (FieldData fd : dd.fields) {
           writer.add(fd.flags);
         }
       }
-      assert writer.ord() == totalFields - 1;
       writer.finish();
+      vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+      scratchBuffer.copyTo(vectorsStream);
     }
   }
 
@@ -549,18 +551,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
         maxNumTerms |= fd.numTerms;
       }
     }
-    final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
+    final int bitsRequired = DirectWriter.bitsRequired(maxNumTerms);
     vectorsStream.writeVInt(bitsRequired);
-    final PackedInts.Writer writer =
-        PackedInts.getWriterNoHeader(
-            vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
+    scratchBuffer.reset();
+    final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, bitsRequired);
     for (DocData dd : pendingDocs) {
       for (FieldData fd : dd.fields) {
        writer.add(fd.numTerms);
       }
     }
-    assert writer.ord() == totalFields - 1;
     writer.finish();
+    vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
+    scratchBuffer.copyTo(vectorsStream);
   }
 
   private void flushTermLengths() throws IOException {
@@ -954,7 +956,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWriter
         + payloadLengthsBuf.length
         + termSuffixes.ramBytesUsed()
         + payloadBytes.ramBytesUsed()
-        + lastTerm.bytes.length;
+        + lastTerm.bytes.length
+        + scratchBuffer.ramBytesUsed();
   }
 
   @Override