mirror of https://github.com/apache/lucene.git
LUCENE-9907: Remove the term vectors format's dependency on PackedInts#getReaderNoHeader (#72)
This commit is contained in:
parent
d03662c48b
commit
873ac5f162
|
@ -30,6 +30,8 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
|
||||||
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
|
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
@ -50,15 +52,21 @@ import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
import org.apache.lucene.store.ByteArrayDataInput;
|
import org.apache.lucene.store.ByteArrayDataInput;
|
||||||
|
import org.apache.lucene.store.ByteBuffersDataInput;
|
||||||
|
import org.apache.lucene.store.ByteBuffersDataOutput;
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.RandomAccessInput;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.LongValues;
|
||||||
import org.apache.lucene.util.LongsRef;
|
import org.apache.lucene.util.LongsRef;
|
||||||
import org.apache.lucene.util.packed.BlockPackedReaderIterator;
|
import org.apache.lucene.util.packed.BlockPackedReaderIterator;
|
||||||
|
import org.apache.lucene.util.packed.DirectReader;
|
||||||
|
import org.apache.lucene.util.packed.DirectWriter;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -295,6 +303,13 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
|
||||||
return new Lucene90CompressingTermVectorsReader(this);
|
return new Lucene90CompressingTermVectorsReader(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads one length-prefixed span from {@code in} and exposes it as a {@link RandomAccessInput}.
 *
 * <p>The span is a vInt byte count followed by that many bytes. The bytes are copied into a
 * newly allocated array, so the returned input is backed by that copy rather than by {@code in}.
 * Callers in this class hand the result to {@code DirectReader.getInstance}.
 *
 * @param in input positioned at the vInt length prefix of the span
 * @return a random-access view over the copied bytes
 * @throws IOException if reading the length or the bytes from {@code in} fails
 */
private static RandomAccessInput slice(IndexInput in) throws IOException {
|
||||||
|
final int length = in.readVInt(); // byte count of the span that follows
|
||||||
|
final byte[] bytes = new byte[length];
|
||||||
|
in.readBytes(bytes, 0, length); // copy the whole span out of the stream
|
||||||
|
// ByteBuffersDataInput takes a list of buffers; a single wrapped array is enough here.
return new ByteBuffersDataInput(Collections.singletonList(ByteBuffer.wrap(bytes)));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Fields get(int doc) throws IOException {
|
public Fields get(int doc) throws IOException {
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
|
@ -368,38 +383,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
|
||||||
|
|
||||||
// read field numbers and flags
|
// read field numbers and flags
|
||||||
final int[] fieldNumOffs = new int[numFields];
|
final int[] fieldNumOffs = new int[numFields];
|
||||||
final PackedInts.Reader flags;
|
final LongValues flags;
|
||||||
{
|
{
|
||||||
final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1);
|
final int bitsPerOff = DirectWriter.bitsRequired(fieldNums.length - 1);
|
||||||
final PackedInts.Reader allFieldNumOffs =
|
final LongValues allFieldNumOffs = DirectReader.getInstance(slice(vectorsStream), bitsPerOff);
|
||||||
PackedInts.getReaderNoHeader(
|
|
||||||
vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
|
|
||||||
switch (vectorsStream.readVInt()) {
|
switch (vectorsStream.readVInt()) {
|
||||||
case 0:
|
case 0:
|
||||||
final PackedInts.Reader fieldFlags =
|
final LongValues fieldFlags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
|
||||||
PackedInts.getReaderNoHeader(
|
final ByteBuffersDataOutput out = new ByteBuffersDataOutput();
|
||||||
vectorsStream,
|
final DirectWriter writer = DirectWriter.getInstance(out, totalFields, FLAGS_BITS);
|
||||||
PackedInts.Format.PACKED,
|
|
||||||
packedIntsVersion,
|
|
||||||
fieldNums.length,
|
|
||||||
FLAGS_BITS);
|
|
||||||
PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
|
|
||||||
for (int i = 0; i < totalFields; ++i) {
|
for (int i = 0; i < totalFields; ++i) {
|
||||||
final int fieldNumOff = (int) allFieldNumOffs.get(i);
|
final int fieldNumOff = (int) allFieldNumOffs.get(i);
|
||||||
assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
|
assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
|
||||||
final int fgs = (int) fieldFlags.get(fieldNumOff);
|
writer.add(fieldFlags.get(fieldNumOff));
|
||||||
f.set(i, fgs);
|
|
||||||
}
|
}
|
||||||
flags = f;
|
writer.finish();
|
||||||
|
flags = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS);
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
flags =
|
flags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
|
||||||
PackedInts.getReaderNoHeader(
|
|
||||||
vectorsStream,
|
|
||||||
PackedInts.Format.PACKED,
|
|
||||||
packedIntsVersion,
|
|
||||||
totalFields,
|
|
||||||
FLAGS_BITS);
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
|
@ -410,17 +412,11 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
|
||||||
}
|
}
|
||||||
|
|
||||||
// number of terms per field for all fields
|
// number of terms per field for all fields
|
||||||
final PackedInts.Reader numTerms;
|
final LongValues numTerms;
|
||||||
final int totalTerms;
|
final int totalTerms;
|
||||||
{
|
{
|
||||||
final int bitsRequired = vectorsStream.readVInt();
|
final int bitsRequired = vectorsStream.readVInt();
|
||||||
numTerms =
|
numTerms = DirectReader.getInstance(slice(vectorsStream), bitsRequired);
|
||||||
PackedInts.getReaderNoHeader(
|
|
||||||
vectorsStream,
|
|
||||||
PackedInts.Format.PACKED,
|
|
||||||
packedIntsVersion,
|
|
||||||
totalFields,
|
|
||||||
bitsRequired);
|
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
for (int i = 0; i < totalFields; ++i) {
|
for (int i = 0; i < totalFields; ++i) {
|
||||||
sum += numTerms.get(i);
|
sum += numTerms.get(i);
|
||||||
|
@ -711,8 +707,7 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
|
||||||
}
|
}
|
||||||
|
|
||||||
// field -> term index -> position index
|
// field -> term index -> position index
|
||||||
private int[][] positionIndex(
|
private int[][] positionIndex(int skip, int numFields, LongValues numTerms, int[] termFreqs) {
|
||||||
int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
|
|
||||||
final int[][] positionIndex = new int[numFields][];
|
final int[][] positionIndex = new int[numFields][];
|
||||||
int termIndex = 0;
|
int termIndex = 0;
|
||||||
for (int i = 0; i < skip; ++i) {
|
for (int i = 0; i < skip; ++i) {
|
||||||
|
@ -734,8 +729,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
|
||||||
private int[][] readPositions(
|
private int[][] readPositions(
|
||||||
int skip,
|
int skip,
|
||||||
int numFields,
|
int numFields,
|
||||||
PackedInts.Reader flags,
|
LongValues flags,
|
||||||
PackedInts.Reader numTerms,
|
LongValues numTerms,
|
||||||
int[] termFreqs,
|
int[] termFreqs,
|
||||||
int flag,
|
int flag,
|
||||||
final int totalPositions,
|
final int totalPositions,
|
||||||
|
|
|
@ -51,6 +51,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.lucene.util.packed.BlockPackedWriter;
|
import org.apache.lucene.util.packed.BlockPackedWriter;
|
||||||
|
import org.apache.lucene.util.packed.DirectWriter;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -74,7 +75,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
|
||||||
static final int POSITIONS = 0x01;
|
static final int POSITIONS = 0x01;
|
||||||
static final int OFFSETS = 0x02;
|
static final int OFFSETS = 0x02;
|
||||||
static final int PAYLOADS = 0x04;
|
static final int PAYLOADS = 0x04;
|
||||||
static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
|
static final int FLAGS_BITS = DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
|
||||||
|
|
||||||
private final String segment;
|
private final String segment;
|
||||||
private FieldsIndexWriter indexWriter;
|
private FieldsIndexWriter indexWriter;
|
||||||
|
@ -223,6 +224,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
|
||||||
private final ByteBuffersDataOutput payloadBytes; // buffered term payloads
|
private final ByteBuffersDataOutput payloadBytes; // buffered term payloads
|
||||||
private final BlockPackedWriter writer;
|
private final BlockPackedWriter writer;
|
||||||
private final int maxDocsPerChunk; // hard limit on number of docs per chunk
|
private final int maxDocsPerChunk; // hard limit on number of docs per chunk
|
||||||
|
private final ByteBuffersDataOutput scratchBuffer = ByteBuffersDataOutput.newResettableInstance();
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
Lucene90CompressingTermVectorsWriter(
|
Lucene90CompressingTermVectorsWriter(
|
||||||
|
@ -478,13 +480,10 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
|
||||||
}
|
}
|
||||||
|
|
||||||
private void flushFields(int totalFields, int[] fieldNums) throws IOException {
|
private void flushFields(int totalFields, int[] fieldNums) throws IOException {
|
||||||
final PackedInts.Writer writer =
|
scratchBuffer.reset();
|
||||||
PackedInts.getWriterNoHeader(
|
final DirectWriter writer =
|
||||||
vectorsStream,
|
DirectWriter.getInstance(
|
||||||
PackedInts.Format.PACKED,
|
scratchBuffer, totalFields, DirectWriter.bitsRequired(fieldNums.length - 1));
|
||||||
totalFields,
|
|
||||||
PackedInts.bitsRequired(fieldNums.length - 1),
|
|
||||||
1);
|
|
||||||
for (DocData dd : pendingDocs) {
|
for (DocData dd : pendingDocs) {
|
||||||
for (FieldData fd : dd.fields) {
|
for (FieldData fd : dd.fields) {
|
||||||
final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
|
final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
|
||||||
|
@ -493,6 +492,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writer.finish();
|
writer.finish();
|
||||||
|
vectorsStream.writeVLong(scratchBuffer.size());
|
||||||
|
scratchBuffer.copyTo(vectorsStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
|
private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
|
||||||
|
@ -517,28 +518,29 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
|
||||||
if (nonChangingFlags) {
|
if (nonChangingFlags) {
|
||||||
// write one flag per field num
|
// write one flag per field num
|
||||||
vectorsStream.writeVInt(0);
|
vectorsStream.writeVInt(0);
|
||||||
final PackedInts.Writer writer =
|
scratchBuffer.reset();
|
||||||
PackedInts.getWriterNoHeader(
|
final DirectWriter writer =
|
||||||
vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1);
|
DirectWriter.getInstance(scratchBuffer, fieldFlags.length, FLAGS_BITS);
|
||||||
for (int flags : fieldFlags) {
|
for (int flags : fieldFlags) {
|
||||||
assert flags >= 0;
|
assert flags >= 0;
|
||||||
writer.add(flags);
|
writer.add(flags);
|
||||||
}
|
}
|
||||||
assert writer.ord() == fieldFlags.length - 1;
|
|
||||||
writer.finish();
|
writer.finish();
|
||||||
|
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
|
||||||
|
scratchBuffer.copyTo(vectorsStream);
|
||||||
} else {
|
} else {
|
||||||
// write one flag for every field instance
|
// write one flag for every field instance
|
||||||
vectorsStream.writeVInt(1);
|
vectorsStream.writeVInt(1);
|
||||||
final PackedInts.Writer writer =
|
scratchBuffer.reset();
|
||||||
PackedInts.getWriterNoHeader(
|
final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, FLAGS_BITS);
|
||||||
vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
|
|
||||||
for (DocData dd : pendingDocs) {
|
for (DocData dd : pendingDocs) {
|
||||||
for (FieldData fd : dd.fields) {
|
for (FieldData fd : dd.fields) {
|
||||||
writer.add(fd.flags);
|
writer.add(fd.flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert writer.ord() == totalFields - 1;
|
|
||||||
writer.finish();
|
writer.finish();
|
||||||
|
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
|
||||||
|
scratchBuffer.copyTo(vectorsStream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -549,18 +551,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
|
||||||
maxNumTerms |= fd.numTerms;
|
maxNumTerms |= fd.numTerms;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final int bitsRequired = PackedInts.bitsRequired(maxNumTerms);
|
final int bitsRequired = DirectWriter.bitsRequired(maxNumTerms);
|
||||||
vectorsStream.writeVInt(bitsRequired);
|
vectorsStream.writeVInt(bitsRequired);
|
||||||
final PackedInts.Writer writer =
|
scratchBuffer.reset();
|
||||||
PackedInts.getWriterNoHeader(
|
final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, bitsRequired);
|
||||||
vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
|
|
||||||
for (DocData dd : pendingDocs) {
|
for (DocData dd : pendingDocs) {
|
||||||
for (FieldData fd : dd.fields) {
|
for (FieldData fd : dd.fields) {
|
||||||
writer.add(fd.numTerms);
|
writer.add(fd.numTerms);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert writer.ord() == totalFields - 1;
|
|
||||||
writer.finish();
|
writer.finish();
|
||||||
|
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
|
||||||
|
scratchBuffer.copyTo(vectorsStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void flushTermLengths() throws IOException {
|
private void flushTermLengths() throws IOException {
|
||||||
|
@ -954,7 +956,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
|
||||||
+ payloadLengthsBuf.length
|
+ payloadLengthsBuf.length
|
||||||
+ termSuffixes.ramBytesUsed()
|
+ termSuffixes.ramBytesUsed()
|
||||||
+ payloadBytes.ramBytesUsed()
|
+ payloadBytes.ramBytesUsed()
|
||||||
+ lastTerm.bytes.length;
|
+ lastTerm.bytes.length
|
||||||
|
+ scratchBuffer.ramBytesUsed();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue