LUCENE-9907: Remove PackedInts#getReaderNoHeader dependency on TermsVectorFieldsFormat (#72)

This commit is contained in:
Ignacio Vera 2021-04-15 16:04:13 +02:00 committed by GitHub
parent d03662c48b
commit 873ac5f162
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 55 additions and 57 deletions

View File

@ -30,6 +30,8 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT
import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START; import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.CodecUtil;
@ -50,15 +52,21 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.BlockPackedReaderIterator; import org.apache.lucene.util.packed.BlockPackedReaderIterator;
import org.apache.lucene.util.packed.DirectReader;
import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
/** /**
@ -295,6 +303,13 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
return new Lucene90CompressingTermVectorsReader(this); return new Lucene90CompressingTermVectorsReader(this);
} }
/**
 * Copies a length-prefixed run of bytes out of {@code in} into a freshly allocated array and
 * exposes that copy for random access.
 *
 * <p>The length is read as a vInt and is immediately followed by that many bytes.
 *
 * @param in input positioned at the vInt length prefix; it is advanced past the copied bytes
 * @return a random-access view over the copied bytes
 * @throws IOException if reading from {@code in} fails
 */
private static RandomAccessInput slice(IndexInput in) throws IOException {
  final byte[] buffer = new byte[in.readVInt()];
  in.readBytes(buffer, 0, buffer.length);
  final ByteBuffer wrapped = ByteBuffer.wrap(buffer);
  return new ByteBuffersDataInput(Collections.singletonList(wrapped));
}
@Override @Override
public Fields get(int doc) throws IOException { public Fields get(int doc) throws IOException {
ensureOpen(); ensureOpen();
@ -368,38 +383,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
// read field numbers and flags // read field numbers and flags
final int[] fieldNumOffs = new int[numFields]; final int[] fieldNumOffs = new int[numFields];
final PackedInts.Reader flags; final LongValues flags;
{ {
final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1); final int bitsPerOff = DirectWriter.bitsRequired(fieldNums.length - 1);
final PackedInts.Reader allFieldNumOffs = final LongValues allFieldNumOffs = DirectReader.getInstance(slice(vectorsStream), bitsPerOff);
PackedInts.getReaderNoHeader(
vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff);
switch (vectorsStream.readVInt()) { switch (vectorsStream.readVInt()) {
case 0: case 0:
final PackedInts.Reader fieldFlags = final LongValues fieldFlags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
PackedInts.getReaderNoHeader( final ByteBuffersDataOutput out = new ByteBuffersDataOutput();
vectorsStream, final DirectWriter writer = DirectWriter.getInstance(out, totalFields, FLAGS_BITS);
PackedInts.Format.PACKED,
packedIntsVersion,
fieldNums.length,
FLAGS_BITS);
PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT);
for (int i = 0; i < totalFields; ++i) { for (int i = 0; i < totalFields; ++i) {
final int fieldNumOff = (int) allFieldNumOffs.get(i); final int fieldNumOff = (int) allFieldNumOffs.get(i);
assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length; assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length;
final int fgs = (int) fieldFlags.get(fieldNumOff); writer.add(fieldFlags.get(fieldNumOff));
f.set(i, fgs);
} }
flags = f; writer.finish();
flags = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS);
break; break;
case 1: case 1:
flags = flags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS);
PackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,
totalFields,
FLAGS_BITS);
break; break;
default: default:
throw new AssertionError(); throw new AssertionError();
@ -410,17 +412,11 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
} }
// number of terms per field for all fields // number of terms per field for all fields
final PackedInts.Reader numTerms; final LongValues numTerms;
final int totalTerms; final int totalTerms;
{ {
final int bitsRequired = vectorsStream.readVInt(); final int bitsRequired = vectorsStream.readVInt();
numTerms = numTerms = DirectReader.getInstance(slice(vectorsStream), bitsRequired);
PackedInts.getReaderNoHeader(
vectorsStream,
PackedInts.Format.PACKED,
packedIntsVersion,
totalFields,
bitsRequired);
int sum = 0; int sum = 0;
for (int i = 0; i < totalFields; ++i) { for (int i = 0; i < totalFields; ++i) {
sum += numTerms.get(i); sum += numTerms.get(i);
@ -711,8 +707,7 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
} }
// field -> term index -> position index // field -> term index -> position index
private int[][] positionIndex( private int[][] positionIndex(int skip, int numFields, LongValues numTerms, int[] termFreqs) {
int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) {
final int[][] positionIndex = new int[numFields][]; final int[][] positionIndex = new int[numFields][];
int termIndex = 0; int termIndex = 0;
for (int i = 0; i < skip; ++i) { for (int i = 0; i < skip; ++i) {
@ -734,8 +729,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
private int[][] readPositions( private int[][] readPositions(
int skip, int skip,
int numFields, int numFields,
PackedInts.Reader flags, LongValues flags,
PackedInts.Reader numTerms, LongValues numTerms,
int[] termFreqs, int[] termFreqs,
int flag, int flag,
final int totalPositions, final int totalPositions,

View File

@ -51,6 +51,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.BlockPackedWriter; import org.apache.lucene.util.packed.BlockPackedWriter;
import org.apache.lucene.util.packed.DirectWriter;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
/** /**
@ -74,7 +75,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
static final int POSITIONS = 0x01; static final int POSITIONS = 0x01;
static final int OFFSETS = 0x02; static final int OFFSETS = 0x02;
static final int PAYLOADS = 0x04; static final int PAYLOADS = 0x04;
static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS); static final int FLAGS_BITS = DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS);
private final String segment; private final String segment;
private FieldsIndexWriter indexWriter; private FieldsIndexWriter indexWriter;
@ -223,6 +224,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
private final ByteBuffersDataOutput payloadBytes; // buffered term payloads private final ByteBuffersDataOutput payloadBytes; // buffered term payloads
private final BlockPackedWriter writer; private final BlockPackedWriter writer;
private final int maxDocsPerChunk; // hard limit on number of docs per chunk private final int maxDocsPerChunk; // hard limit on number of docs per chunk
private final ByteBuffersDataOutput scratchBuffer = ByteBuffersDataOutput.newResettableInstance();
/** Sole constructor. */ /** Sole constructor. */
Lucene90CompressingTermVectorsWriter( Lucene90CompressingTermVectorsWriter(
@ -478,13 +480,10 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
} }
private void flushFields(int totalFields, int[] fieldNums) throws IOException { private void flushFields(int totalFields, int[] fieldNums) throws IOException {
final PackedInts.Writer writer = scratchBuffer.reset();
PackedInts.getWriterNoHeader( final DirectWriter writer =
vectorsStream, DirectWriter.getInstance(
PackedInts.Format.PACKED, scratchBuffer, totalFields, DirectWriter.bitsRequired(fieldNums.length - 1));
totalFields,
PackedInts.bitsRequired(fieldNums.length - 1),
1);
for (DocData dd : pendingDocs) { for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) { for (FieldData fd : dd.fields) {
final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum); final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum);
@ -493,6 +492,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
} }
} }
writer.finish(); writer.finish();
vectorsStream.writeVLong(scratchBuffer.size());
scratchBuffer.copyTo(vectorsStream);
} }
private void flushFlags(int totalFields, int[] fieldNums) throws IOException { private void flushFlags(int totalFields, int[] fieldNums) throws IOException {
@ -517,28 +518,29 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
if (nonChangingFlags) { if (nonChangingFlags) {
// write one flag per field num // write one flag per field num
vectorsStream.writeVInt(0); vectorsStream.writeVInt(0);
final PackedInts.Writer writer = scratchBuffer.reset();
PackedInts.getWriterNoHeader( final DirectWriter writer =
vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1); DirectWriter.getInstance(scratchBuffer, fieldFlags.length, FLAGS_BITS);
for (int flags : fieldFlags) { for (int flags : fieldFlags) {
assert flags >= 0; assert flags >= 0;
writer.add(flags); writer.add(flags);
} }
assert writer.ord() == fieldFlags.length - 1;
writer.finish(); writer.finish();
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
scratchBuffer.copyTo(vectorsStream);
} else { } else {
// write one flag for every field instance // write one flag for every field instance
vectorsStream.writeVInt(1); vectorsStream.writeVInt(1);
final PackedInts.Writer writer = scratchBuffer.reset();
PackedInts.getWriterNoHeader( final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, FLAGS_BITS);
vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1);
for (DocData dd : pendingDocs) { for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) { for (FieldData fd : dd.fields) {
writer.add(fd.flags); writer.add(fd.flags);
} }
} }
assert writer.ord() == totalFields - 1;
writer.finish(); writer.finish();
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
scratchBuffer.copyTo(vectorsStream);
} }
} }
@ -549,18 +551,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
maxNumTerms |= fd.numTerms; maxNumTerms |= fd.numTerms;
} }
} }
final int bitsRequired = PackedInts.bitsRequired(maxNumTerms); final int bitsRequired = DirectWriter.bitsRequired(maxNumTerms);
vectorsStream.writeVInt(bitsRequired); vectorsStream.writeVInt(bitsRequired);
final PackedInts.Writer writer = scratchBuffer.reset();
PackedInts.getWriterNoHeader( final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, bitsRequired);
vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1);
for (DocData dd : pendingDocs) { for (DocData dd : pendingDocs) {
for (FieldData fd : dd.fields) { for (FieldData fd : dd.fields) {
writer.add(fd.numTerms); writer.add(fd.numTerms);
} }
} }
assert writer.ord() == totalFields - 1;
writer.finish(); writer.finish();
vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size()));
scratchBuffer.copyTo(vectorsStream);
} }
private void flushTermLengths() throws IOException { private void flushTermLengths() throws IOException {
@ -954,7 +956,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
+ payloadLengthsBuf.length + payloadLengthsBuf.length
+ termSuffixes.ramBytesUsed() + termSuffixes.ramBytesUsed()
+ payloadBytes.ramBytesUsed() + payloadBytes.ramBytesUsed()
+ lastTerm.bytes.length; + lastTerm.bytes.length
+ scratchBuffer.ramBytesUsed();
} }
@Override @Override