diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java index 6e5e0d3bdca..ec8823d70de 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java @@ -30,6 +30,8 @@ import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingT import static org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingTermVectorsWriter.VERSION_START; import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Collections; import java.util.Iterator; import java.util.NoSuchElementException; import org.apache.lucene.codecs.CodecUtil; @@ -50,15 +52,21 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ByteBuffersDataInput; +import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongsRef; import org.apache.lucene.util.packed.BlockPackedReaderIterator; +import org.apache.lucene.util.packed.DirectReader; +import org.apache.lucene.util.packed.DirectWriter; import org.apache.lucene.util.packed.PackedInts; /** @@ -295,6 +303,13 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade return new 
Lucene90CompressingTermVectorsReader(this); } + /** Reads a vInt byte length followed by that many bytes from {@code in} and returns them, copied into a heap array, as a {@link RandomAccessInput}. */ private static RandomAccessInput slice(IndexInput in) throws IOException { + final int length = in.readVInt(); + final byte[] bytes = new byte[length]; + in.readBytes(bytes, 0, length); + return new ByteBuffersDataInput(Collections.singletonList(ByteBuffer.wrap(bytes))); + } + @Override public Fields get(int doc) throws IOException { ensureOpen(); @@ -368,38 +383,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade // read field numbers and flags final int[] fieldNumOffs = new int[numFields]; - final PackedInts.Reader flags; + final LongValues flags; { - final int bitsPerOff = PackedInts.bitsRequired(fieldNums.length - 1); - final PackedInts.Reader allFieldNumOffs = - PackedInts.getReaderNoHeader( - vectorsStream, PackedInts.Format.PACKED, packedIntsVersion, totalFields, bitsPerOff); + final int bitsPerOff = DirectWriter.bitsRequired(fieldNums.length - 1); + final LongValues allFieldNumOffs = DirectReader.getInstance(slice(vectorsStream), bitsPerOff); switch (vectorsStream.readVInt()) { case 0: - final PackedInts.Reader fieldFlags = - PackedInts.getReaderNoHeader( - vectorsStream, - PackedInts.Format.PACKED, - packedIntsVersion, - fieldNums.length, - FLAGS_BITS); - PackedInts.Mutable f = PackedInts.getMutable(totalFields, FLAGS_BITS, PackedInts.COMPACT); + final LongValues fieldFlags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS); + final ByteBuffersDataOutput out = new ByteBuffersDataOutput(); /* re-pack one flag per field instance, looked up through its field-number offset */ + final DirectWriter writer = DirectWriter.getInstance(out, totalFields, FLAGS_BITS); for (int i = 0; i < totalFields; ++i) { final int fieldNumOff = (int) allFieldNumOffs.get(i); assert fieldNumOff >= 0 && fieldNumOff < fieldNums.length; - final int fgs = (int) fieldFlags.get(fieldNumOff); - f.set(i, fgs); + writer.add(fieldFlags.get(fieldNumOff)); } - flags = f; + writer.finish(); + flags = DirectReader.getInstance(out.toDataInput(), FLAGS_BITS); break; case 1: - flags = -
PackedInts.getReaderNoHeader( - vectorsStream, - PackedInts.Format.PACKED, - packedIntsVersion, - totalFields, - FLAGS_BITS); + flags = DirectReader.getInstance(slice(vectorsStream), FLAGS_BITS); break; default: throw new AssertionError(); @@ -410,17 +412,11 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade } // number of terms per field for all fields - final PackedInts.Reader numTerms; + final LongValues numTerms; final int totalTerms; { final int bitsRequired = vectorsStream.readVInt(); - numTerms = - PackedInts.getReaderNoHeader( - vectorsStream, - PackedInts.Format.PACKED, - packedIntsVersion, - totalFields, - bitsRequired); + numTerms = DirectReader.getInstance(slice(vectorsStream), bitsRequired); int sum = 0; for (int i = 0; i < totalFields; ++i) { sum += numTerms.get(i); @@ -711,8 +707,7 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade } // field -> term index -> position index - private int[][] positionIndex( - int skip, int numFields, PackedInts.Reader numTerms, int[] termFreqs) { + private int[][] positionIndex(int skip, int numFields, LongValues numTerms, int[] termFreqs) { final int[][] positionIndex = new int[numFields][]; int termIndex = 0; for (int i = 0; i < skip; ++i) { @@ -734,8 +729,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade private int[][] readPositions( int skip, int numFields, - PackedInts.Reader flags, - PackedInts.Reader numTerms, + LongValues flags, + LongValues numTerms, int[] termFreqs, int flag, final int totalPositions, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java index 65b34429f7f..ed54ce736a3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java @@ -51,6 +51,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.packed.BlockPackedWriter; +import org.apache.lucene.util.packed.DirectWriter; import org.apache.lucene.util.packed.PackedInts; /** @@ -74,7 +75,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite static final int POSITIONS = 0x01; static final int OFFSETS = 0x02; static final int PAYLOADS = 0x04; - static final int FLAGS_BITS = PackedInts.bitsRequired(POSITIONS | OFFSETS | PAYLOADS); + static final int FLAGS_BITS = DirectWriter.bitsRequired(POSITIONS | OFFSETS | PAYLOADS); private final String segment; private FieldsIndexWriter indexWriter; @@ -223,6 +224,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite private final ByteBuffersDataOutput payloadBytes; // buffered term payloads private final BlockPackedWriter writer; private final int maxDocsPerChunk; // hard limit on number of docs per chunk + private final ByteBuffersDataOutput scratchBuffer = ByteBuffersDataOutput.newResettableInstance(); /** Sole constructor. 
*/ Lucene90CompressingTermVectorsWriter( @@ -478,13 +480,10 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite } private void flushFields(int totalFields, int[] fieldNums) throws IOException { - final PackedInts.Writer writer = - PackedInts.getWriterNoHeader( - vectorsStream, - PackedInts.Format.PACKED, - totalFields, - PackedInts.bitsRequired(fieldNums.length - 1), - 1); + scratchBuffer.reset(); /* pack into the scratch buffer first so its byte length can be written as a prefix */ + final DirectWriter writer = + DirectWriter.getInstance( + scratchBuffer, totalFields, DirectWriter.bitsRequired(fieldNums.length - 1)); for (DocData dd : pendingDocs) { for (FieldData fd : dd.fields) { final int fieldNumIndex = Arrays.binarySearch(fieldNums, fd.fieldNum); @@ -493,6 +492,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite } } writer.finish(); + vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size())); /* vInt length prefix, matching flushFlags/flushNumTerms and the reader's readVInt(); toIntExact fails fast instead of writing a prefix the reader cannot decode */ + scratchBuffer.copyTo(vectorsStream); } private void flushFlags(int totalFields, int[] fieldNums) throws IOException { @@ -517,28 +518,29 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite if (nonChangingFlags) { // write one flag per field num vectorsStream.writeVInt(0); - final PackedInts.Writer writer = - PackedInts.getWriterNoHeader( - vectorsStream, PackedInts.Format.PACKED, fieldFlags.length, FLAGS_BITS, 1); + scratchBuffer.reset(); + final DirectWriter writer = + DirectWriter.getInstance(scratchBuffer, fieldFlags.length, FLAGS_BITS); for (int flags : fieldFlags) { assert flags >= 0; writer.add(flags); } - assert writer.ord() == fieldFlags.length - 1; writer.finish(); + vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size())); + scratchBuffer.copyTo(vectorsStream); } else { // write one flag for every field instance vectorsStream.writeVInt(1); - final PackedInts.Writer writer = - PackedInts.getWriterNoHeader( - vectorsStream, PackedInts.Format.PACKED, totalFields, FLAGS_BITS, 1); + scratchBuffer.reset(); + final DirectWriter writer =
DirectWriter.getInstance(scratchBuffer, totalFields, FLAGS_BITS); for (DocData dd : pendingDocs) { for (FieldData fd : dd.fields) { writer.add(fd.flags); } } - assert writer.ord() == totalFields - 1; writer.finish(); + vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size())); + scratchBuffer.copyTo(vectorsStream); } } @@ -549,18 +551,18 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite maxNumTerms |= fd.numTerms; } } - final int bitsRequired = PackedInts.bitsRequired(maxNumTerms); + final int bitsRequired = DirectWriter.bitsRequired(maxNumTerms); vectorsStream.writeVInt(bitsRequired); - final PackedInts.Writer writer = - PackedInts.getWriterNoHeader( - vectorsStream, PackedInts.Format.PACKED, totalFields, bitsRequired, 1); + scratchBuffer.reset(); + final DirectWriter writer = DirectWriter.getInstance(scratchBuffer, totalFields, bitsRequired); for (DocData dd : pendingDocs) { for (FieldData fd : dd.fields) { writer.add(fd.numTerms); } } - assert writer.ord() == totalFields - 1; writer.finish(); + vectorsStream.writeVInt(Math.toIntExact(scratchBuffer.size())); + scratchBuffer.copyTo(vectorsStream); } private void flushTermLengths() throws IOException { @@ -954,7 +956,8 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite + payloadLengthsBuf.length + termSuffixes.ramBytesUsed() + payloadBytes.ramBytesUsed() - + lastTerm.bytes.length; + + lastTerm.bytes.length + + scratchBuffer.ramBytesUsed(); } @Override