From fb3ca8d000d6e5203a57625942b754f1d5757fac Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 17 Jan 2020 13:39:45 +0100 Subject: [PATCH] LUCENE-9116: Remove long[] from `PostingsWriterBase#encodeTerm`. (#1149) (#1158) All the metadata can be directly encoded in the `DataOutput`. --- lucene/CHANGES.txt | 6 + .../lucene50/Lucene50PostingsReader.java | 8 +- .../lucene50/Lucene50PostingsWriter.java | 19 +- .../codecs/blockterms/BlockTermsReader.java | 14 +- .../codecs/blockterms/BlockTermsWriter.java | 20 +- .../OrdsBlockTreeTermsReader.java | 3 +- .../OrdsBlockTreeTermsWriter.java | 29 +- .../codecs/blocktreeords/OrdsFieldReader.java | 4 +- .../OrdsIntersectTermsEnumFrame.java | 12 +- .../OrdsSegmentTermsEnumFrame.java | 12 +- .../codecs/memory/FSTOrdPostingsFormat.java | 78 -- .../codecs/memory/FSTOrdTermsReader.java | 884 ------------------ .../codecs/memory/FSTOrdTermsWriter.java | 386 -------- .../lucene/codecs/memory/FSTTermOutputs.java | 147 +-- .../lucene/codecs/memory/FSTTermsReader.java | 13 +- .../lucene/codecs/memory/FSTTermsWriter.java | 15 +- .../DeltaBaseTermStateSerializer.java | 4 +- .../org.apache.lucene.codecs.PostingsFormat | 1 - .../memory/TestFSTOrdPostingsFormat.java | 34 - .../uniformsplit/TestTermBytesComparator.java | 2 +- .../sharedterms/STBlockReaderTest.java | 2 +- .../lucene/codecs/PostingsReaderBase.java | 2 +- .../lucene/codecs/PostingsWriterBase.java | 15 +- .../lucene/codecs/PushPostingsWriterBase.java | 4 +- .../blocktree/BlockTreeTermsReader.java | 15 +- .../blocktree/BlockTreeTermsWriter.java | 30 +- .../lucene/codecs/blocktree/FieldReader.java | 4 +- .../blocktree/IntersectTermsEnumFrame.java | 13 +- .../blocktree/SegmentTermsEnumFrame.java | 12 +- .../lucene84/Lucene84PostingsReader.java | 8 +- .../lucene84/Lucene84PostingsWriter.java | 19 +- .../idversion/IDVersionPostingsReader.java | 2 +- .../idversion/IDVersionPostingsWriter.java | 7 +- .../IDVersionSegmentTermsEnumFrame.java | 12 +- .../VersionBlockTreeTermsReader.java | 3 +- .../VersionBlockTreeTermsWriter.java | 29 +- .../codecs/idversion/VersionFieldReader.java | 4 +- .../mockrandom/MockRandomPostingsFormat.java | 34 +- .../org/apache/lucene/index/RandomCodec.java | 2 - 39 files changed, 128 insertions(+), 1780 deletions(-) delete mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java delete mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java delete mode 100644 lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java delete mode 100644 lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPostingsFormat.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 59fea40d7a2..2f1c2e7babb 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -91,6 +91,12 @@ API Changes yield Passages sized a little different due to the fact that the sizing pivot is now the center of the first match and not its left edge. +* LUCENE-9116: PostingsWriterBase and PostingsReaderBase no longer support + setting a field's metadata via a `long[]`. (Adrien Grand) + +* LUCENE-9116: The FSTOrd postings format has been removed. + (Adrien Grand) + * LUCENE-8369: Remove obsolete spatial module. 
(Nick Knize, David Smiley) New Features diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java index 0ea8c802cd2..adae891c4ab 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsReader.java @@ -154,7 +154,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase { } @Override - public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) + public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException { final IntBlockTermState termState = (IntBlockTermState) _termState; final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; @@ -167,11 +167,11 @@ public final class Lucene50PostingsReader extends PostingsReaderBase { termState.payStartFP = 0; } - termState.docStartFP += longs[0]; + termState.docStartFP += in.readVLong(); if (fieldHasPositions) { - termState.posStartFP += longs[1]; + termState.posStartFP += in.readVLong(); if (fieldHasOffsets || fieldHasPayloads) { - termState.payStartFP += longs[2]; + termState.payStartFP += in.readVLong(); } } if (termState.docFreq == 1) { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java index a600e61fb32..8f425a2036c 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java @@ -187,20 +187,11 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { } @Override - public int setField(FieldInfo fieldInfo) { + public void setField(FieldInfo fieldInfo) { super.setField(fieldInfo); skipWriter.setField(writePositions, writeOffsets, writePayloads); lastState = emptyState; fieldHasNorms = fieldInfo.hasNorms(); - if (writePositions) { - if (writePayloads || writeOffsets) { - return 3; // doc + pos + pay FP - } else { - return 2; // doc + pos FP - } - } else { - return 1; // doc FP - } } @Override @@ -463,16 +454,16 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase { } @Override - public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { + public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { IntBlockTermState state = (IntBlockTermState)_state; if (absolute) { lastState = emptyState; } - longs[0] = state.docStartFP - lastState.docStartFP; + out.writeVLong(state.docStartFP - lastState.docStartFP); if (writePositions) { - longs[1] = state.posStartFP - lastState.posStartFP; + out.writeVLong(state.posStartFP - lastState.posStartFP); if (writePayloads || writeOffsets) { - longs[2] = state.payStartFP - lastState.payStartFP; + out.writeVLong(state.payStartFP - lastState.payStartFP); } } if (state.singletonDocID != -1) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index 
964f616c6ff..480f5fde271 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -145,7 +145,6 @@ public class BlockTermsReader extends FieldsProducer { // when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong(); final int docCount = in.readVInt(); - final int longsSize = in.readVInt(); if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), in); } @@ -155,7 +154,7 @@ public class BlockTermsReader extends FieldsProducer { if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in); } - FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize)); + FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount)); if (previous != null) { throw new CorruptIndexException("duplicate fields: " + fieldInfo.name, in); } @@ -223,9 +222,8 @@ public class BlockTermsReader extends FieldsProducer { final long sumTotalTermFreq; final long sumDocFreq; final int docCount; - final int longsSize; - FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) { + FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) { assert numTerms > 0; this.fieldInfo = fieldInfo; this.numTerms = numTerms; @@ -233,7 +231,6 @@ public class BlockTermsReader extends FieldsProducer { this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; - this.longsSize = longsSize; } @Override @@ -326,7 +323,6 @@ public class BlockTermsReader extends FieldsProducer { private final ByteArrayDataInput freqReader = new ByteArrayDataInput(); private int metaDataUpto; - private long[] longs; private byte[] bytes; private ByteArrayDataInput bytesReader; @@ -343,7 +339,6 @@ public class BlockTermsReader extends FieldsProducer { termSuffixes = new byte[128]; docFreqBytes = new byte[64]; //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader); - longs = new long[longsSize]; } // TODO: we may want an alternate mode here which is @@ -826,10 +821,7 @@ public class BlockTermsReader extends FieldsProducer { //System.out.println(" totTF=" + state.totalTermFreq); } // metadata - for (int i = 0; i < longs.length; i++) { - longs[i] = bytesReader.readVLong(); - } - postingsReader.decodeTerm(longs, bytesReader, fieldInfo, state, absolute); + postingsReader.decodeTerm(bytesReader, fieldInfo, state, absolute); metaDataUpto++; absolute = false; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java index f620bd83d0f..e064aa1ecf2 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsWriter.java @@ 
-81,9 +81,8 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { public final long sumTotalTermFreq; public final long sumDocFreq; public final int docCount; - public final int longsSize; - public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) { + public FieldMetaData(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq, long sumDocFreq, int docCount) { assert numTerms > 0; this.fieldInfo = fieldInfo; this.termsStartPointer = termsStartPointer; @@ -91,7 +90,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; - this.longsSize = longsSize; } } @@ -176,7 +174,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { } out.writeVLong(field.sumDocFreq); out.writeVInt(field.docCount); - out.writeVInt(field.longsSize); } writeTrailer(dirStart); CodecUtil.writeFooter(out); @@ -206,7 +203,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { long sumTotalTermFreq; long sumDocFreq; int docCount; - int longsSize; private TermEntry[] pendingTerms; @@ -226,7 +222,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { } termsStartPointer = out.getFilePointer(); this.postingsWriter = postingsWriter; - this.longsSize = postingsWriter.setField(fieldInfo); + postingsWriter.setField(fieldInfo); } private final BytesRefBuilder lastPrevTerm = new BytesRefBuilder(); @@ -285,8 +281,7 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { termsStartPointer, fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 ? sumTotalTermFreq : -1, sumDocFreq, - docsSeen.cardinality(), - longsSize)); + docsSeen.cardinality())); } } @@ -307,7 +302,6 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { } private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance(); - private final ByteBuffersDataOutput bufferWriter = ByteBuffersDataOutput.newResettableInstance(); private void flushBlock() throws IOException { //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer()); @@ -353,16 +347,10 @@ public class BlockTermsWriter extends FieldsConsumer implements Closeable { bytesWriter.reset(); // 4th pass: write the metadata - long[] longs = new long[longsSize]; boolean absolute = true; for(int termCount=0;termCount 0; this.fieldInfo = fieldInfo; @@ -159,7 +158,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; - this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; } @@ -424,7 +422,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { class TermsWriter { private final FieldInfo fieldInfo; - private final int longsSize; private long numTerms; final FixedBitSet docsSeen; long sumTotalTermFreq; @@ -439,8 +436,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { private final BytesRefBuilder lastTerm = new BytesRefBuilder(); private int[] prefixStarts = new int[8]; - private final long[] longs; - // Pending stack of terms and blocks. 
As terms arrive (in sorted order) // we append to this stack, and once the top of the stack has enough // terms starting with a common prefix, we write a new block with @@ -633,13 +628,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { } // Write term meta data - postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute); - for (int pos = 0; pos < longsSize; pos++) { - assert longs[pos] >= 0; - metaWriter.writeVLong(longs[pos]); - } - bytesWriter.copyTo(metaWriter); - bytesWriter.reset(); + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); absolute = false; } totalTermCount = end-start; @@ -684,13 +673,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { // separate anymore: // Write term meta data - postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute); - for (int pos = 0; pos < longsSize; pos++) { - assert longs[pos] >= 0; - metaWriter.writeVLong(longs[pos]); - } - bytesWriter.copyTo(metaWriter); - bytesWriter.reset(); + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); absolute = false; totalTermCount++; @@ -763,8 +746,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { TermsWriter(FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; docsSeen = new FixedBitSet(maxDoc); - this.longsSize = postingsWriter.setField(fieldInfo); - this.longs = new long[longsSize]; + postingsWriter.setField(fieldInfo); } /** Writes one term's worth of postings. */ @@ -874,7 +856,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { sumTotalTermFreq, sumDocFreq, docsSeen.cardinality(), - longsSize, minTerm, maxTerm)); } else { assert docsSeen.cardinality() == 0; @@ -884,7 +865,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance(); private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance(); private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance(); - private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance(); } private boolean closed; @@ -916,7 +896,6 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer { } out.writeVLong(field.sumDocFreq); out.writeVInt(field.docCount); - out.writeVInt(field.longsSize); indexOut.writeVLong(field.indexStartFP); writeBytesRef(out, field.minTerm); writeBytesRef(out, field.maxTerm); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsFieldReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsFieldReader.java index 5d02258837d..54954e85d3d 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsFieldReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsFieldReader.java @@ -46,7 +46,6 @@ final class OrdsFieldReader extends Terms implements Accountable { final Output rootCode; final BytesRef minTerm; final BytesRef maxTerm; - final int longsSize; final OrdsBlockTreeTermsReader parent; final FST index; @@ -54,7 +53,7 @@ final class OrdsFieldReader extends Terms implements Accountable { OrdsFieldReader(OrdsBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, Output rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, - long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException { + long indexStartFP, IndexInput indexIn, 
BytesRef minTerm, BytesRef maxTerm) throws IOException { assert numTerms > 0; this.fieldInfo = fieldInfo; //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id"); @@ -65,7 +64,6 @@ final class OrdsFieldReader extends Terms implements Accountable { this.docCount = docCount; this.indexStartFP = indexStartFP; this.rootCode = rootCode; - this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; // if (DEBUG) { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java index a34f0fda1d0..ab7eab73427 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java @@ -84,9 +84,7 @@ final class OrdsIntersectTermsEnumFrame { final BlockTermState termState; - // metadata buffer, holding monotonic values - public long[] longs; - // metadata buffer, holding general values + // metadata public byte[] bytes; ByteArrayDataInput bytesReader; @@ -103,7 +101,6 @@ final class OrdsIntersectTermsEnumFrame { this.ord = ord; this.termState = ite.fr.parent.postingsReader.newTermState(); this.termState.totalTermFreq = -1; - this.longs = new long[ite.fr.longsSize]; } void loadNextFloorBlock() throws IOException { @@ -298,11 +295,8 @@ final class OrdsIntersectTermsEnumFrame { termState.totalTermFreq = termState.docFreq + statsReader.readVLong(); //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq); } - // metadata - for (int i = 0; i < ite.fr.longsSize; i++) { - longs[i] = bytesReader.readVLong(); - } - ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute); + // metadata + ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute); metaDataUpto++; absolute = false; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java index ee3782f29cd..240e781c7cc 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java @@ -97,9 +97,7 @@ final class OrdsSegmentTermsEnumFrame { final BlockTermState state; - // metadata buffer, holding monotonic values - public long[] longs; - // metadata buffer, holding general values + // metadata public byte[] bytes; ByteArrayDataInput bytesReader; @@ -110,7 +108,6 @@ final class OrdsSegmentTermsEnumFrame { this.ord = ord; this.state = ste.fr.parent.postingsReader.newTermState(); this.state.totalTermFreq = -1; - this.longs = new long[ste.fr.longsSize]; } public void setFloorData(ByteArrayDataInput in, BytesRef source) { @@ -507,11 +504,8 @@ final class OrdsSegmentTermsEnumFrame { } //if (DEBUG) System.out.println(" longsSize=" + ste.fr.longsSize); - // metadata - for (int i = 0; i < ste.fr.longsSize; i++) { - longs[i] = bytesReader.readVLong(); - } - ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute); + // metadata + ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute); metaDataUpto++; absolute = false; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java 
b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java deleted file mode 100644 index 0ce12178a90..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdPostingsFormat.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.codecs.memory; - - - -import java.io.IOException; - -import org.apache.lucene.codecs.FieldsConsumer; -import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.lucene84.Lucene84PostingsReader; -import org.apache.lucene.codecs.lucene84.Lucene84PostingsWriter; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.IOUtils; - -/** - * FSTOrd term dict + Lucene50PBF - */ - -public final class FSTOrdPostingsFormat extends PostingsFormat { - public FSTOrdPostingsFormat() { - super("FSTOrd50"); - } - - @Override - public String toString() { - return getName(); - } - - @Override - public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { - PostingsWriterBase postingsWriter = new Lucene84PostingsWriter(state); - - boolean success = false; - try { - FieldsConsumer ret = new FSTOrdTermsWriter(state, postingsWriter); - success = true; - return ret; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(postingsWriter); - } - } - } - - @Override - public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { - PostingsReaderBase postingsReader = new Lucene84PostingsReader(state); - boolean success = false; - try { - FieldsProducer ret = new FSTOrdTermsReader(state, postingsReader); - success = true; - return ret; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(postingsReader); - } - } - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java deleted file mode 100644 index 7ecf19cc9f6..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java +++ /dev/null @@ -1,884 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.codecs.memory; - - -import java.io.IOException; -import java.util.ArrayList; -import java.util.BitSet; -import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.TreeMap; - -import org.apache.lucene.codecs.BlockTermState; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.PostingsReaderBase; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.ImpactsEnum; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.TermState; -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ChecksumIndexInput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.automaton.ByteRunAutomaton; -import org.apache.lucene.util.automaton.CompiledAutomaton; -import org.apache.lucene.util.fst.BytesRefFSTEnum; -import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.Outputs; -import org.apache.lucene.util.fst.PositiveIntOutputs; -import org.apache.lucene.util.fst.Util; - -/** - * FST-based terms dictionary reader. - * - * The FST index maps each term and its ord, and during seek - * the ord is used to fetch metadata from a single block. - * The term dictionary is fully memory resident. 
- * - * @lucene.experimental - */ -public class FSTOrdTermsReader extends FieldsProducer { - static final int INTERVAL = FSTOrdTermsWriter.SKIP_INTERVAL; - final TreeMap fields = new TreeMap<>(); - final PostingsReaderBase postingsReader; - //static final boolean TEST = false; - - public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException { - final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION); - final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION); - - this.postingsReader = postingsReader; - ChecksumIndexInput indexIn = null; - IndexInput blockIn = null; - boolean success = false; - try { - indexIn = state.directory.openChecksumInput(termsIndexFileName, state.context); - blockIn = state.directory.openInput(termsBlockFileName, state.context); - int version = CodecUtil.checkIndexHeader(indexIn, FSTOrdTermsWriter.TERMS_INDEX_CODEC_NAME, - FSTOrdTermsWriter.VERSION_START, - FSTOrdTermsWriter.VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - int version2 = CodecUtil.checkIndexHeader(blockIn, FSTOrdTermsWriter.TERMS_CODEC_NAME, - FSTOrdTermsWriter.VERSION_START, - FSTOrdTermsWriter.VERSION_CURRENT, - state.segmentInfo.getId(), state.segmentSuffix); - - if (version != version2) { - throw new CorruptIndexException("Format versions mismatch: index=" + version + ", terms=" + version2, blockIn); - } - - CodecUtil.checksumEntireFile(blockIn); - - this.postingsReader.init(blockIn, state); - seekDir(blockIn); - - final FieldInfos fieldInfos = state.fieldInfos; - final int numFields = blockIn.readVInt(); - for (int i = 0; i < numFields; i++) { - FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt()); - boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS; - long numTerms = blockIn.readVLong(); - long sumTotalTermFreq = blockIn.readVLong(); - // if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value - long sumDocFreq = hasFreq ? 
blockIn.readVLong() : sumTotalTermFreq; - int docCount = blockIn.readVInt(); - int longsSize = blockIn.readVInt(); - FST index = new FST<>(indexIn, PositiveIntOutputs.getSingleton()); - - TermsReader current = new TermsReader(fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index); - TermsReader previous = fields.put(fieldInfo.name, current); - checkFieldSummary(state.segmentInfo, indexIn, blockIn, current, previous); - } - CodecUtil.checkFooter(indexIn); - success = true; - } finally { - if (success) { - IOUtils.close(indexIn, blockIn); - } else { - IOUtils.closeWhileHandlingException(indexIn, blockIn); - } - } - } - - private void seekDir(IndexInput in) throws IOException { - in.seek(in.length() - CodecUtil.footerLength() - 8); - in.seek(in.readLong()); - } - private void checkFieldSummary(SegmentInfo info, IndexInput indexIn, IndexInput blockIn, TermsReader field, TermsReader previous) throws IOException { - // #docs with field must be <= #docs - if (field.docCount < 0 || field.docCount > info.maxDoc()) { - throw new CorruptIndexException("invalid docCount: " + field.docCount + " maxDoc: " + info.maxDoc() + " (blockIn=" + blockIn + ")", indexIn); - } - // #postings must be >= #docs with field - if (field.sumDocFreq < field.docCount) { - throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn); - } - // #positions must be >= #postings - if (field.sumTotalTermFreq < field.sumDocFreq) { - throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn); - } - if (previous != null) { - throw new CorruptIndexException("duplicate fields: " + field.fieldInfo.name + " (blockIn=" + blockIn + ")", indexIn); - } - } - - @Override - public Iterator iterator() { - return Collections.unmodifiableSet(fields.keySet()).iterator(); - } - - @Override - public Terms terms(String field) throws IOException { - assert field != null; - return fields.get(field); - } - - @Override - public int size() { - return fields.size(); - } - - @Override - public void close() throws IOException { - try { - IOUtils.close(postingsReader); - } finally { - fields.clear(); - } - } - - final class TermsReader extends Terms implements Accountable { - final FieldInfo fieldInfo; - final long numTerms; - final long sumTotalTermFreq; - final long sumDocFreq; - final int docCount; - final int longsSize; - final FST index; - - final int numSkipInfo; - final long[] skipInfo; - final byte[] statsBlock; - final byte[] metaLongsBlock; - final byte[] metaBytesBlock; - - TermsReader(FieldInfo fieldInfo, IndexInput blockIn, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST index) throws IOException { - this.fieldInfo = fieldInfo; - this.numTerms = numTerms; - this.sumTotalTermFreq = sumTotalTermFreq; - this.sumDocFreq = sumDocFreq; - this.docCount = docCount; - this.longsSize = longsSize; - this.index = index; - - assert (numTerms & (~0xffffffffL)) == 0; - final int numBlocks = (int)(numTerms + INTERVAL - 1) / INTERVAL; - this.numSkipInfo = longsSize + 3; - this.skipInfo = new long[numBlocks * numSkipInfo]; - this.statsBlock = new byte[(int)blockIn.readVLong()]; - this.metaLongsBlock = new byte[(int)blockIn.readVLong()]; - this.metaBytesBlock = new byte[(int)blockIn.readVLong()]; - - int last = 0, next = 0; - for (int i = 1; i < numBlocks; i++) { - next = numSkipInfo * i; - for (int j = 
0; j < numSkipInfo; j++) { - skipInfo[next + j] = skipInfo[last + j] + blockIn.readVLong(); - } - last = next; - } - blockIn.readBytes(statsBlock, 0, statsBlock.length); - blockIn.readBytes(metaLongsBlock, 0, metaLongsBlock.length); - blockIn.readBytes(metaBytesBlock, 0, metaBytesBlock.length); - } - - public boolean hasFreqs() { - return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; - } - - @Override - public boolean hasOffsets() { - return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; - } - - @Override - public boolean hasPositions() { - return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; - } - - @Override - public boolean hasPayloads() { - return fieldInfo.hasPayloads(); - } - - @Override - public long size() { - return numTerms; - } - - @Override - public long getSumTotalTermFreq() { - return sumTotalTermFreq; - } - - @Override - public long getSumDocFreq() throws IOException { - return sumDocFreq; - } - - @Override - public int getDocCount() throws IOException { - return docCount; - } - - @Override - public TermsEnum iterator() throws IOException { - return new SegmentTermsEnum(); - } - - @Override - public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { - if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) { - throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead"); - } - return new IntersectTermsEnum(compiled, startTerm); - } - - @Override - public long ramBytesUsed() { - long ramBytesUsed = 0; - if (index != null) { - ramBytesUsed += index.ramBytesUsed(); - ramBytesUsed += RamUsageEstimator.sizeOf(metaBytesBlock); - ramBytesUsed += RamUsageEstimator.sizeOf(metaLongsBlock); - ramBytesUsed += RamUsageEstimator.sizeOf(skipInfo); - ramBytesUsed += RamUsageEstimator.sizeOf(statsBlock); - } - return ramBytesUsed; - } - - @Override - public Collection getChildResources() { - if (index == null) { - return Collections.emptyList(); - } else { - return Collections.singletonList(Accountables.namedAccountable("terms", index)); - } - } - - @Override - public String toString() { - return "FSTOrdTerms(terms=" + numTerms + ",postings=" + sumDocFreq + ",positions=" + sumTotalTermFreq + ",docs=" + docCount + ")"; - } - - // Only wraps common operations for PBF interact - abstract class BaseTermsEnum extends org.apache.lucene.index.BaseTermsEnum { - - /* Current term's ord, starts from 0 */ - long ord; - - /* Current term stats + decoded metadata (customized by PBF) */ - final BlockTermState state; - - /* Datainput to load stats & metadata */ - final ByteArrayDataInput statsReader = new ByteArrayDataInput(); - final ByteArrayDataInput metaLongsReader = new ByteArrayDataInput(); - final ByteArrayDataInput metaBytesReader = new ByteArrayDataInput(); - - /* To which block is buffered */ - int statsBlockOrd; - int metaBlockOrd; - - /* Current buffered metadata (long[] & byte[]) */ - long[][] longs; - int[] bytesStart; - int[] bytesLength; - - /* Current buffered stats (df & ttf) */ - int[] docFreq; - long[] totalTermFreq; - - BaseTermsEnum() throws IOException { - this.state = postingsReader.newTermState(); - this.statsReader.reset(statsBlock); - this.metaLongsReader.reset(metaLongsBlock); - this.metaBytesReader.reset(metaBytesBlock); - - this.longs = new long[INTERVAL][longsSize]; - this.bytesStart = new int[INTERVAL]; - this.bytesLength = new int[INTERVAL]; - this.docFreq = new int[INTERVAL]; - 
this.totalTermFreq = new long[INTERVAL]; - this.statsBlockOrd = -1; - this.metaBlockOrd = -1; - } - - /** Decodes stats data into term state */ - void decodeStats() throws IOException { - final int upto = (int)ord % INTERVAL; - final int oldBlockOrd = statsBlockOrd; - statsBlockOrd = (int)ord / INTERVAL; - if (oldBlockOrd != statsBlockOrd) { - refillStats(); - } - state.docFreq = docFreq[upto]; - state.totalTermFreq = totalTermFreq[upto]; - } - - /** Let PBF decode metadata */ - void decodeMetaData() throws IOException { - final int upto = (int)ord % INTERVAL; - final int oldBlockOrd = metaBlockOrd; - metaBlockOrd = (int)ord / INTERVAL; - if (metaBlockOrd != oldBlockOrd) { - refillMetadata(); - } - metaBytesReader.setPosition(bytesStart[upto]); - postingsReader.decodeTerm(longs[upto], metaBytesReader, fieldInfo, state, true); - } - - /** Load current stats shard */ - final void refillStats() throws IOException { - final int offset = statsBlockOrd * numSkipInfo; - final int statsFP = (int)skipInfo[offset]; - statsReader.setPosition(statsFP); - for (int i = 0; i < INTERVAL && !statsReader.eof(); i++) { - int code = statsReader.readVInt(); - if (hasFreqs()) { - docFreq[i] = (code >>> 1); - if ((code & 1) == 1) { - totalTermFreq[i] = docFreq[i]; - } else { - totalTermFreq[i] = docFreq[i] + statsReader.readVLong(); - } - } else { - docFreq[i] = code; - totalTermFreq[i] = code; - } - } - } - - /** Load current metadata shard */ - final void refillMetadata() throws IOException { - final int offset = metaBlockOrd * numSkipInfo; - final int metaLongsFP = (int)skipInfo[offset + 1]; - final int metaBytesFP = (int)skipInfo[offset + 2]; - metaLongsReader.setPosition(metaLongsFP); - for (int j = 0; j < longsSize; j++) { - longs[0][j] = skipInfo[offset + 3 + j] + metaLongsReader.readVLong(); - } - bytesStart[0] = metaBytesFP; - bytesLength[0] = (int)metaLongsReader.readVLong(); - for (int i = 1; i < INTERVAL && !metaLongsReader.eof(); i++) { - for (int j = 0; j < longsSize; j++) { - longs[i][j] = longs[i-1][j] + metaLongsReader.readVLong(); - } - bytesStart[i] = bytesStart[i-1] + bytesLength[i-1]; - bytesLength[i] = (int)metaLongsReader.readVLong(); - } - } - - @Override - public TermState termState() throws IOException { - decodeMetaData(); - return state.clone(); - } - - @Override - public int docFreq() throws IOException { - return state.docFreq; - } - - @Override - public long totalTermFreq() throws IOException { - return state.totalTermFreq; - } - - @Override - public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { - decodeMetaData(); - return postingsReader.postings(fieldInfo, state, reuse, flags); - } - - @Override - public ImpactsEnum impacts(int flags) throws IOException { - decodeMetaData(); - return postingsReader.impacts(fieldInfo, state, flags); - } - - // TODO: this can be achieved by making use of Util.getByOutput() - // and should have related tests - @Override - public void seekExact(long ord) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public long ord() { - throw new UnsupportedOperationException(); - } - } - - // Iterates through all terms in this field - private final class SegmentTermsEnum extends BaseTermsEnum { - final BytesRefFSTEnum fstEnum; - /* Current term, null when enum ends or unpositioned */ - BytesRef term; - - /* True when current term's metadata is decoded */ - boolean decoded; - - /* True when current enum is 'positioned' by seekExact(TermState) */ - boolean seekPending; - - SegmentTermsEnum() throws 
IOException { - this.fstEnum = new BytesRefFSTEnum<>(index); - this.decoded = false; - this.seekPending = false; - } - - @Override - public BytesRef term() throws IOException { - return term; - } - - @Override - void decodeMetaData() throws IOException { - if (!decoded && !seekPending) { - super.decodeMetaData(); - decoded = true; - } - } - - // Update current enum according to FSTEnum - void updateEnum(final InputOutput pair) throws IOException { - if (pair == null) { - term = null; - } else { - term = pair.input; - ord = pair.output; - decodeStats(); - } - decoded = false; - seekPending = false; - } - - @Override - public BytesRef next() throws IOException { - if (seekPending) { // previously positioned, but termOutputs not fetched - seekPending = false; - SeekStatus status = seekCeil(term); - assert status == SeekStatus.FOUND; // must positioned on valid term - } - updateEnum(fstEnum.next()); - return term; - } - - @Override - public boolean seekExact(BytesRef target) throws IOException { - updateEnum(fstEnum.seekExact(target)); - return term != null; - } - - @Override - public SeekStatus seekCeil(BytesRef target) throws IOException { - updateEnum(fstEnum.seekCeil(target)); - if (term == null) { - return SeekStatus.END; - } else { - return term.equals(target) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND; - } - } - - @Override - public void seekExact(BytesRef target, TermState otherState) { - if (!target.equals(term)) { - state.copyFrom(otherState); - term = BytesRef.deepCopyOf(target); - seekPending = true; - } - } - } - - // Iterates intersect result with automaton (cannot seek!) - private final class IntersectTermsEnum extends BaseTermsEnum { - /* Current term, null when enum ends or unpositioned */ - BytesRefBuilder term; - - /* True when current term's metadata is decoded */ - boolean decoded; - - /* True when there is pending term when calling next() */ - boolean pending; - - /* stack to record how current term is constructed, - * used to accumulate metadata or rewind term: - * level == term.length + 1, - * == 0 when term is null */ - Frame[] stack; - int level; - - /* term dict fst */ - final FST fst; - final FST.BytesReader fstReader; - final Outputs fstOutputs; - - /* query automaton to intersect with */ - final ByteRunAutomaton fsa; - - private final class Frame { - /* fst stats */ - FST.Arc arc; - - Long output; - - /* automaton stats */ - int state; - - Frame() { - this.arc = new FST.Arc<>(); - this.state = -1; - } - - public String toString() { - return "arc=" + arc + " state=" + state; - } - } - - IntersectTermsEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException { - //if (TEST) System.out.println("Enum init, startTerm=" + startTerm); - this.fst = index; - this.fstReader = fst.getBytesReader(); - this.fstOutputs = index.outputs; - this.fsa = compiled.runAutomaton; - this.level = -1; - this.stack = new Frame[16]; - for (int i = 0 ; i < stack.length; i++) { - this.stack[i] = new Frame(); - } - - Frame frame; - frame = loadVirtualFrame(newFrame()); - this.level++; - frame = loadFirstFrame(newFrame()); - pushFrame(frame); - - this.decoded = false; - this.pending = false; - - if (startTerm == null) { - pending = isAccept(topFrame()); - } else { - doSeekCeil(startTerm); - pending = (term == null || !startTerm.equals(term.get())) && isValid(topFrame()) && isAccept(topFrame()); - } - } - - @Override - public BytesRef term() throws IOException { - return term == null ? 
null : term.get(); - } - - @Override - void decodeMetaData() throws IOException { - if (!decoded) { - super.decodeMetaData(); - decoded = true; - } - } - - @Override - void decodeStats() throws IOException { - ord = topFrame().output; - super.decodeStats(); - } - - @Override - public SeekStatus seekCeil(BytesRef target) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public BytesRef next() throws IOException { - //if (TEST) System.out.println("Enum next()"); - if (pending) { - pending = false; - decodeStats(); - return term(); - } - decoded = false; - DFS: - while (level > 0) { - Frame frame = newFrame(); - if (loadExpandFrame(topFrame(), frame) != null) { // has valid target - pushFrame(frame); - if (isAccept(frame)) { // gotcha - break; - } - continue; // check next target - } - frame = popFrame(); - while(level > 0) { - if (loadNextFrame(topFrame(), frame) != null) { // has valid sibling - pushFrame(frame); - if (isAccept(frame)) { // gotcha - break DFS; - } - continue DFS; // check next target - } - frame = popFrame(); - } - return null; - } - decodeStats(); - return term(); - } - - BytesRef doSeekCeil(BytesRef target) throws IOException { - //if (TEST) System.out.println("Enum doSeekCeil()"); - Frame frame= null; - int label, upto = 0, limit = target.length; - while (upto < limit) { // to target prefix, or ceil label (rewind prefix) - frame = newFrame(); - label = target.bytes[upto] & 0xff; - frame = loadCeilFrame(label, topFrame(), frame); - if (frame == null || frame.arc.label() != label) { - break; - } - assert isValid(frame); // target must be fetched from automaton - pushFrame(frame); - upto++; - } - if (upto == limit) { // got target - return term(); - } - if (frame != null) { // got larger term('s prefix) - pushFrame(frame); - return isAccept(frame) ? term() : next(); - } - while (level > 0) { // got target's prefix, advance to larger term - frame = popFrame(); - while (level > 0 && !canRewind(frame)) { - frame = popFrame(); - } - if (loadNextFrame(topFrame(), frame) != null) { - pushFrame(frame); - return isAccept(frame) ? 
term() : next(); - } - } - return null; - } - - /** Virtual frame, never pop */ - Frame loadVirtualFrame(Frame frame) { - frame.output = fstOutputs.getNoOutput(); - frame.state = -1; - return frame; - } - - /** Load frame for start arc(node) on fst */ - Frame loadFirstFrame(Frame frame) { - frame.arc = fst.getFirstArc(frame.arc); - frame.output = frame.arc.output(); - frame.state = 0; - return frame; - } - - /** Load frame for target arc(node) on fst */ - Frame loadExpandFrame(Frame top, Frame frame) throws IOException { - if (!canGrow(top)) { - return null; - } - frame.arc = fst.readFirstRealTargetArc(top.arc.target(), frame.arc, fstReader); - frame.state = fsa.step(top.state, frame.arc.label()); - frame.output = frame.arc.output(); - //if (TEST) System.out.println(" loadExpand frame="+frame); - if (frame.state == -1) { - return loadNextFrame(top, frame); - } - return frame; - } - - /** Load frame for sibling arc(node) on fst */ - Frame loadNextFrame(Frame top, Frame frame) throws IOException { - if (!canRewind(frame)) { - return null; - } - while (!frame.arc.isLast()) { - frame.arc = fst.readNextRealArc(frame.arc, fstReader); - frame.output = frame.arc.output(); - frame.state = fsa.step(top.state, frame.arc.label()); - if (frame.state != -1) { - break; - } - } - //if (TEST) System.out.println(" loadNext frame="+frame); - if (frame.state == -1) { - return null; - } - return frame; - } - - /** Load frame for target arc(node) on fst, so that - * arc.label >= label and !fsa.reject(arc.label) */ - Frame loadCeilFrame(int label, Frame top, Frame frame) throws IOException { - FST.Arc arc = frame.arc; - arc = Util.readCeilArc(label, fst, top.arc, arc, fstReader); - if (arc == null) { - return null; - } - frame.state = fsa.step(top.state, arc.label()); - //if (TEST) System.out.println(" loadCeil frame="+frame); - if (frame.state == -1) { - return loadNextFrame(top, frame); - } - frame.output = arc.output(); - return frame; - } - - boolean isAccept(Frame frame) { // reach a term both fst&fsa accepts - return fsa.isAccept(frame.state) && frame.arc.isFinal(); - } - boolean isValid(Frame frame) { // reach a prefix both fst&fsa won't reject - return /*frame != null &&*/ frame.state != -1; - } - boolean canGrow(Frame frame) { // can walk forward on both fst&fsa - return frame.state != -1 && FST.targetHasArcs(frame.arc); - } - boolean canRewind(Frame frame) { // can jump to sibling - return !frame.arc.isLast(); - } - - void pushFrame(Frame frame) { - final FST.Arc arc = frame.arc; - frame.output = fstOutputs.add(topFrame().output, frame.output); - term = grow(arc.label()); - level++; - assert frame == stack[level]; - } - - Frame popFrame() { - term = shrink(); - return stack[level--]; - } - - Frame newFrame() { - if (level+1 == stack.length) { - final Frame[] temp = new Frame[ArrayUtil.oversize(level+2, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; - System.arraycopy(stack, 0, temp, 0, stack.length); - for (int i = stack.length; i < temp.length; i++) { - temp[i] = new Frame(); - } - stack = temp; - } - return stack[level+1]; - } - - Frame topFrame() { - return stack[level]; - } - - BytesRefBuilder grow(int label) { - if (term == null) { - term = new BytesRefBuilder(); - } else { - term.append((byte) label); - } - return term; - } - - BytesRefBuilder shrink() { - if (term.length() == 0) { - term = null; - } else { - term.setLength(term.length() - 1); - } - return term; - } - } - } - - static void walk(FST fst) throws IOException { - final ArrayList> queue = new ArrayList<>(); - final BitSet seen = new 
BitSet(); - final FST.BytesReader reader = fst.getBytesReader(); - final FST.Arc startArc = fst.getFirstArc(new FST.Arc()); - queue.add(startArc); - while (!queue.isEmpty()) { - final FST.Arc arc = queue.remove(0); - final long node = arc.target(); - //System.out.println(arc); - if (FST.targetHasArcs(arc) && !seen.get((int) node)) { - seen.set((int) node); - fst.readFirstRealTargetArc(node, arc, reader); - while (true) { - queue.add(new FST.Arc().copyFrom(arc)); - if (arc.isLast()) { - break; - } else { - fst.readNextRealArc(arc, reader); - } - } - } - } - } - - @Override - public long ramBytesUsed() { - long ramBytesUsed = postingsReader.ramBytesUsed(); - for (TermsReader r : fields.values()) { - ramBytesUsed += r.ramBytesUsed(); - } - return ramBytesUsed; - } - - @Override - public Collection getChildResources() { - List resources = new ArrayList<>(Accountables.namedAccountables("field", fields)); - resources.add(Accountables.namedAccountable("delegate", postingsReader)); - return Collections.unmodifiableList(resources); - } - - @Override - public String toString() { - return getClass().getSimpleName() + "(fields=" + fields.size() + ",delegate=" + postingsReader + ")"; - } - - @Override - public void checkIntegrity() throws IOException { - postingsReader.checkIntegrity(); - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java deleted file mode 100644 index a31a2f940b3..00000000000 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsWriter.java +++ /dev/null @@ -1,386 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-package org.apache.lucene.codecs.memory;
-
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.codecs.BlockTermState;
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.NormsProducer;
-import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.store.ByteBuffersDataOutput;
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.IntsRefBuilder;
-import org.apache.lucene.util.fst.FSTCompiler;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.fst.Util;
-
-/**
- * FST-based term dict, using ord as FST output.
- *
- * The FST holds the mapping between <term, ord>, and
- * term's metadata is delta encoded into a single byte block.
- *
- * Typically the byte block consists of four parts:
- * 1. term statistics: docFreq, totalTermFreq;
- * 2. monotonic long[], e.g. the pointer to the postings list for that term;
- * 3. generic byte[], e.g. other information customized by postings base.
- * 4. single-level skip list to speed up metadata decoding by ord.
- *
- * Files:
- *   .tix: Term Index
- *   .tbk: Term Block
- *
- * Term Index
- *
- * The .tix contains a list of FSTs, one for each field.
- * The FST maps a term to its corresponding order in the current field.
- *
- *   TermIndex(.tix) --> Header, TermFST^NumFields, Footer
- *   TermFST --> {@link FST FST<long>}
- *   Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
- *   Footer --> {@link CodecUtil#writeFooter CodecFooter}
- *
- * Notes:
- *   Since terms are already sorted before writing to the Term Block,
- *   their ords can be used directly to seek term metadata from the term block.
- *
- * Term Block
- *
- * The .tbk contains all the statistics and metadata for terms, along with field summary (e.g.
- * per-field data like the number of documents in the current field). For each field, there are four blocks:
- *   statistics bytes block: contains term statistics;
- *   metadata longs block: delta-encodes the monotonic part of the metadata;
- *   metadata bytes block: encodes other parts of the metadata;
- *   skip block: contains skip data, to speed up metadata seeking and decoding.
- *
- * File Format:
- *   TermBlock(.tbk) --> Header, PostingsHeader, FieldSummary, DirOffset
- *   FieldSummary --> NumFields, <FieldNumber, NumTerms, SumTotalTermFreq?, SumDocFreq,
- *     DocCount, LongsSize, DataBlock>^NumFields, Footer
- *   DataBlock --> StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- *     SkipBlock, StatsBlock, MetaLongsBlock, MetaBytesBlock
- *   SkipBlock --> <StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta,
- *     MetaLongsSkipDelta^LongsSize>^NumTerms
- *   StatsBlock --> <DocFreq[Same?], (TotalTermFreq-DocFreq)?>^NumTerms
- *   MetaLongsBlock --> <LongDelta^LongsSize, BytesSize>^NumTerms
- *   MetaBytesBlock --> Byte^MetaBytesBlockLength
- *   Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
- *   DirOffset --> {@link DataOutput#writeLong Uint64}
- *   NumFields, FieldNumber, DocCount, DocFreq, LongsSize --> {@link DataOutput#writeVInt VInt}
- *   NumTerms, SumTotalTermFreq, SumDocFreq, StatsBlockLength, MetaLongsBlockLength, MetaBytesBlockLength,
- *     StatsFPDelta, MetaLongsSkipFPDelta, MetaBytesSkipFPDelta, MetaLongsSkipStart, TotalTermFreq,
- *     LongDelta --> {@link DataOutput#writeVLong VLong}
- *   Footer --> {@link CodecUtil#writeFooter CodecFooter}
- *
- * Notes:
- *   The formats of PostingsHeader and MetaBytes are customized by the specific postings implementation:
- *   they contain arbitrary per-file data (such as parameters or versioning information) and per-term data
- *   (non-monotonic values such as pulsed postings data).
- *
- *   During initialization the reader loads all the blocks into memory. SkipBlock is decoded, so that during seek
- *   the term dict can look up file pointers directly. StatsFPDelta, MetaLongsSkipFPDelta, etc. are the file offsets
- *   for every SkipInterval'th term. MetaLongsSkipDelta is the difference from the previous one, which indicates
- *   the value of the preceding metadata longs for every SkipInterval'th term.
- *
- *   DocFreq is the count of documents which contain the term. TotalTermFreq is the total number of occurrences of the term.
- *   Usually these two values are the same for long-tail terms, therefore one bit is stolen from DocFreq to flag this case,
- *   so that the encoding of TotalTermFreq may be omitted.
- *
- * @lucene.experimental
- */
-
-public class FSTOrdTermsWriter extends FieldsConsumer {
-  static final String TERMS_INDEX_EXTENSION = "tix";
-  static final String TERMS_BLOCK_EXTENSION = "tbk";
-  static final String TERMS_CODEC_NAME = "FSTOrdTerms";
-  static final String TERMS_INDEX_CODEC_NAME = "FSTOrdIndex";
-
-  public static final int VERSION_START = 2;
-  public static final int VERSION_CURRENT = VERSION_START;
-  public static final int SKIP_INTERVAL = 8;
-
-  final PostingsWriterBase postingsWriter;
-  final FieldInfos fieldInfos;
-  final int maxDoc;
-  final List<FieldMetaData> fields = new ArrayList<>();
-  IndexOutput blockOut = null;
-  IndexOutput indexOut = null;
-
-  public FSTOrdTermsWriter(SegmentWriteState state, PostingsWriterBase postingsWriter) throws IOException {
-    final String termsIndexFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-    final String termsBlockFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_BLOCK_EXTENSION);
-
-    this.postingsWriter = postingsWriter;
-    this.fieldInfos = state.fieldInfos;
-    this.maxDoc = state.segmentInfo.maxDoc();
-
-    boolean success = false;
-    try {
-      this.indexOut = state.directory.createOutput(termsIndexFileName, state.context);
-      this.blockOut = state.directory.createOutput(termsBlockFileName, state.context);
-      CodecUtil.writeIndexHeader(indexOut, TERMS_INDEX_CODEC_NAME, VERSION_CURRENT,
-          state.segmentInfo.getId(), state.segmentSuffix);
-      CodecUtil.writeIndexHeader(blockOut, TERMS_CODEC_NAME, VERSION_CURRENT,
-          state.segmentInfo.getId(), state.segmentSuffix);
-      this.postingsWriter.init(blockOut, state);
-      success = true;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(indexOut, blockOut);
-      }
-    }
-  }
-
-  @Override
-  public void write(Fields fields, NormsProducer norms) throws IOException {
-    for (String field : fields) {
-      Terms terms = fields.terms(field);
-      if (terms == null) {
-        continue;
-      }
-      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-      boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-      TermsEnum termsEnum = terms.iterator();
-      TermsWriter termsWriter = new TermsWriter(fieldInfo);
-
-      long sumTotalTermFreq = 0;
-      long sumDocFreq = 0;
-      FixedBitSet docsSeen = new FixedBitSet(maxDoc);
-      while (true) {
-        BytesRef term = termsEnum.next();
-        if (term == null) {
-          break;
-        }
-        BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
-        if (termState != null) {
-          termsWriter.finishTerm(term, termState);
-          sumTotalTermFreq += termState.totalTermFreq;
-          sumDocFreq += termState.docFreq;
-        }
-      }
-
-      termsWriter.finish(hasFreq ?
sumTotalTermFreq : -1, sumDocFreq, docsSeen.cardinality()); - } - } - - @Override - public void close() throws IOException { - if (blockOut != null) { - boolean success = false; - try { - final long blockDirStart = blockOut.getFilePointer(); - - // write field summary - blockOut.writeVInt(fields.size()); - for (FieldMetaData field : fields) { - blockOut.writeVInt(field.fieldInfo.number); - blockOut.writeVLong(field.numTerms); - if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) { - blockOut.writeVLong(field.sumTotalTermFreq); - } - blockOut.writeVLong(field.sumDocFreq); - blockOut.writeVInt(field.docCount); - blockOut.writeVInt(field.longsSize); - blockOut.writeVLong(field.statsOut.size()); - blockOut.writeVLong(field.metaLongsOut.size()); - blockOut.writeVLong(field.metaBytesOut.size()); - - field.skipOut.copyTo(blockOut); - field.statsOut.copyTo(blockOut); - field.metaLongsOut.copyTo(blockOut); - field.metaBytesOut.copyTo(blockOut); - field.dict.save(indexOut); - } - writeTrailer(blockOut, blockDirStart); - CodecUtil.writeFooter(indexOut); - CodecUtil.writeFooter(blockOut); - success = true; - } finally { - if (success) { - IOUtils.close(blockOut, indexOut, postingsWriter); - } else { - IOUtils.closeWhileHandlingException(blockOut, indexOut, postingsWriter); - } - blockOut = null; - } - } - } - - private void writeTrailer(IndexOutput out, long dirStart) throws IOException { - out.writeLong(dirStart); - } - - private static class FieldMetaData { - public FieldInfo fieldInfo; - public long numTerms; - public long sumTotalTermFreq; - public long sumDocFreq; - public int docCount; - public int longsSize; - public FST dict; - - // TODO: block encode each part - - // vint encode next skip point (fully decoded when reading) - public ByteBuffersDataOutput skipOut; - // vint encode df, (ttf-df) - public ByteBuffersDataOutput statsOut; - // vint encode monotonic long[] and length for corresponding byte[] - public ByteBuffersDataOutput metaLongsOut; - // generic byte[] - public ByteBuffersDataOutput metaBytesOut; - } - - final class TermsWriter { - private final FSTCompiler fstCompiler; - private final PositiveIntOutputs outputs; - private final FieldInfo fieldInfo; - private final int longsSize; - private long numTerms; - - private final IntsRefBuilder scratchTerm = new IntsRefBuilder(); - private final ByteBuffersDataOutput statsOut = new ByteBuffersDataOutput(); - private final ByteBuffersDataOutput metaLongsOut = new ByteBuffersDataOutput(); - private final ByteBuffersDataOutput metaBytesOut = new ByteBuffersDataOutput(); - private final ByteBuffersDataOutput skipOut = new ByteBuffersDataOutput(); - private long lastBlockStatsFP; - private long lastBlockMetaLongsFP; - private long lastBlockMetaBytesFP; - private long[] lastBlockLongs; - - private long[] lastLongs; - private long lastMetaBytesFP; - - TermsWriter(FieldInfo fieldInfo) { - this.numTerms = 0; - this.fieldInfo = fieldInfo; - this.longsSize = postingsWriter.setField(fieldInfo); - this.outputs = PositiveIntOutputs.getSingleton(); - this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs); - - this.lastBlockStatsFP = 0; - this.lastBlockMetaLongsFP = 0; - this.lastBlockMetaBytesFP = 0; - this.lastBlockLongs = new long[longsSize]; - - this.lastLongs = new long[longsSize]; - this.lastMetaBytesFP = 0; - } - - public void finishTerm(BytesRef text, BlockTermState state) throws IOException { - if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) { - bufferSkip(); - } - // write term meta data into fst - final long longs[] = 
new long[longsSize]; - final long delta = state.totalTermFreq - state.docFreq; - if (state.totalTermFreq > 0) { - if (delta == 0) { - statsOut.writeVInt(state.docFreq<<1|1); - } else { - statsOut.writeVInt(state.docFreq<<1); - statsOut.writeVLong(state.totalTermFreq-state.docFreq); - } - } else { - statsOut.writeVInt(state.docFreq); - } - postingsWriter.encodeTerm(longs, metaBytesOut, fieldInfo, state, true); - for (int i = 0; i < longsSize; i++) { - metaLongsOut.writeVLong(longs[i] - lastLongs[i]); - lastLongs[i] = longs[i]; - } - metaLongsOut.writeVLong(metaBytesOut.size() - lastMetaBytesFP); - - fstCompiler.add(Util.toIntsRef(text, scratchTerm), numTerms); - numTerms++; - - lastMetaBytesFP = metaBytesOut.size(); - } - - public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException { - if (numTerms > 0) { - final FieldMetaData metadata = new FieldMetaData(); - metadata.fieldInfo = fieldInfo; - metadata.numTerms = numTerms; - metadata.sumTotalTermFreq = sumTotalTermFreq; - metadata.sumDocFreq = sumDocFreq; - metadata.docCount = docCount; - metadata.longsSize = longsSize; - metadata.skipOut = skipOut; - metadata.statsOut = statsOut; - metadata.metaLongsOut = metaLongsOut; - metadata.metaBytesOut = metaBytesOut; - metadata.dict = fstCompiler.compile(); - fields.add(metadata); - } - } - - private void bufferSkip() throws IOException { - skipOut.writeVLong(statsOut.size() - lastBlockStatsFP); - skipOut.writeVLong(metaLongsOut.size() - lastBlockMetaLongsFP); - skipOut.writeVLong(metaBytesOut.size() - lastBlockMetaBytesFP); - for (int i = 0; i < longsSize; i++) { - skipOut.writeVLong(lastLongs[i] - lastBlockLongs[i]); - } - lastBlockStatsFP = statsOut.size(); - lastBlockMetaLongsFP = metaLongsOut.size(); - lastBlockMetaBytesFP = metaBytesOut.size(); - System.arraycopy(lastLongs, 0, lastBlockLongs, 0, longsSize); - } - } -} diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java index 3695fe872e5..d2df231db0c 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermOutputs.java @@ -41,7 +41,6 @@ class FSTTermOutputs extends Outputs { private final static TermData NO_OUTPUT = new TermData(); //private static boolean TEST = false; private final boolean hasPos; - private final int longsSize; /** * Represents the metadata for one term. 
@@ -50,18 +49,15 @@ class FSTTermOutputs extends Outputs { */ static class TermData implements Accountable { private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermData.class); - long[] longs; byte[] bytes; int docFreq; long totalTermFreq; TermData() { - this.longs = null; this.bytes = null; this.docFreq = 0; this.totalTermFreq = -1; } - TermData(long[] longs, byte[] bytes, int docFreq, long totalTermFreq) { - this.longs = longs; + TermData(byte[] bytes, int docFreq, long totalTermFreq) { this.bytes = bytes; this.docFreq = docFreq; this.totalTermFreq = totalTermFreq; @@ -70,9 +66,6 @@ class FSTTermOutputs extends Outputs { @Override public long ramBytesUsed() { long ramBytesUsed = BASE_RAM_BYTES_USED; - if (longs != null) { - ramBytesUsed += RamUsageEstimator.sizeOf(longs); - } if (bytes != null) { ramBytesUsed += RamUsageEstimator.sizeOf(bytes); } @@ -85,14 +78,7 @@ class FSTTermOutputs extends Outputs { @Override public int hashCode() { int hash = 0; - if (longs != null) { - final int end = longs.length; - for (int i = 0; i < end; i++) { - hash -= longs[i]; - } - } if (bytes != null) { - hash = -hash; final int end = bytes.length; for (int i = 0; i < end; i++) { hash += bytes[i]; @@ -104,7 +90,7 @@ class FSTTermOutputs extends Outputs { @Override public String toString() { - return "FSTTermOutputs$TermData longs=" + Arrays.toString(longs) + " bytes=" + Arrays.toString(bytes) + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq; + return "FSTTermOutputs$TermData bytes=" + Arrays.toString(bytes) + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq; } @Override @@ -116,15 +102,13 @@ class FSTTermOutputs extends Outputs { } TermData other = (TermData) other_; return statsEqual(this, other) && - longsEqual(this, other) && bytesEqual(this, other); } } - protected FSTTermOutputs(FieldInfo fieldInfo, int longsSize) { + protected FSTTermOutputs(FieldInfo fieldInfo) { this.hasPos = fieldInfo.getIndexOptions() != IndexOptions.DOCS; - this.longsSize = longsSize; } @Override @@ -145,37 +129,13 @@ class FSTTermOutputs extends Outputs { //if (TEST) System.out.println("ret:"+NO_OUTPUT); return NO_OUTPUT; } - assert t1.longs.length == t2.longs.length; - long[] min = t1.longs, max = t2.longs; - int pos = 0; TermData ret; - while (pos < longsSize && min[pos] == max[pos]) { - pos++; - } - if (pos < longsSize) { // unequal long[] - if (min[pos] > max[pos]) { - min = t2.longs; - max = t1.longs; - } - // check whether strictly smaller - while (pos < longsSize && min[pos] <= max[pos]) { - pos++; - } - if (pos < longsSize || allZero(min)) { // not comparable or all-zero - ret = NO_OUTPUT; - } else { - ret = new TermData(min, null, 0, -1); - } - } else { // equal long[] - if (statsEqual(t1, t2) && bytesEqual(t1, t2)) { - ret = t1; - } else if (allZero(min)) { - ret = NO_OUTPUT; - } else { - ret = new TermData(min, null, 0, -1); - } + if (statsEqual(t1, t2) && bytesEqual(t1, t2)) { + ret = t1; + } else { + ret = NO_OUTPUT; } //if (TEST) System.out.println("ret:"+ret); return ret; @@ -188,23 +148,12 @@ class FSTTermOutputs extends Outputs { //if (TEST) System.out.println("ret:"+t1); return t1; } - assert t1.longs.length == t2.longs.length; - - int pos = 0; - long diff = 0; - long[] share = new long[longsSize]; - - while (pos < longsSize) { - share[pos] = t1.longs[pos] - t2.longs[pos]; - diff += share[pos]; - pos++; - } TermData ret; - if (diff == 0 && statsEqual(t1, t2) && bytesEqual(t1, t2)) { + if (statsEqual(t1, t2) && bytesEqual(t1, t2)) { ret = NO_OUTPUT; } 
else { - ret = new TermData(share, t1.bytes, t1.docFreq, t1.totalTermFreq); + ret = new TermData(t1.bytes, t1.docFreq, t1.totalTermFreq); } //if (TEST) System.out.println("ret:"+ret); return ret; @@ -223,21 +172,12 @@ class FSTTermOutputs extends Outputs { //if (TEST) System.out.println("ret:"+t1); return t1; } - assert t1.longs.length == t2.longs.length; - - int pos = 0; - long[] accum = new long[longsSize]; - - while (pos < longsSize) { - accum[pos] = t1.longs[pos] + t2.longs[pos]; - pos++; - } TermData ret; if (t2.bytes != null || t2.docFreq > 0) { - ret = new TermData(accum, t2.bytes, t2.docFreq, t2.totalTermFreq); + ret = new TermData(t2.bytes, t2.docFreq, t2.totalTermFreq); } else { - ret = new TermData(accum, t1.bytes, t1.docFreq, t1.totalTermFreq); + ret = new TermData(t1.bytes, t1.docFreq, t1.totalTermFreq); } //if (TEST) System.out.println("ret:"+ret); return ret; @@ -246,13 +186,12 @@ class FSTTermOutputs extends Outputs { @Override public void write(TermData data, DataOutput out) throws IOException { assert hasPos || data.totalTermFreq == -1; - int bit0 = allZero(data.longs) ? 0 : 1; - int bit1 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1) << 1; - int bit2 = ((data.docFreq == 0) ? 0 : 1) << 2; - int bits = bit0 | bit1 | bit2; - if (bit1 > 0) { // determine extra length + int bit0 = ((data.bytes == null || data.bytes.length == 0) ? 0 : 1); + int bit1 = ((data.docFreq == 0) ? 0 : 1) << 1; + int bits = bit0 | bit1; + if (bit0 > 0) { // determine extra length if (data.bytes.length < 32) { - bits |= (data.bytes.length << 3); + bits |= (data.bytes.length << 2); out.writeByte((byte)bits); } else { out.writeByte((byte)bits); @@ -261,15 +200,10 @@ class FSTTermOutputs extends Outputs { } else { out.writeByte((byte)bits); } - if (bit0 > 0) { // not all-zero case - for (int pos = 0; pos < longsSize; pos++) { - out.writeVLong(data.longs[pos]); - } - } - if (bit1 > 0) { // bytes exists + if (bit0 > 0) { // bytes exists out.writeBytes(data.bytes, 0, data.bytes.length); } - if (bit2 > 0) { // stats exist + if (bit1 > 0) { // stats exist if (hasPos) { if (data.docFreq == data.totalTermFreq) { out.writeVInt((data.docFreq << 1) | 1); @@ -285,28 +219,21 @@ class FSTTermOutputs extends Outputs { @Override public TermData read(DataInput in) throws IOException { - long[] longs = new long[longsSize]; byte[] bytes = null; int docFreq = 0; long totalTermFreq = -1; int bits = in.readByte() & 0xff; int bit0 = bits & 1; int bit1 = bits & 2; - int bit2 = bits & 4; - int bytesSize = (bits >>> 3); - if (bit1 > 0 && bytesSize == 0) { // determine extra length + int bytesSize = (bits >>> 2); + if (bit0 > 0 && bytesSize == 0) { // determine extra length bytesSize = in.readVInt(); } - if (bit0 > 0) { // not all-zero case - for (int pos = 0; pos < longsSize; pos++) { - longs[pos] = in.readVLong(); - } - } - if (bit1 > 0) { // bytes exists + if (bit0 > 0) { // bytes exists bytes = new byte[bytesSize]; in.readBytes(bytes, 0, bytesSize); } - if (bit2 > 0) { // stats exist + if (bit1 > 0) { // stats exist int code = in.readVInt(); if (hasPos) { totalTermFreq = docFreq = code >>> 1; @@ -317,7 +244,7 @@ class FSTTermOutputs extends Outputs { docFreq = code; } } - return new TermData(longs, bytes, docFreq, totalTermFreq); + return new TermData(bytes, docFreq, totalTermFreq); } @@ -326,20 +253,14 @@ class FSTTermOutputs extends Outputs { int bits = in.readByte() & 0xff; int bit0 = bits & 1; int bit1 = bits & 2; - int bit2 = bits & 4; - int bytesSize = (bits >>> 3); - if (bit1 > 0 && bytesSize == 0) { // 
determine extra length + int bytesSize = (bits >>> 2); + if (bit0 > 0 && bytesSize == 0) { // determine extra length bytesSize = in.readVInt(); } - if (bit0 > 0) { // not all-zero case - for (int pos = 0; pos < longsSize; pos++) { - in.readVLong(); - } - } - if (bit1 > 0) { // bytes exists + if (bit0 > 0) { // bytes exists in.skipBytes(bytesSize); } - if (bit2 > 0) { // stats exist + if (bit1 > 0) { // stats exist int code = in.readVInt(); if (hasPos && (code & 1) == 0) { in.readVLong(); @@ -366,18 +287,4 @@ class FSTTermOutputs extends Outputs { } return t1.bytes != null && t2.bytes != null && Arrays.equals(t1.bytes, t2.bytes); } - static boolean longsEqual(final TermData t1, final TermData t2) { - if (t1.longs == null && t2.longs == null) { - return true; - } - return t1.longs != null && t2.longs != null && Arrays.equals(t1.longs, t2.longs); - } - static boolean allZero(final long[] l) { - for (int i = 0; i < l.length; i++) { - if (l[i] != 0) { - return false; - } - } - return true; - } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index 33084766424..8c232fa8d48 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -99,8 +99,7 @@ public class FSTTermsReader extends FieldsProducer { // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong(); int docCount = in.readVInt(); - int longsSize = in.readVInt(); - TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); + TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount); TermsReader previous = fields.put(fieldInfo.name, current); checkFieldSummary(state.segmentInfo, in, current, previous); } @@ -169,17 +168,15 @@ public class FSTTermsReader extends FieldsProducer { final long sumTotalTermFreq; final long sumDocFreq; final int docCount; - final int longsSize; final FST dict; - TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) throws IOException { + TermsReader(FieldInfo fieldInfo, IndexInput in, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException { this.fieldInfo = fieldInfo; this.numTerms = numTerms; this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; - this.longsSize = longsSize; - this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo, longsSize)); + this.dict = new FST<>(in, new FSTTermOutputs(fieldInfo)); } @Override @@ -349,7 +346,7 @@ public class FSTTermsReader extends FieldsProducer { if (meta.bytes != null) { bytesReader.reset(meta.bytes, 0, meta.bytes.length); } - postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true); + postingsReader.decodeTerm(bytesReader, fieldInfo, state, true); decoded = true; } } @@ -495,7 +492,7 @@ public class FSTTermsReader extends FieldsProducer { if (meta.bytes != null) { bytesReader.reset(meta.bytes, 0, meta.bytes.length); } - postingsReader.decodeTerm(meta.longs, bytesReader, fieldInfo, state, true); + postingsReader.decodeTerm(bytesReader, fieldInfo, state, true); decoded = true; } } diff --git 
a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java index 2ef15651041..fcc0d00a593 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsWriter.java @@ -209,7 +209,6 @@ public class FSTTermsWriter extends FieldsConsumer { } out.writeVLong(field.sumDocFreq); out.writeVInt(field.docCount); - out.writeVInt(field.longsSize); field.dict.save(out); } writeTrailer(out, dirStart); @@ -232,16 +231,14 @@ public class FSTTermsWriter extends FieldsConsumer { public final long sumTotalTermFreq; public final long sumDocFreq; public final int docCount; - public final int longsSize; public final FST dict; - public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, FST fst) { + public FieldMetaData(FieldInfo fieldInfo, long numTerms, long sumTotalTermFreq, long sumDocFreq, int docCount, FST fst) { this.fieldInfo = fieldInfo; this.numTerms = numTerms; this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; - this.longsSize = longsSize; this.dict = fst; } } @@ -250,7 +247,6 @@ public class FSTTermsWriter extends FieldsConsumer { private final FSTCompiler fstCompiler; private final FSTTermOutputs outputs; private final FieldInfo fieldInfo; - private final int longsSize; private long numTerms; private final IntsRefBuilder scratchTerm = new IntsRefBuilder(); @@ -259,19 +255,18 @@ public class FSTTermsWriter extends FieldsConsumer { TermsWriter(FieldInfo fieldInfo) { this.numTerms = 0; this.fieldInfo = fieldInfo; - this.longsSize = postingsWriter.setField(fieldInfo); - this.outputs = new FSTTermOutputs(fieldInfo, longsSize); + postingsWriter.setField(fieldInfo); + this.outputs = new FSTTermOutputs(fieldInfo); this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs); } public void finishTerm(BytesRef text, BlockTermState state) throws IOException { // write term meta data into fst final FSTTermOutputs.TermData meta = new FSTTermOutputs.TermData(); - meta.longs = new long[longsSize]; meta.bytes = null; meta.docFreq = state.docFreq; meta.totalTermFreq = state.totalTermFreq; - postingsWriter.encodeTerm(meta.longs, metaWriter, fieldInfo, state, true); + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, true); if (metaWriter.size() > 0) { meta.bytes = metaWriter.toArrayCopy(); metaWriter.reset(); @@ -284,7 +279,7 @@ public class FSTTermsWriter extends FieldsConsumer { // save FST dict if (numTerms > 0) { final FST fst = fstCompiler.compile(); - fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, fst)); + fields.add(new FieldMetaData(fieldInfo, numTerms, sumTotalTermFreq, sumDocFreq, docCount, fst)); } } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java index 52c7465d093..ec73ddcb838 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/DeltaBaseTermStateSerializer.java @@ -94,7 +94,7 @@ public class DeltaBaseTermStateSerializer implements Accountable { /** * Writes a {@link BlockTermState} to the provided {@link DataOutput}. *

- * Simpler variant of {@link Lucene84PostingsWriter#encodeTerm(long[], DataOutput, FieldInfo, BlockTermState, boolean)}. + * Simpler variant of {@link Lucene84PostingsWriter#encodeTerm(DataOutput, FieldInfo, BlockTermState, boolean)}. */ public void writeTermState(DataOutput termStatesOutput, FieldInfo fieldInfo, BlockTermState termState) throws IOException { IndexOptions indexOptions = fieldInfo.getIndexOptions(); @@ -143,7 +143,7 @@ public class DeltaBaseTermStateSerializer implements Accountable { /** * Reads a {@link BlockTermState} from the provided {@link DataInput}. *

- * Simpler variant of {@link Lucene84PostingsReader#decodeTerm(long[], DataInput, FieldInfo, BlockTermState, boolean)}. + * Simpler variant of {@link Lucene84PostingsReader#decodeTerm(DataInput, FieldInfo, BlockTermState, boolean)}. * * @param reuse {@link BlockTermState} to reuse; or null to create a new one. */ diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat index 55b8a48e3ef..09f2491c801 100644 --- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat +++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat @@ -16,7 +16,6 @@ org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat org.apache.lucene.codecs.memory.DirectPostingsFormat -org.apache.lucene.codecs.memory.FSTOrdPostingsFormat org.apache.lucene.codecs.memory.FSTPostingsFormat org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPostingsFormat diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPostingsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPostingsFormat.java deleted file mode 100644 index ec860859a85..00000000000 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/memory/TestFSTOrdPostingsFormat.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.codecs.memory; - - -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.index.BasePostingsFormatTestCase; -import org.apache.lucene.util.TestUtil; - -/** - * Tests FSTOrdPostingsFormat - */ -public class TestFSTOrdPostingsFormat extends BasePostingsFormatTestCase { - private final Codec codec = TestUtil.alwaysPostingsFormat(new FSTOrdPostingsFormat()); - - @Override - protected Codec getCodec() { - return codec; - } -} diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestTermBytesComparator.java b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestTermBytesComparator.java index a77e7820152..8ef246fbb30 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestTermBytesComparator.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/TestTermBytesComparator.java @@ -159,7 +159,7 @@ public class TestTermBytesComparator extends LuceneTestCase { } @Override - public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) { + public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) { } @Override diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReaderTest.java b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReaderTest.java index f63d63643fa..6d09fe36e16 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReaderTest.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockReaderTest.java @@ -268,7 +268,7 @@ public class STBlockReaderTest extends LuceneTestCase { } @Override - public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) { + public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) { } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java index 4fed1a07e7a..a1244ca7686 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java @@ -61,7 +61,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable { /** Actually decode metadata for next term * @see PostingsWriterBase#encodeTerm */ - public abstract void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException; + public abstract void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException; /** Must fully consume state, since after this call that * TermState may be reused. */ diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java index 48c6027b286..a8f8ed42aa8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PostingsWriterBase.java @@ -68,21 +68,12 @@ public abstract class PostingsWriterBase implements Closeable { * Usually elements in {@code longs} are file pointers, so each one always * increases when a new term is consumed. {@code out} is used to write generic * bytes, which are not monotonic. 
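To make the API change concrete, here is a minimal sketch (assumed class and field names) of a postings writer under the new contract: the monotonic file pointers that previously went into the long[] are now delta-encoded as vlongs straight into the DataOutput, exactly as the Lucene84PostingsWriter/Reader hunks further down do.

```java
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

// With the long[] gone, a monotonic file pointer is vlong-delta-encoded into the
// generic metadata bytes; `absolute` resets the delta base, as in the real writers.
final class FilePointerDeltas {
  private long lastDocStartFP;

  void encode(DataOutput out, long docStartFP, boolean absolute) throws IOException {
    if (absolute) {
      lastDocStartFP = 0; // restart deltas, e.g. at block boundaries
    }
    out.writeVLong(docStartFP - lastDocStartFP);
    lastDocStartFP = docStartFP;
  }

  long decode(DataInput in, long previousDocStartFP, boolean absolute) throws IOException {
    long base = absolute ? 0 : previousDocStartFP;
    return base + in.readVLong(); // mirror of encode()
  }
}
```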
- * - * NOTE: sometimes long[] might contain "don't care" values that are unused, e.g. - * the pointer to postings list may not be defined for some terms but is defined - * for others, if it is designed to inline some postings data in term dictionary. - * In this case, the postings writer should always use the last value, so that each - * element in metadata long[] remains monotonic. */ - public abstract void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException; + public abstract void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException; /** - * Sets the current field for writing, and returns the - * fixed length of long[] metadata (which is fixed per - * field), called when the writing switches to another field. */ - // TODO: better name? - public abstract int setField(FieldInfo fieldInfo); + * Sets the current field for writing. */ + public abstract void setField(FieldInfo fieldInfo); @Override public abstract void close() throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java index f9770869f24..f51f0c6f967 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/PushPostingsWriterBase.java @@ -87,7 +87,7 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { * fixed length of long[] metadata (which is fixed per * field), called when the writing switches to another field. */ @Override - public int setField(FieldInfo fieldInfo) { + public void setField(FieldInfo fieldInfo) { this.fieldInfo = fieldInfo; indexOptions = fieldInfo.getIndexOptions(); @@ -113,8 +113,6 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase { enumFlags = PostingsEnum.OFFSETS; } } - - return 0; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java index 0a0cd31c857..b9dc0bb436b 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java @@ -128,8 +128,11 @@ public final class BlockTreeTermsReader extends FieldsProducer { /** Auto-prefix terms have been superseded by points. */ public static final int VERSION_AUTO_PREFIX_TERMS_REMOVED = 3; + /** The long[] + byte[] metadata has been replaced with a single byte[]. */ + public static final int VERSION_META_LONGS_REMOVED = 4; + /** Current terms format. */ - public static final int VERSION_CURRENT = VERSION_AUTO_PREFIX_TERMS_REMOVED; + public static final int VERSION_CURRENT = VERSION_META_LONGS_REMOVED; /** Extension of terms index file */ static final String TERMS_INDEX_EXTENSION = "tip"; @@ -212,9 +215,11 @@ public final class BlockTreeTermsReader extends FieldsProducer { // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written. final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
sumTotalTermFreq : termsIn.readVLong(); final int docCount = termsIn.readVInt(); - final int longsSize = termsIn.readVInt(); - if (longsSize < 0) { - throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn); + if (version < VERSION_META_LONGS_REMOVED) { + final int longsSize = termsIn.readVInt(); + if (longsSize < 0) { + throw new CorruptIndexException("invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn); + } } BytesRef minTerm = readBytesRef(termsIn); BytesRef maxTerm = readBytesRef(termsIn); @@ -231,7 +236,7 @@ public final class BlockTreeTermsReader extends FieldsProducer { final long indexStartFP = indexIn.readVLong(); FieldReader previous = fieldMap.put(fieldInfo.name, new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, - indexStartFP, longsSize, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode)); + indexStartFP, indexIn, minTerm, maxTerm, state.openedFromWriter, perFieldLoadMode)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java index deece0b5266..380cf799a4d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java @@ -224,11 +224,10 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { public final long sumTotalTermFreq; public final long sumDocFreq; public final int docCount; - private final int longsSize; public final BytesRef minTerm; public final BytesRef maxTerm; - public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize, + public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount, BytesRef minTerm, BytesRef maxTerm) { assert numTerms > 0; this.fieldInfo = fieldInfo; @@ -239,7 +238,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { this.sumTotalTermFreq = sumTotalTermFreq; this.sumDocFreq = sumDocFreq; this.docCount = docCount; - this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; } @@ -509,7 +507,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { class TermsWriter { private final FieldInfo fieldInfo; - private final int longsSize; private long numTerms; final FixedBitSet docsSeen; long sumTotalTermFreq; @@ -524,8 +521,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { private final BytesRefBuilder lastTerm = new BytesRefBuilder(); private int[] prefixStarts = new int[8]; - private final long[] longs; - // Pending stack of terms and blocks. 
As terms arrive (in sorted order) // we append to this stack, and once the top of the stack has enough // terms starting with a common prefix, we write a new block with @@ -720,13 +715,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { } // Write term meta data - postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute); - for (int pos = 0; pos < longsSize; pos++) { - assert longs[pos] >= 0; - metaWriter.writeVLong(longs[pos]); - } - bytesWriter.copyTo(metaWriter); - bytesWriter.reset(); + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); absolute = false; } } else { @@ -771,13 +760,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { // separate anymore: // Write term meta data - postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute); - for (int pos = 0; pos < longsSize; pos++) { - assert longs[pos] >= 0; - metaWriter.writeVLong(longs[pos]); - } - bytesWriter.copyTo(metaWriter); - bytesWriter.reset(); + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); absolute = false; } else { PendingBlock block = (PendingBlock) ent; @@ -845,9 +828,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { this.fieldInfo = fieldInfo; assert fieldInfo.getIndexOptions() != IndexOptions.NONE; docsSeen = new FixedBitSet(maxDoc); - - this.longsSize = postingsWriter.setField(fieldInfo); - this.longs = new long[longsSize]; + postingsWriter.setField(fieldInfo); } /** Writes one term's worth of postings. */ @@ -964,7 +945,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { sumTotalTermFreq, sumDocFreq, docsSeen.cardinality(), - longsSize, minTerm, maxTerm)); } else { assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1; @@ -976,7 +956,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance(); private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance(); private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance(); - private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance(); } private boolean closed; @@ -1009,7 +988,6 @@ public final class BlockTreeTermsWriter extends FieldsConsumer { } termsOut.writeVLong(field.sumDocFreq); termsOut.writeVInt(field.docCount); - termsOut.writeVInt(field.longsSize); indexOut.writeVLong(field.indexStartFP); writeBytesRef(termsOut, field.minTerm); writeBytesRef(termsOut, field.maxTerm); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java index 9189b63d366..c185cbcb733 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java @@ -58,7 +58,6 @@ public final class FieldReader extends Terms implements Accountable { final BytesRef rootCode; final BytesRef minTerm; final BytesRef maxTerm; - final int longsSize; final BlockTreeTermsReader parent; final FST index; @@ -66,7 +65,7 @@ public final class FieldReader extends Terms implements Accountable { //private boolean DEBUG; FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, - long indexStartFP, int longsSize, IndexInput indexIn, 
BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter, BlockTreeTermsReader.FSTLoadMode fstLoadMode) throws IOException { + long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter, BlockTreeTermsReader.FSTLoadMode fstLoadMode) throws IOException { assert numTerms > 0; this.fieldInfo = fieldInfo; //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id"); @@ -77,7 +76,6 @@ public final class FieldReader extends Terms implements Accountable { this.docCount = docCount; this.indexStartFP = indexStartFP; this.rootCode = rootCode; - this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; // if (DEBUG) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java index b1cfa7c04d2..d64a4aa8232 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java @@ -80,11 +80,8 @@ final class IntersectTermsEnumFrame { FST.Arc arc; final BlockTermState termState; - - // metadata buffer, holding monotonic values - final long[] longs; - // metadata buffer, holding general values + // metadata buffer byte[] bytes = new byte[32]; final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); @@ -102,7 +99,6 @@ final class IntersectTermsEnumFrame { this.ord = ord; this.termState = ite.fr.parent.postingsReader.newTermState(); this.termState.totalTermFreq = -1; - this.longs = new long[ite.fr.longsSize]; } void loadNextFloorBlock() throws IOException { @@ -278,11 +274,8 @@ final class IntersectTermsEnumFrame { } else { termState.totalTermFreq = termState.docFreq + statsReader.readVLong(); } - // metadata - for (int i = 0; i < ite.fr.longsSize; i++) { - longs[i] = bytesReader.readVLong(); - } - ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute); + // metadata + ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute); metaDataUpto++; absolute = false; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java index fdb4cc6955b..1e9e6245a39 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java @@ -85,9 +85,7 @@ final class SegmentTermsEnumFrame { final BlockTermState state; - // metadata buffer, holding monotonic values - final long[] longs; - // metadata buffer, holding general values + // metadata buffer byte[] bytes = new byte[32]; final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); @@ -98,7 +96,6 @@ final class SegmentTermsEnumFrame { this.ord = ord; this.state = ste.fr.parent.postingsReader.newTermState(); this.state.totalTermFreq = -1; - this.longs = new long[ste.fr.longsSize]; } public void setFloorData(ByteArrayDataInput in, BytesRef source) { @@ -424,11 +421,8 @@ final class SegmentTermsEnumFrame { state.totalTermFreq = state.docFreq + statsReader.readVLong(); //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq); } - // metadata - for (int i = 0; i < ste.fr.longsSize; i++) { - longs[i] = bytesReader.readVLong(); - } - ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute); + // 
metadata + ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute); metaDataUpto++; absolute = false; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java index b0620997726..895db33f0b4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsReader.java @@ -166,7 +166,7 @@ public final class Lucene84PostingsReader extends PostingsReaderBase { } @Override - public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) + public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException { final IntBlockTermState termState = (IntBlockTermState) _termState; final boolean fieldHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; @@ -179,11 +179,11 @@ public final class Lucene84PostingsReader extends PostingsReaderBase { termState.payStartFP = 0; } - termState.docStartFP += longs[0]; + termState.docStartFP += in.readVLong(); if (fieldHasPositions) { - termState.posStartFP += longs[1]; + termState.posStartFP += in.readVLong(); if (fieldHasOffsets || fieldHasPayloads) { - termState.payStartFP += longs[2]; + termState.payStartFP += in.readVLong(); } } if (termState.docFreq == 1) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsWriter.java index e42669af415..29d812e59c8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene84/Lucene84PostingsWriter.java @@ -190,20 +190,11 @@ public final class Lucene84PostingsWriter extends PushPostingsWriterBase { } @Override - public int setField(FieldInfo fieldInfo) { + public void setField(FieldInfo fieldInfo) { super.setField(fieldInfo); skipWriter.setField(writePositions, writeOffsets, writePayloads); lastState = emptyState; fieldHasNorms = fieldInfo.hasNorms(); - if (writePositions) { - if (writePayloads || writeOffsets) { - return 3; // doc + pos + pay FP - } else { - return 2; // doc + pos FP - } - } else { - return 1; // doc FP - } } @Override @@ -466,16 +457,16 @@ public final class Lucene84PostingsWriter extends PushPostingsWriterBase { } @Override - public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { + public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { IntBlockTermState state = (IntBlockTermState)_state; if (absolute) { lastState = emptyState; } - longs[0] = state.docStartFP - lastState.docStartFP; + out.writeVLong(state.docStartFP - lastState.docStartFP); if (writePositions) { - longs[1] = state.posStartFP - lastState.posStartFP; + out.writeVLong(state.posStartFP - lastState.posStartFP); if (writePayloads || writeOffsets) { - longs[2] = state.payStartFP - lastState.payStartFP; + out.writeVLong(state.payStartFP - lastState.payStartFP); } } if (state.singletonDocID != -1) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java 
b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java index e7f4c4c7730..3ecd4734b33 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsReader.java @@ -50,7 +50,7 @@ final class IDVersionPostingsReader extends PostingsReaderBase { } @Override - public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) + public void decodeTerm(DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute) throws IOException { final IDVersionTermState termState = (IDVersionTermState) _termState; termState.docID = in.readVInt(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java index 30e19807b31..2ac451fcc08 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionPostingsWriter.java @@ -46,7 +46,6 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase { private long lastVersion; private final Bits liveDocs; - private String segment; public IDVersionPostingsWriter(Bits liveDocs) { this.liveDocs = liveDocs; @@ -60,11 +59,10 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase { @Override public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException { CodecUtil.writeIndexHeader(termsOut, TERMS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); - segment = state.segmentInfo.name; } @Override - public int setField(FieldInfo fieldInfo) { + public void setField(FieldInfo fieldInfo) { super.setField(fieldInfo); if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS"); @@ -75,7 +73,6 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase { throw new IllegalArgumentException("field cannot index term vectors: CheckIndex will report this as index corruption"); } lastState = emptyState; - return 0; } @Override @@ -154,7 +151,7 @@ final class IDVersionPostingsWriter extends PushPostingsWriterBase { private long lastEncodedVersion; @Override - public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { + public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException { IDVersionTermState state = (IDVersionTermState) _state; out.writeVInt(state.docID); if (absolute) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnumFrame.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnumFrame.java index 6d260773353..5b1ea64c405 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnumFrame.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/IDVersionSegmentTermsEnumFrame.java @@ -83,9 +83,7 @@ final class IDVersionSegmentTermsEnumFrame { final BlockTermState state; - // metadata buffer, holding monotonic values - public long[] longs; - // metadata buffer, holding general values + // metadata public byte[] bytes; ByteArrayDataInput bytesReader; @@ -96,7 +94,6 
@@ final class IDVersionSegmentTermsEnumFrame { this.ord = ord; this.state = ste.fr.parent.postingsReader.newTermState(); this.state.totalTermFreq = -1; - this.longs = new long[ste.fr.longsSize]; } public void setFloorData(ByteArrayDataInput in, BytesRef source) { @@ -396,11 +393,8 @@ final class IDVersionSegmentTermsEnumFrame { state.docFreq = 1; state.totalTermFreq = 1; //if (DEBUG) System.out.println(" dF=" + state.docFreq); - // metadata - for (int i = 0; i < ste.fr.longsSize; i++) { - longs[i] = bytesReader.readVLong(); - } - ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute); + // metadata + ste.fr.parent.postingsReader.decodeTerm(bytesReader, ste.fr.fieldInfo, state, absolute); metaDataUpto++; absolute = false; diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java index 8001a22d9ff..ff5d6ec83b9 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java @@ -127,7 +127,6 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer { final long sumDocFreq = numTerms; assert numTerms <= Integer.MAX_VALUE; final int docCount = (int) numTerms; - final int longsSize = in.readVInt(); BytesRef minTerm = readBytesRef(in); BytesRef maxTerm = readBytesRef(in); @@ -143,7 +142,7 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer { final long indexStartFP = indexIn.readVLong(); VersionFieldReader previous = fields.put(fieldInfo.name, new VersionFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, - indexStartFP, longsSize, indexIn, minTerm, maxTerm)); + indexStartFP, indexIn, minTerm, maxTerm)); if (previous != null) { throw new CorruptIndexException("duplicate field: " + fieldInfo.name, in); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java index 9e2f7549f4a..b9c57491a88 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java @@ -143,11 +143,10 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { public final Pair rootCode; public final long numTerms; public final long indexStartFP; - private final int longsSize; public final BytesRef minTerm; public final BytesRef maxTerm; - public FieldMetaData(FieldInfo fieldInfo, Pair rootCode, long numTerms, long indexStartFP, int longsSize, + public FieldMetaData(FieldInfo fieldInfo, Pair rootCode, long numTerms, long indexStartFP, BytesRef minTerm, BytesRef maxTerm) { assert numTerms > 0; this.fieldInfo = fieldInfo; @@ -155,7 +154,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { this.rootCode = rootCode; this.indexStartFP = indexStartFP; this.numTerms = numTerms; - this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; } @@ -403,7 +401,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { class TermsWriter { private final FieldInfo fieldInfo; - private final int longsSize; private long numTerms; final FixedBitSet docsSeen; long indexStartFP; @@ -416,8 +413,6 @@ public final 
class VersionBlockTreeTermsWriter extends FieldsConsumer { private final BytesRefBuilder lastTerm = new BytesRefBuilder(); private int[] prefixStarts = new int[8]; - private final long[] longs; - // Pending stack of terms and blocks. As terms arrive (in sorted order) // we append to this stack, and once the top of the stack has enough // terms starting with a common prefix, we write a new block with @@ -605,13 +600,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel; // Write term meta data - postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute); - for (int pos = 0; pos < longsSize; pos++) { - assert longs[pos] >= 0; - metaWriter.writeVLong(longs[pos]); - } - bytesWriter.copyTo(metaWriter); - bytesWriter.reset(); + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); absolute = false; } } else { @@ -648,13 +637,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { // separate anymore: // Write term meta data - postingsWriter.encodeTerm(longs, bytesWriter, fieldInfo, state, absolute); - for (int pos = 0; pos < longsSize; pos++) { - assert longs[pos] >= 0; - metaWriter.writeVLong(longs[pos]); - } - bytesWriter.copyTo(metaWriter); - bytesWriter.reset(); + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); absolute = false; } else { PendingBlock block = (PendingBlock) ent; @@ -720,8 +703,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { this.fieldInfo = fieldInfo; docsSeen = new FixedBitSet(maxDoc); - this.longsSize = postingsWriter.setField(fieldInfo); - this.longs = new long[longsSize]; + postingsWriter.setField(fieldInfo); } /** Writes one term's worth of postings. 
*/ @@ -818,7 +800,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { ((PendingBlock) pending.get(0)).index.getEmptyOutput(), numTerms, indexStartFP, - longsSize, minTerm, maxTerm)); } else { // cannot assert this: we skip deleted docIDs in the postings: @@ -828,7 +809,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { private final ByteBuffersDataOutput suffixWriter = ByteBuffersDataOutput.newResettableInstance(); private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance(); - private final ByteBuffersDataOutput bytesWriter = ByteBuffersDataOutput.newResettableInstance(); } private boolean closed; @@ -856,7 +836,6 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer { out.writeVInt(field.rootCode.output1.length); out.writeBytes(field.rootCode.output1.bytes, field.rootCode.output1.offset, field.rootCode.output1.length); out.writeVLong(field.rootCode.output2); - out.writeVInt(field.longsSize); indexOut.writeVLong(field.indexStartFP); writeBytesRef(out, field.minTerm); writeBytesRef(out, field.maxTerm); diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionFieldReader.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionFieldReader.java index 581201f9ea4..93888ae589d 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionFieldReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionFieldReader.java @@ -45,14 +45,13 @@ final class VersionFieldReader extends Terms implements Accountable { final Pair rootCode; final BytesRef minTerm; final BytesRef maxTerm; - final int longsSize; final VersionBlockTreeTermsReader parent; final FST> index; //private boolean DEBUG; VersionFieldReader(VersionBlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, Pair rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, - long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException { + long indexStartFP, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException { assert numTerms > 0; this.fieldInfo = fieldInfo; //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id"); @@ -63,7 +62,6 @@ final class VersionFieldReader extends Terms implements Accountable { this.docCount = docCount; this.indexStartFP = indexStartFP; this.rootCode = rootCode; - this.longsSize = longsSize; this.minTerm = minTerm; this.maxTerm = maxTerm; // if (DEBUG) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java index e55eb8747ae..690839feefd 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java @@ -41,8 +41,6 @@ import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsReader; import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter; import org.apache.lucene.codecs.lucene84.Lucene84PostingsReader; import org.apache.lucene.codecs.lucene84.Lucene84PostingsWriter; -import org.apache.lucene.codecs.memory.FSTOrdTermsReader; -import org.apache.lucene.codecs.memory.FSTOrdTermsWriter; import org.apache.lucene.codecs.memory.FSTTermsReader; import org.apache.lucene.codecs.memory.FSTTermsWriter; import 
org.apache.lucene.index.FieldInfo; @@ -122,7 +120,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { PostingsWriterBase postingsWriter = new Lucene84PostingsWriter(state); final FieldsConsumer fields; - final int t1 = random.nextInt(5); + final int t1 = random.nextInt(4); if (t1 == 0) { boolean success = false; @@ -135,16 +133,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat { } } } else if (t1 == 1) { - boolean success = false; - try { - fields = new FSTOrdTermsWriter(state, postingsWriter); - success = true; - } finally { - if (!success) { - postingsWriter.close(); - } - } - } else if (t1 == 2) { // Use BlockTree terms dict if (LuceneTestCase.VERBOSE) { @@ -165,7 +153,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { postingsWriter.close(); } } - } else if (t1 == 3) { + } else if (t1 == 2) { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: writing Block terms dict"); @@ -235,7 +223,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { } } } - } else if (t1 == 4) { + } else if (t1 == 3) { // Use OrdsBlockTree terms dict if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: writing OrdsBlockTree"); @@ -287,7 +275,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { PostingsReaderBase postingsReader = new Lucene84PostingsReader(state); final FieldsProducer fields; - final int t1 = random.nextInt(5); + final int t1 = random.nextInt(4); if (t1 == 0) { boolean success = false; try { @@ -299,16 +287,6 @@ public final class MockRandomPostingsFormat extends PostingsFormat { } } } else if (t1 == 1) { - boolean success = false; - try { - fields = new FSTOrdTermsReader(state, postingsReader); - success = true; - } finally { - if (!success) { - postingsReader.close(); - } - } - } else if (t1 == 2) { // Use BlockTree terms dict if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading BlockTree terms dict"); @@ -323,7 +301,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { postingsReader.close(); } } - } else if (t1 == 3) { + } else if (t1 == 2) { if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading Block terms dict"); @@ -374,7 +352,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat { } } } - } else if (t1 == 4) { + } else if (t1 == 3) { // Use OrdsBlockTree terms dict if (LuceneTestCase.VERBOSE) { System.out.println("MockRandomCodec: reading OrdsBlockTree terms dict"); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java index 8bb9a070268..01dfed0fd43 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java @@ -45,7 +45,6 @@ import org.apache.lucene.codecs.bloom.TestBloomFilteredLucenePostings; import org.apache.lucene.codecs.lucene60.Lucene60PointsReader; import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter; import org.apache.lucene.codecs.memory.DirectPostingsFormat; -import org.apache.lucene.codecs.memory.FSTOrdPostingsFormat; import org.apache.lucene.codecs.memory.FSTPostingsFormat; import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat; import org.apache.lucene.index.PointValues.IntersectVisitor; @@ -190,7 +189,6 @@ public class RandomCodec extends AssertingCodec { add(avoidCodecs, TestUtil.getDefaultPostingsFormat(minItemsPerBlock, 
maxItemsPerBlock, RandomPicks.randomFrom(random, BlockTreeTermsReader.FSTLoadMode.values())), new FSTPostingsFormat(), - new FSTOrdPostingsFormat(), new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock), LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : lowFreqCutoff)), //TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucenePostings to be constructed