diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/Lucene50MultiLevelSkipListReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/Lucene50MultiLevelSkipListReader.java deleted file mode 100644 index 71182716190..00000000000 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/Lucene50MultiLevelSkipListReader.java +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.backward_codecs; - -import java.io.Closeable; -import java.io.IOException; -import java.util.Arrays; -import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; -import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader; -import org.apache.lucene.codecs.MultiLevelSkipListWriter; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.MathUtil; - -/** - * Legacy variant of {@link Lucene99MultiLevelSkipListReader} for Lucene postings formats up to - * {@link Lucene90PostingsFormat} included. It starts postings at 0 rather than -1. - */ -public abstract class Lucene50MultiLevelSkipListReader implements Closeable { - /** the maximum number of skip levels possible for this index */ - protected int maxNumberOfSkipLevels; - - /** number of levels in this skip list */ - protected int numberOfSkipLevels; - - private int docCount; - - /** skipStream for each level. */ - private IndexInput[] skipStream; - - /** The start pointer of each skip level. */ - private long[] skipPointer; - - /** skipInterval of each level. */ - private int[] skipInterval; - - /** - * Number of docs skipped per level. It's possible for some values to overflow a signed int, but - * this has been accounted for. - */ - private int[] numSkipped; - - /** Doc id of current skip entry per level. */ - protected int[] skipDoc; - - /** Doc id of last read skip entry with docId <= target. */ - private int lastDoc; - - /** Child pointer of current skip entry per level. */ - private long[] childPointer; - - /** childPointer of last read skip entry with docId <= target. */ - private long lastChildPointer; - - private final int skipMultiplier; - - /** Creates a {@code MultiLevelSkipListReader}. */ - protected Lucene50MultiLevelSkipListReader( - IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) { - this.skipStream = new IndexInput[maxSkipLevels]; - this.skipPointer = new long[maxSkipLevels]; - this.childPointer = new long[maxSkipLevels]; - this.numSkipped = new int[maxSkipLevels]; - this.maxNumberOfSkipLevels = maxSkipLevels; - this.skipInterval = new int[maxSkipLevels]; - this.skipMultiplier = skipMultiplier; - this.skipStream[0] = skipStream; - this.skipInterval[0] = skipInterval; - for (int i = 1; i < maxSkipLevels; i++) { - // cache skip intervals - this.skipInterval[i] = this.skipInterval[i - 1] * skipMultiplier; - } - skipDoc = new int[maxSkipLevels]; - } - - /** - * Creates a {@code MultiLevelSkipListReader}, where {@code skipInterval} and {@code - * skipMultiplier} are the same. - */ - protected Lucene50MultiLevelSkipListReader( - IndexInput skipStream, int maxSkipLevels, int skipInterval) { - this(skipStream, maxSkipLevels, skipInterval, skipInterval); - } - - /** Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. */ - public int getDoc() { - return lastDoc; - } - - /** - * Skips entries to the first beyond the current whose document number is greater than or equal to - * target. Returns the current doc count. - */ - public int skipTo(int target) throws IOException { - - // walk up the levels until highest level is found that has a skip - // for this target - int level = 0; - while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) { - level++; - } - - while (level >= 0) { - if (target > skipDoc[level]) { - if (!loadNextSkip(level)) { - continue; - } - } else { - // no more skips on this level, go down one level - if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) { - seekChild(level - 1); - } - level--; - } - } - - return numSkipped[0] - skipInterval[0] - 1; - } - - private boolean loadNextSkip(int level) throws IOException { - // we have to skip, the target document is greater than the current - // skip list entry - setLastSkipData(level); - - numSkipped[level] += skipInterval[level]; - - // numSkipped may overflow a signed int, so compare as unsigned. - if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) { - // this skip list is exhausted - skipDoc[level] = Integer.MAX_VALUE; - if (numberOfSkipLevels > level) numberOfSkipLevels = level; - return false; - } - - // read next skip entry - skipDoc[level] += readSkipData(level, skipStream[level]); - - if (level != 0) { - // read the child pointer if we are not on the leaf level - childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1]; - } - - return true; - } - - /** Seeks the skip entry on the given level */ - protected void seekChild(int level) throws IOException { - skipStream[level].seek(lastChildPointer); - numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1]; - skipDoc[level] = lastDoc; - if (level > 0) { - childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1]; - } - } - - @Override - public void close() throws IOException { - for (int i = 1; i < skipStream.length; i++) { - if (skipStream[i] != null) { - skipStream[i].close(); - } - } - } - - /** Initializes the reader, for reuse on a new term. */ - public void init(long skipPointer, int df) throws IOException { - this.skipPointer[0] = skipPointer; - this.docCount = df; - assert skipPointer >= 0 && skipPointer <= skipStream[0].length() - : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length(); - Arrays.fill(skipDoc, 0); - Arrays.fill(numSkipped, 0); - Arrays.fill(childPointer, 0); - - for (int i = 1; i < numberOfSkipLevels; i++) { - skipStream[i] = null; - } - loadSkipLevels(); - } - - /** Loads the skip levels */ - private void loadSkipLevels() throws IOException { - if (docCount <= skipInterval[0]) { - numberOfSkipLevels = 1; - } else { - numberOfSkipLevels = 1 + MathUtil.log(docCount / skipInterval[0], skipMultiplier); - } - - if (numberOfSkipLevels > maxNumberOfSkipLevels) { - numberOfSkipLevels = maxNumberOfSkipLevels; - } - - skipStream[0].seek(skipPointer[0]); - - for (int i = numberOfSkipLevels - 1; i > 0; i--) { - // the length of the current level - long length = readLevelLength(skipStream[0]); - - // the start pointer of the current level - skipPointer[i] = skipStream[0].getFilePointer(); - - // clone this stream, it is already at the start of the current level - skipStream[i] = skipStream[0].clone(); - - // move base stream beyond the current level - skipStream[0].seek(skipStream[0].getFilePointer() + length); - } - - // use base stream for the lowest level - skipPointer[0] = skipStream[0].getFilePointer(); - } - - /** - * Subclasses must implement the actual skip data encoding in this method. - * - * @param level the level skip data shall be read from - * @param skipStream the skip stream to read from - */ - protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException; - - /** - * read the length of the current level written via {@link - * MultiLevelSkipListWriter#writeLevelLength(long, IndexOutput)}. - * - * @param skipStream the IndexInput the length shall be read from - * @return level length - */ - protected long readLevelLength(IndexInput skipStream) throws IOException { - return skipStream.readVLong(); - } - - /** - * read the child pointer written via {@link MultiLevelSkipListWriter#writeChildPointer(long, - * DataOutput)}. - * - * @param skipStream the IndexInput the child pointer shall be read from - * @return child pointer - */ - protected long readChildPointer(IndexInput skipStream) throws IOException { - return skipStream.readVLong(); - } - - /** Copies the values of the last read skip entry on this level */ - protected void setLastSkipData(int level) { - lastDoc = skipDoc[level]; - lastChildPointer = childPointer[level]; - } -} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/Placeholder.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/Placeholder.java new file mode 100644 index 00000000000..b6a1dc51b60 --- /dev/null +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/Placeholder.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.backward_codecs; + +/** Remove this file when adding back compat codecs */ +public class Placeholder { + // no instance + private Placeholder() {} +} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipReader.java index c0298de5829..a74eacdd21d 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50SkipReader.java @@ -20,7 +20,7 @@ import static org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat. import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader; +import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.store.IndexInput; /** @@ -50,7 +50,7 @@ import org.apache.lucene.store.IndexInput; * *

Therefore, we'll trim df before passing it to the interface. see trim(int) */ -class Lucene50SkipReader extends Lucene50MultiLevelSkipListReader { +class Lucene50SkipReader extends MultiLevelSkipListReader { private final int version; private long[] docPointer; private long[] posPointer; @@ -124,7 +124,7 @@ class Lucene50SkipReader extends Lucene50MultiLevelSkipListReader { /** * Returns the doc pointer of the doc to which the last call of {@link - * Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped. + * MultiLevelSkipListReader#skipTo(int)} has skipped. */ public long getDocPointer() { return lastDocPointer; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84SkipReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84SkipReader.java index 9e1dab5a1c5..78b49e28195 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84SkipReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84SkipReader.java @@ -18,7 +18,7 @@ package org.apache.lucene.backward_codecs.lucene84; import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader; +import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.store.IndexInput; /** @@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput; * *

Therefore, we'll trim df before passing it to the interface. see trim(int) */ -class Lucene84SkipReader extends Lucene50MultiLevelSkipListReader { +class Lucene84SkipReader extends MultiLevelSkipListReader { private long[] docPointer; private long[] posPointer; private long[] payPointer; @@ -119,7 +119,7 @@ class Lucene84SkipReader extends Lucene50MultiLevelSkipListReader { /** * Returns the doc pointer of the doc to which the last call of {@link - * Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped. + * MultiLevelSkipListReader#skipTo(int)} has skipped. */ public long getDocPointer() { return lastDocPointer; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipReader.java index 25df45e6f5b..cd2febadf8f 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90SkipReader.java @@ -18,7 +18,7 @@ package org.apache.lucene.backward_codecs.lucene90; import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader; +import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.store.IndexInput; /** @@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput; * *

Therefore, we'll trim df before passing it to the interface. see trim(int) */ -class Lucene90SkipReader extends Lucene50MultiLevelSkipListReader { +class Lucene90SkipReader extends MultiLevelSkipListReader { private long[] docPointer; private long[] posPointer; private long[] payPointer; @@ -119,7 +119,7 @@ class Lucene90SkipReader extends Lucene50MultiLevelSkipListReader { /** * Returns the doc pointer of the doc to which the last call of {@link - * Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped. + * MultiLevelSkipListReader#skipTo(int)} has skipped. */ public long getDocPointer() { return lastDocPointer; diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index 880f7d748e0..abfbdd25a02 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -284,7 +284,7 @@ class SimpleTextFieldsReader extends FieldsProducer { // for skip list data private SimpleTextSkipReader skipReader; - private int nextSkipDoc = -1; + private int nextSkipDoc = 0; private long seekTo = -1; public SimpleTextDocsEnum() { @@ -305,7 +305,7 @@ class SimpleTextFieldsReader extends FieldsProducer { tf = 1; cost = docFreq; skipReader.reset(skipPointer, docFreq); - nextSkipDoc = -1; + nextSkipDoc = 0; seekTo = -1; return this; } @@ -458,7 +458,7 @@ class SimpleTextFieldsReader extends FieldsProducer { // for skip list data private SimpleTextSkipReader skipReader; - private int nextSkipDoc = -1; + private int nextSkipDoc = 0; private long seekTo = -1; public SimpleTextPostingsEnum() { @@ -484,7 +484,7 @@ class SimpleTextFieldsReader extends FieldsProducer { } cost = docFreq; skipReader.reset(skipPointer, docFreq); - nextSkipDoc = -1; + nextSkipDoc = 0; seekTo = -1; return this; } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipReader.java index 247972abd67..d418021e440 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipReader.java @@ -31,7 +31,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader; +import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.index.Impact; import org.apache.lucene.index.Impacts; import org.apache.lucene.search.DocIdSetIterator; @@ -51,7 +51,7 @@ import org.apache.lucene.util.StringHelper; * * @lucene.experimental */ -class SimpleTextSkipReader extends Lucene99MultiLevelSkipListReader { +class SimpleTextSkipReader extends MultiLevelSkipListReader { private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder(); private final BytesRefBuilder scratch = new BytesRefBuilder(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipWriter.java index 0986a019b63..95ef8caaa07 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSkipWriter.java @@ -41,7 +41,7 @@ class SimpleTextSkipWriter extends MultiLevelSkipListWriter { static final int BLOCK_SIZE = 8; private Map wroteHeaderPerLevelMap = new HashMap<>(); - private int curDoc = -1; + private int curDoc; private long curDocFilePointer; private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms; private final BytesRefBuilder scratch = new BytesRefBuilder(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Lucene99MultiLevelSkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java similarity index 96% rename from lucene/core/src/java/org/apache/lucene/codecs/Lucene99MultiLevelSkipListReader.java rename to lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java index b3275aab323..e4c59f41335 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/Lucene99MultiLevelSkipListReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java @@ -35,7 +35,7 @@ import org.apache.lucene.util.MathUtil; * * @lucene.experimental */ -public abstract class Lucene99MultiLevelSkipListReader implements Closeable { +public abstract class MultiLevelSkipListReader implements Closeable { /** the maximum number of skip levels possible for this index */ protected int maxNumberOfSkipLevels; @@ -63,7 +63,7 @@ public abstract class Lucene99MultiLevelSkipListReader implements Closeable { protected int[] skipDoc; /** Doc id of last read skip entry with docId <= target. */ - private int lastDoc = -1; + private int lastDoc; /** Child pointer of current skip entry per level. */ private long[] childPointer; @@ -74,7 +74,7 @@ public abstract class Lucene99MultiLevelSkipListReader implements Closeable { private final int skipMultiplier; /** Creates a {@code MultiLevelSkipListReader}. */ - protected Lucene99MultiLevelSkipListReader( + protected MultiLevelSkipListReader( IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) { this.skipStream = new IndexInput[maxSkipLevels]; this.skipPointer = new long[maxSkipLevels]; @@ -96,8 +96,7 @@ public abstract class Lucene99MultiLevelSkipListReader implements Closeable { * Creates a {@code MultiLevelSkipListReader}, where {@code skipInterval} and {@code * skipMultiplier} are the same. */ - protected Lucene99MultiLevelSkipListReader( - IndexInput skipStream, int maxSkipLevels, int skipInterval) { + protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) { this(skipStream, maxSkipLevels, skipInterval, skipInterval); } @@ -187,7 +186,7 @@ public abstract class Lucene99MultiLevelSkipListReader implements Closeable { this.docCount = df; assert skipPointer >= 0 && skipPointer <= skipStream[0].length() : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length(); - Arrays.fill(skipDoc, -1); + Arrays.fill(skipDoc, 0); Arrays.fill(numSkipped, 0); Arrays.fill(childPointer, 0); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java index 954cf4ed3bf..599d1f939ce 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java @@ -334,7 +334,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted) private int blockUpto; // number of docs in or before the current block private int doc; // doc we last read - private int accum; // accumulator for doc deltas + private long accum; // accumulator for doc deltas // Where this term's postings start in the .doc file: private long docTermStartFP; @@ -344,6 +344,10 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { // no skip data for this term): private long skipOffset; + // docID for next skip point, we won't use skipper if + // target docID is not larger than this + private int nextSkipDoc; + private boolean needsFreq; // true if the caller actually needs frequencies // as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block // always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer @@ -400,8 +404,9 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { freqBuffer[i] = 1; } } - accum = -1; + accum = 0; blockUpto = 0; + nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block docBufferUpto = BLOCK_SIZE; skipped = false; return this; @@ -462,22 +467,20 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all } } - accum = (int) docBuffer[BLOCK_SIZE - 1]; blockUpto += BLOCK_SIZE; } else if (docFreq == 1) { docBuffer[0] = singletonDocID; freqBuffer[0] = totalTermFreq; docBuffer[1] = NO_MORE_DOCS; - accum = NO_MORE_DOCS; blockUpto++; } else { // Read vInts: readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq); prefixSum(docBuffer, left, accum); docBuffer[left] = NO_MORE_DOCS; - accum = NO_MORE_DOCS; blockUpto += left; } + accum = docBuffer[BLOCK_SIZE - 1]; docBufferUpto = 0; assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; } @@ -498,7 +501,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { public int advance(int target) throws IOException { // current skip docID < docIDs generated from current buffer <= next skip docID // we don't need to skip if target is buffered already - if (docFreq > BLOCK_SIZE && target > accum) { + if (docFreq > BLOCK_SIZE && target > nextSkipDoc) { + if (skipper == null) { // Lazy init: first time this enum has ever been used for skipping skipper = @@ -532,15 +536,28 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { // as we have already positioned docIn where in needs to be. isFreqsRead = true; } + // next time we call advance, this is used to + // foresee whether skipper is necessary. + nextSkipDoc = skipper.getNextSkipDoc(); } if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } - int next = findFirstGreater(docBuffer, target, docBufferUpto); - this.doc = (int) docBuffer[next]; - docBufferUpto = next + 1; - return doc; + // Now scan... this is an inlined/pared down version + // of nextDoc(): + long doc; + while (true) { + doc = docBuffer[docBufferUpto]; + + if (doc >= target) { + break; + } + ++docBufferUpto; + } + + docBufferUpto++; + return this.doc = (int) doc; } @Override @@ -592,7 +609,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { private long totalTermFreq; // number of positions in this posting list private int blockUpto; // number of docs in or before the current block private int doc; // doc we last read - private int accum; // accumulator for doc deltas + private long accum; // accumulator for doc deltas private int freq; // freq we last read private int position; // current position @@ -628,6 +645,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { // no skip data for this term): private long skipOffset; + private int nextSkipDoc; + private boolean needsOffsets; // true if we actually need offsets private boolean needsPayloads; // true if we actually need payloads private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 @@ -713,8 +732,13 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); doc = -1; - accum = -1; + accum = 0; blockUpto = 0; + if (docFreq > BLOCK_SIZE) { + nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block + } else { + nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping + } docBufferUpto = BLOCK_SIZE; skipped = false; return this; @@ -737,21 +761,19 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { if (left >= BLOCK_SIZE) { forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); pforUtil.decode(docIn, freqBuffer); - accum = (int) docBuffer[BLOCK_SIZE - 1]; blockUpto += BLOCK_SIZE; } else if (docFreq == 1) { docBuffer[0] = singletonDocID; freqBuffer[0] = totalTermFreq; docBuffer[1] = NO_MORE_DOCS; - accum = NO_MORE_DOCS; blockUpto++; } else { readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true); prefixSum(docBuffer, left, accum); docBuffer[left] = NO_MORE_DOCS; - accum = NO_MORE_DOCS; blockUpto += left; } + accum = docBuffer[BLOCK_SIZE - 1]; docBufferUpto = 0; assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; } @@ -845,7 +867,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { @Override public int advance(int target) throws IOException { - if (docFreq > BLOCK_SIZE && target > accum) { + if (target > nextSkipDoc) { if (skipper == null) { // Lazy init: first time this enum has ever been used for skipping skipper = @@ -879,6 +901,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { lastStartOffset = 0; // new document payloadByteUpto = skipper.getPayloadByteUpto(); } + nextSkipDoc = skipper.getNextSkipDoc(); } if (docBufferUpto == BLOCK_SIZE) { refillDocs(); @@ -886,12 +909,16 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { // Now scan: long doc; - do { + while (true) { doc = docBuffer[docBufferUpto]; freq = (int) freqBuffer[docBufferUpto]; posPendingCount += freq; docBufferUpto++; - } while (doc < target); + + if (doc >= target) { + break; + } + } position = 0; lastStartOffset = 0; @@ -1046,7 +1073,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { private int docFreq; // number of docs in this posting list private int blockUpto; // number of documents in or before the current block private int doc; // doc we last read - private int accum; // accumulator for doc deltas + private long accum; // accumulator for doc deltas private int nextSkipDoc = -1; @@ -1073,7 +1100,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { docIn.seek(termState.docStartFP); doc = -1; - accum = -1; + accum = 0; blockUpto = 0; docBufferUpto = BLOCK_SIZE; @@ -1126,15 +1153,14 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { if (indexHasFreqs) { isFreqsRead = false; } - accum = (int) docBuffer[BLOCK_SIZE - 1]; blockUpto += BLOCK_SIZE; } else { readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs, true); prefixSum(docBuffer, left, accum); docBuffer[left] = NO_MORE_DOCS; - accum = NO_MORE_DOCS; blockUpto += left; } + accum = docBuffer[BLOCK_SIZE - 1]; docBufferUpto = 0; assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; } @@ -1182,8 +1208,10 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { @Override public int advance(int target) throws IOException { - if (target > accum) { + if (target > nextSkipDoc) { advanceShallow(target); + } + if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } @@ -1243,7 +1271,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { private long totalTermFreq; // number of positions in this posting list private int docUpto; // how many docs we've read private int doc; // doc we last read - private int accum; // accumulator for doc deltas + private long accum; // accumulator for doc deltas private int freq; // freq we last read private int position; // current position @@ -1302,7 +1330,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { } doc = -1; - accum = -1; + accum = 0; docUpto = 0; docBufferUpto = BLOCK_SIZE; @@ -1334,14 +1362,12 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { if (left >= BLOCK_SIZE) { forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); pforUtil.decode(docIn, freqBuffer); - accum = (int) docBuffer[BLOCK_SIZE - 1]; } else { readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true); prefixSum(docBuffer, left, accum); docBuffer[left] = NO_MORE_DOCS; - accum = NO_MORE_DOCS; } - + accum = docBuffer[BLOCK_SIZE - 1]; docBufferUpto = 0; } @@ -1413,8 +1439,10 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { @Override public int advance(int target) throws IOException { - if (target > accum) { + if (target > nextSkipDoc) { advanceShallow(target); + } + if (docBufferUpto == BLOCK_SIZE) { refillDocs(); } @@ -1548,7 +1576,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { private int docUpto; // how many docs we've read private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads private int doc; // doc we last read - private int accum; // accumulator for doc deltas + private long accum; // accumulator for doc deltas private int position; // current position // how many positions "behind" we are; nextPosition must @@ -1657,7 +1685,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { } doc = -1; - accum = -1; + accum = 0; docUpto = 0; posDocUpTo = 0; isFreqsRead = true; @@ -1724,14 +1752,12 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { isFreqsRead = false; // freq block will be loaded lazily when necessary, we don't load it here } - accum = (int) docBuffer[BLOCK_SIZE - 1]; } else { readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true); prefixSum(docBuffer, left, accum); docBuffer[left] = NO_MORE_DOCS; - accum = NO_MORE_DOCS; } - + accum = docBuffer[BLOCK_SIZE - 1]; docBufferUpto = 0; } @@ -1849,8 +1875,10 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { @Override public int advance(int target) throws IOException { - if (target > accum) { + if (target > nextSkipDoc) { advanceShallow(target); + } + if (docBufferUpto == BLOCK_SIZE) { if (seekTo >= 0) { docIn.seek(seekTo); seekTo = -1; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java index 8129211b5f1..2bd562fc3da 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsWriter.java @@ -205,7 +205,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase { payStartFP = payOut.getFilePointer(); } } - lastDocID = -1; + lastDocID = 0; lastBlockDocID = -1; skipWriter.resetSkip(); this.norms = norms; @@ -369,7 +369,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase { final int singletonDocID; if (state.docFreq == 1) { // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq - singletonDocID = (int) docDeltaBuffer[0] - 1; + singletonDocID = (int) docDeltaBuffer[0]; } else { singletonDocID = -1; // Group vInt encode the remaining doc deltas and freqs: @@ -468,7 +468,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase { state.lastPosBlockOffset = lastPosBlockOffset; docBufferUpto = 0; posBufferUpto = 0; - lastDocID = -1; + lastDocID = 0; docCount = 0; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipReader.java index 0ed66b91640..a6cb7c3a811 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipReader.java @@ -18,7 +18,7 @@ package org.apache.lucene.codecs.lucene99; import java.io.IOException; import java.util.Arrays; -import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader; +import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.store.IndexInput; /** @@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput; * *

Therefore, we'll trim df before passing it to the interface. see trim(int) */ -public class Lucene99SkipReader extends Lucene99MultiLevelSkipListReader { +public class Lucene99SkipReader extends MultiLevelSkipListReader { private long[] docPointer; private long[] posPointer; private long[] payPointer; @@ -119,7 +119,7 @@ public class Lucene99SkipReader extends Lucene99MultiLevelSkipListReader { /** * Returns the doc pointer of the doc to which the last call of {@link - * Lucene99MultiLevelSkipListReader#skipTo(int)} has skipped. + * MultiLevelSkipListReader#skipTo(int)} has skipped. */ public long getDocPointer() { return lastDocPointer; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipWriter.java index 70c17a4092e..ec50e5e5c16 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99SkipWriter.java @@ -131,7 +131,7 @@ public final class Lucene99SkipWriter extends MultiLevelSkipListWriter { private void initSkip() { if (!initialized) { super.resetSkip(); - Arrays.fill(lastSkipDoc, -1); + Arrays.fill(lastSkipDoc, 0); Arrays.fill(lastSkipDocPointer, lastDocFP); if (fieldHasPositions) { Arrays.fill(lastSkipPosPointer, lastPosFP);