Simplify advancing on postings/impacts enums (#12810)

This commit is contained in:
Adrien Grand 2023-11-23 13:32:08 +01:00 committed by GitHub
parent f7cab16450
commit 5aa401e7d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 327 additions and 114 deletions

View File

@ -0,0 +1,263 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs;
import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.MathUtil;
/**
 * Legacy variant of {@link Lucene99MultiLevelSkipListReader} for Lucene postings formats up to
 * {@link Lucene90PostingsFormat} included. It starts postings at 0 rather than -1.
 *
 * <p>The reader keeps one {@link IndexInput} per skip level: level 0 is the stream handed to the
 * constructor, higher levels are clones of it that are created each time {@link #init(long, int)}
 * is called. Subclasses supply the actual decoding of a skip entry through {@link
 * #readSkipData(int, IndexInput)}.
 */
public abstract class Lucene50MultiLevelSkipListReader implements Closeable {
  /** the maximum number of skip levels possible for this index */
  protected int maxNumberOfSkipLevels;

  /** number of levels in this skip list */
  protected int numberOfSkipLevels;

  /** Document count of the current term, as passed to {@link #init(long, int)}. */
  private int docCount;

  /** skipStream for each level. */
  private final IndexInput[] skipStream;

  /** The start pointer of each skip level. */
  private final long[] skipPointer;

  /** skipInterval of each level. */
  private final int[] skipInterval;

  /**
   * Number of docs skipped per level. It's possible for some values to overflow a signed int, but
   * this has been accounted for.
   */
  private final int[] numSkipped;

  /** Doc id of current skip entry per level. */
  protected int[] skipDoc;

  /** Doc id of last read skip entry with docId &lt;= target. */
  private int lastDoc;

  /** Child pointer of current skip entry per level. */
  private final long[] childPointer;

  /** childPointer of last read skip entry with docId &lt;= target. */
  private long lastChildPointer;

  /** Factor by which the skip interval grows from one level to the next. */
  private final int skipMultiplier;

  /**
   * Creates a {@code Lucene50MultiLevelSkipListReader}.
   *
   * @param skipStream the stream used for level 0; clones of it are used for the higher levels
   * @param maxSkipLevels the maximum number of skip levels, which sizes all per-level arrays
   * @param skipInterval the skip interval of level 0
   * @param skipMultiplier the factor applied to the skip interval for each additional level
   */
  protected Lucene50MultiLevelSkipListReader(
      IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) {
    this.skipStream = new IndexInput[maxSkipLevels];
    this.skipPointer = new long[maxSkipLevels];
    this.childPointer = new long[maxSkipLevels];
    this.numSkipped = new int[maxSkipLevels];
    this.maxNumberOfSkipLevels = maxSkipLevels;
    this.skipInterval = new int[maxSkipLevels];
    this.skipMultiplier = skipMultiplier;
    this.skipStream[0] = skipStream;
    this.skipInterval[0] = skipInterval;
    for (int i = 1; i < maxSkipLevels; i++) {
      // cache skip intervals
      this.skipInterval[i] = this.skipInterval[i - 1] * skipMultiplier;
    }
    skipDoc = new int[maxSkipLevels];
  }

  /**
   * Creates a {@code Lucene50MultiLevelSkipListReader}, where {@code skipInterval} and {@code
   * skipMultiplier} are the same.
   *
   * @param skipStream the stream used for level 0
   * @param maxSkipLevels the maximum number of skip levels
   * @param skipInterval the value used both as skip interval and as skip multiplier
   */
  protected Lucene50MultiLevelSkipListReader(
      IndexInput skipStream, int maxSkipLevels, int skipInterval) {
    this(skipStream, maxSkipLevels, skipInterval, skipInterval);
  }

  /** Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. */
  public int getDoc() {
    return lastDoc;
  }

  /**
   * Skips entries to the first beyond the current whose document number is greater than or equal to
   * <i>target</i>. Returns the current doc count.
   *
   * @param target the document number to skip to
   * @return {@code numSkipped[0] - skipInterval[0] - 1}, computed from the level-0 counters
   * @throws IOException if reading from one of the skip streams fails
   */
  public int skipTo(int target) throws IOException {
    // walk up the levels until highest level is found that has a skip
    // for this target
    int level = 0;
    while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
      level++;
    }

    while (level >= 0) {
      if (target > skipDoc[level]) {
        // Either moves to the next entry of this level, or marks the level as exhausted by
        // setting skipDoc[level] to Integer.MAX_VALUE, in which case the next iteration takes
        // the branch below and descends. The return value is therefore not needed here.
        loadNextSkip(level);
      } else {
        // no more skips on this level, go down one level
        if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
          seekChild(level - 1);
        }
        level--;
      }
    }

    return numSkipped[0] - skipInterval[0] - 1;
  }

  /**
   * Records the current entry of {@code level} as the last skip data and reads the next entry of
   * that level.
   *
   * @param level the level to advance
   * @return {@code true} if a new entry was read, {@code false} if the level is exhausted
   * @throws IOException if reading from the level's skip stream fails
   */
  private boolean loadNextSkip(int level) throws IOException {
    // we have to skip, the target document is greater than the current
    // skip list entry
    setLastSkipData(level);

    numSkipped[level] += skipInterval[level];

    // numSkipped may overflow a signed int, so compare as unsigned.
    if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
      // this skip list is exhausted
      skipDoc[level] = Integer.MAX_VALUE;
      if (numberOfSkipLevels > level) {
        numberOfSkipLevels = level;
      }
      return false;
    }

    // read next skip entry
    skipDoc[level] += readSkipData(level, skipStream[level]);

    if (level != 0) {
      // read the child pointer if we are not on the leaf level
      childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1];
    }

    return true;
  }

  /**
   * Seeks the skip entry on the given level.
   *
   * <p>The level's stream is positioned at the last recorded child pointer, and its counters are
   * derived from the level above ({@code level + 1}) and from the last recorded doc id.
   *
   * @param level the level to position; callers in this class pass a level below the top one
   * @throws IOException if seeking or reading the level's skip stream fails
   */
  protected void seekChild(int level) throws IOException {
    skipStream[level].seek(lastChildPointer);
    numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
    skipDoc[level] = lastDoc;
    if (level > 0) {
      childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1];
    }
  }

  /**
   * Closes the streams of all levels above level 0. The level-0 stream passed to the constructor
   * is left open (presumably it is owned by the caller -- TODO confirm).
   *
   * <p>Every remaining stream is closed even if closing an earlier one fails with an {@link
   * IOException}; the first such exception is rethrown with later ones attached as suppressed.
   *
   * @throws IOException the first failure encountered while closing the streams
   */
  @Override
  public void close() throws IOException {
    IOException firstFailure = null;
    for (int i = 1; i < skipStream.length; i++) {
      IndexInput stream = skipStream[i];
      if (stream == null) {
        continue;
      }
      try {
        stream.close();
      } catch (IOException e) {
        // keep going so that one failing stream does not leak the remaining ones
        if (firstFailure == null) {
          firstFailure = e;
        } else {
          firstFailure.addSuppressed(e);
        }
      }
    }
    if (firstFailure != null) {
      throw firstFailure;
    }
  }

  /**
   * Initializes the reader, for reuse on a new term.
   *
   * @param skipPointer file pointer at which the skip data of the term starts; must lie within
   *     the level-0 stream (checked by an assertion)
   * @param df document count of the term, used to determine how many skip levels exist
   * @throws IOException if reading the level layout from the skip stream fails
   */
  public void init(long skipPointer, int df) throws IOException {
    this.skipPointer[0] = skipPointer;
    this.docCount = df;
    assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
        : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
    // this legacy reader starts every level at doc 0
    Arrays.fill(skipDoc, 0);
    Arrays.fill(numSkipped, 0);
    Arrays.fill(childPointer, 0);

    // drop the clones of the previous term; level 0 keeps the base stream
    for (int i = 1; i < numberOfSkipLevels; i++) {
      skipStream[i] = null;
    }
    loadSkipLevels();
  }

  /**
   * Loads the skip levels: computes the number of levels for the current term, then walks the
   * level-0 stream from the highest level down, cloning it at the start of each level.
   */
  private void loadSkipLevels() throws IOException {
    if (docCount <= skipInterval[0]) {
      numberOfSkipLevels = 1;
    } else {
      numberOfSkipLevels = 1 + MathUtil.log(docCount / skipInterval[0], skipMultiplier);
    }

    if (numberOfSkipLevels > maxNumberOfSkipLevels) {
      numberOfSkipLevels = maxNumberOfSkipLevels;
    }

    skipStream[0].seek(skipPointer[0]);

    for (int i = numberOfSkipLevels - 1; i > 0; i--) {
      // the length of the current level
      long length = readLevelLength(skipStream[0]);

      // the start pointer of the current level
      skipPointer[i] = skipStream[0].getFilePointer();

      // clone this stream, it is already at the start of the current level
      skipStream[i] = skipStream[0].clone();

      // move base stream beyond the current level
      skipStream[0].seek(skipStream[0].getFilePointer() + length);
    }

    // use base stream for the lowest level
    skipPointer[0] = skipStream[0].getFilePointer();
  }

  /**
   * Subclasses must implement the actual skip data encoding in this method.
   *
   * @param level the level skip data shall be read from
   * @param skipStream the skip stream to read from
   * @return the value that is added to the current skip doc of {@code level}
   */
  protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;

  /**
   * read the length of the current level written via {@link
   * MultiLevelSkipListWriter#writeLevelLength(long, IndexOutput)}.
   *
   * @param skipStream the IndexInput the length shall be read from
   * @return level length
   */
  protected long readLevelLength(IndexInput skipStream) throws IOException {
    return skipStream.readVLong();
  }

  /**
   * read the child pointer written via {@link MultiLevelSkipListWriter#writeChildPointer(long,
   * DataOutput)}.
   *
   * @param skipStream the IndexInput the child pointer shall be read from
   * @return child pointer
   */
  protected long readChildPointer(IndexInput skipStream) throws IOException {
    return skipStream.readVLong();
  }

  /**
   * Copies the values of the last read skip entry on this level.
   *
   * @param level the level whose current doc id and child pointer are recorded
   */
  protected void setLastSkipData(int level) {
    lastDoc = skipDoc[level];
    lastChildPointer = childPointer[level];
  }
}

View File

@ -1,23 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs;
/**
 * Placeholder type whose only member is a private constructor.
 *
 * <p>Remove this file when adding back compat codecs.
 */
public class Placeholder {

  /** Private so that no instance can be created from outside this class. */
  private Placeholder() {
    // no instance
  }
}

View File

@ -20,7 +20,7 @@ import static org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat.
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
/** /**
@ -50,7 +50,7 @@ import org.apache.lucene.store.IndexInput;
* *
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int) * <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
*/ */
class Lucene50SkipReader extends MultiLevelSkipListReader { class Lucene50SkipReader extends Lucene50MultiLevelSkipListReader {
private final int version; private final int version;
private long[] docPointer; private long[] docPointer;
private long[] posPointer; private long[] posPointer;
@ -124,7 +124,7 @@ class Lucene50SkipReader extends MultiLevelSkipListReader {
/** /**
* Returns the doc pointer of the doc to which the last call of {@link * Returns the doc pointer of the doc to which the last call of {@link
* MultiLevelSkipListReader#skipTo(int)} has skipped. * Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped.
*/ */
public long getDocPointer() { public long getDocPointer() {
return lastDocPointer; return lastDocPointer;

View File

@ -18,7 +18,7 @@ package org.apache.lucene.backward_codecs.lucene84;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
/** /**
@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput;
* *
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int) * <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
*/ */
class Lucene84SkipReader extends MultiLevelSkipListReader { class Lucene84SkipReader extends Lucene50MultiLevelSkipListReader {
private long[] docPointer; private long[] docPointer;
private long[] posPointer; private long[] posPointer;
private long[] payPointer; private long[] payPointer;
@ -119,7 +119,7 @@ class Lucene84SkipReader extends MultiLevelSkipListReader {
/** /**
* Returns the doc pointer of the doc to which the last call of {@link * Returns the doc pointer of the doc to which the last call of {@link
* MultiLevelSkipListReader#skipTo(int)} has skipped. * Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped.
*/ */
public long getDocPointer() { public long getDocPointer() {
return lastDocPointer; return lastDocPointer;

View File

@ -18,7 +18,7 @@ package org.apache.lucene.backward_codecs.lucene90;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
/** /**
@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput;
* *
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int) * <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
*/ */
class Lucene90SkipReader extends MultiLevelSkipListReader { class Lucene90SkipReader extends Lucene50MultiLevelSkipListReader {
private long[] docPointer; private long[] docPointer;
private long[] posPointer; private long[] posPointer;
private long[] payPointer; private long[] payPointer;
@ -119,7 +119,7 @@ class Lucene90SkipReader extends MultiLevelSkipListReader {
/** /**
* Returns the doc pointer of the doc to which the last call of {@link * Returns the doc pointer of the doc to which the last call of {@link
* MultiLevelSkipListReader#skipTo(int)} has skipped. * Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped.
*/ */
public long getDocPointer() { public long getDocPointer() {
return lastDocPointer; return lastDocPointer;

View File

@ -284,7 +284,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
// for skip list data // for skip list data
private SimpleTextSkipReader skipReader; private SimpleTextSkipReader skipReader;
private int nextSkipDoc = 0; private int nextSkipDoc = -1;
private long seekTo = -1; private long seekTo = -1;
public SimpleTextDocsEnum() { public SimpleTextDocsEnum() {
@ -305,7 +305,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
tf = 1; tf = 1;
cost = docFreq; cost = docFreq;
skipReader.reset(skipPointer, docFreq); skipReader.reset(skipPointer, docFreq);
nextSkipDoc = 0; nextSkipDoc = -1;
seekTo = -1; seekTo = -1;
return this; return this;
} }
@ -458,7 +458,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
// for skip list data // for skip list data
private SimpleTextSkipReader skipReader; private SimpleTextSkipReader skipReader;
private int nextSkipDoc = 0; private int nextSkipDoc = -1;
private long seekTo = -1; private long seekTo = -1;
public SimpleTextPostingsEnum() { public SimpleTextPostingsEnum() {
@ -484,7 +484,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
} }
cost = docFreq; cost = docFreq;
skipReader.reset(skipPointer, docFreq); skipReader.reset(skipPointer, docFreq);
nextSkipDoc = 0; nextSkipDoc = -1;
seekTo = -1; seekTo = -1;
return this; return this;
} }

View File

@ -31,7 +31,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader;
import org.apache.lucene.index.Impact; import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts; import org.apache.lucene.index.Impacts;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
@ -51,7 +51,7 @@ import org.apache.lucene.util.StringHelper;
* *
* @lucene.experimental * @lucene.experimental
*/ */
class SimpleTextSkipReader extends MultiLevelSkipListReader { class SimpleTextSkipReader extends Lucene99MultiLevelSkipListReader {
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder(); private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
private final BytesRefBuilder scratch = new BytesRefBuilder(); private final BytesRefBuilder scratch = new BytesRefBuilder();

View File

@ -41,7 +41,7 @@ class SimpleTextSkipWriter extends MultiLevelSkipListWriter {
static final int BLOCK_SIZE = 8; static final int BLOCK_SIZE = 8;
private Map<Integer, Boolean> wroteHeaderPerLevelMap = new HashMap<>(); private Map<Integer, Boolean> wroteHeaderPerLevelMap = new HashMap<>();
private int curDoc; private int curDoc = -1;
private long curDocFilePointer; private long curDocFilePointer;
private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms; private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
private final BytesRefBuilder scratch = new BytesRefBuilder(); private final BytesRefBuilder scratch = new BytesRefBuilder();

View File

@ -35,7 +35,7 @@ import org.apache.lucene.util.MathUtil;
* *
* @lucene.experimental * @lucene.experimental
*/ */
public abstract class MultiLevelSkipListReader implements Closeable { public abstract class Lucene99MultiLevelSkipListReader implements Closeable {
/** the maximum number of skip levels possible for this index */ /** the maximum number of skip levels possible for this index */
protected int maxNumberOfSkipLevels; protected int maxNumberOfSkipLevels;
@ -63,7 +63,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
protected int[] skipDoc; protected int[] skipDoc;
/** Doc id of last read skip entry with docId &lt;= target. */ /** Doc id of last read skip entry with docId &lt;= target. */
private int lastDoc; private int lastDoc = -1;
/** Child pointer of current skip entry per level. */ /** Child pointer of current skip entry per level. */
private long[] childPointer; private long[] childPointer;
@ -74,7 +74,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
private final int skipMultiplier; private final int skipMultiplier;
/** Creates a {@code MultiLevelSkipListReader}. */ /** Creates a {@code MultiLevelSkipListReader}. */
protected MultiLevelSkipListReader( protected Lucene99MultiLevelSkipListReader(
IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) { IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) {
this.skipStream = new IndexInput[maxSkipLevels]; this.skipStream = new IndexInput[maxSkipLevels];
this.skipPointer = new long[maxSkipLevels]; this.skipPointer = new long[maxSkipLevels];
@ -96,7 +96,8 @@ public abstract class MultiLevelSkipListReader implements Closeable {
* Creates a {@code MultiLevelSkipListReader}, where {@code skipInterval} and {@code * Creates a {@code MultiLevelSkipListReader}, where {@code skipInterval} and {@code
* skipMultiplier} are the same. * skipMultiplier} are the same.
*/ */
protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) { protected Lucene99MultiLevelSkipListReader(
IndexInput skipStream, int maxSkipLevels, int skipInterval) {
this(skipStream, maxSkipLevels, skipInterval, skipInterval); this(skipStream, maxSkipLevels, skipInterval, skipInterval);
} }
@ -186,7 +187,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
this.docCount = df; this.docCount = df;
assert skipPointer >= 0 && skipPointer <= skipStream[0].length() assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
: "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length(); : "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
Arrays.fill(skipDoc, 0); Arrays.fill(skipDoc, -1);
Arrays.fill(numSkipped, 0); Arrays.fill(numSkipped, 0);
Arrays.fill(childPointer, 0); Arrays.fill(childPointer, 0);

View File

@ -334,7 +334,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted) private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
private int blockUpto; // number of docs in or before the current block private int blockUpto; // number of docs in or before the current block
private int doc; // doc we last read private int doc; // doc we last read
private long accum; // accumulator for doc deltas private int accum; // accumulator for doc deltas
// Where this term's postings start in the .doc file: // Where this term's postings start in the .doc file:
private long docTermStartFP; private long docTermStartFP;
@ -344,10 +344,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
// no skip data for this term): // no skip data for this term):
private long skipOffset; private long skipOffset;
// docID for next skip point, we won't use skipper if
// target docID is not larger than this
private int nextSkipDoc;
private boolean needsFreq; // true if the caller actually needs frequencies private boolean needsFreq; // true if the caller actually needs frequencies
// as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block // as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block
// always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer // always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer
@ -404,9 +400,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
freqBuffer[i] = 1; freqBuffer[i] = 1;
} }
} }
accum = 0; accum = -1;
blockUpto = 0; blockUpto = 0;
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
docBufferUpto = BLOCK_SIZE; docBufferUpto = BLOCK_SIZE;
skipped = false; skipped = false;
return this; return this;
@ -467,20 +462,22 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all
} }
} }
accum = (int) docBuffer[BLOCK_SIZE - 1];
blockUpto += BLOCK_SIZE; blockUpto += BLOCK_SIZE;
} else if (docFreq == 1) { } else if (docFreq == 1) {
docBuffer[0] = singletonDocID; docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq; freqBuffer[0] = totalTermFreq;
docBuffer[1] = NO_MORE_DOCS; docBuffer[1] = NO_MORE_DOCS;
accum = NO_MORE_DOCS;
blockUpto++; blockUpto++;
} else { } else {
// Read vInts: // Read vInts:
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq); readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
docBuffer[left] = NO_MORE_DOCS; docBuffer[left] = NO_MORE_DOCS;
accum = NO_MORE_DOCS;
blockUpto += left; blockUpto += left;
} }
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
} }
@ -501,8 +498,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
// current skip docID < docIDs generated from current buffer <= next skip docID // current skip docID < docIDs generated from current buffer <= next skip docID
// we don't need to skip if target is buffered already // we don't need to skip if target is buffered already
if (docFreq > BLOCK_SIZE && target > nextSkipDoc) { if (docFreq > BLOCK_SIZE && target > accum) {
if (skipper == null) { if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping // Lazy init: first time this enum has ever been used for skipping
skipper = skipper =
@ -536,28 +532,15 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
// as we have already positioned docIn where in needs to be. // as we have already positioned docIn where in needs to be.
isFreqsRead = true; isFreqsRead = true;
} }
// next time we call advance, this is used to
// foresee whether skipper is necessary.
nextSkipDoc = skipper.getNextSkipDoc();
} }
if (docBufferUpto == BLOCK_SIZE) { if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
} }
// Now scan... this is an inlined/pared down version int next = findFirstGreater(docBuffer, target, docBufferUpto);
// of nextDoc(): this.doc = (int) docBuffer[next];
long doc; docBufferUpto = next + 1;
while (true) { return doc;
doc = docBuffer[docBufferUpto];
if (doc >= target) {
break;
}
++docBufferUpto;
}
docBufferUpto++;
return this.doc = (int) doc;
} }
@Override @Override
@ -609,7 +592,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
private long totalTermFreq; // number of positions in this posting list private long totalTermFreq; // number of positions in this posting list
private int blockUpto; // number of docs in or before the current block private int blockUpto; // number of docs in or before the current block
private int doc; // doc we last read private int doc; // doc we last read
private long accum; // accumulator for doc deltas private int accum; // accumulator for doc deltas
private int freq; // freq we last read private int freq; // freq we last read
private int position; // current position private int position; // current position
@ -645,8 +628,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
// no skip data for this term): // no skip data for this term):
private long skipOffset; private long skipOffset;
private int nextSkipDoc;
private boolean needsOffsets; // true if we actually need offsets private boolean needsOffsets; // true if we actually need offsets
private boolean needsPayloads; // true if we actually need payloads private boolean needsPayloads; // true if we actually need payloads
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1 private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
@ -732,13 +713,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS); this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
doc = -1; doc = -1;
accum = 0; accum = -1;
blockUpto = 0; blockUpto = 0;
if (docFreq > BLOCK_SIZE) {
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
} else {
nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
}
docBufferUpto = BLOCK_SIZE; docBufferUpto = BLOCK_SIZE;
skipped = false; skipped = false;
return this; return this;
@ -761,19 +737,21 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
if (left >= BLOCK_SIZE) { if (left >= BLOCK_SIZE) {
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
pforUtil.decode(docIn, freqBuffer); pforUtil.decode(docIn, freqBuffer);
accum = (int) docBuffer[BLOCK_SIZE - 1];
blockUpto += BLOCK_SIZE; blockUpto += BLOCK_SIZE;
} else if (docFreq == 1) { } else if (docFreq == 1) {
docBuffer[0] = singletonDocID; docBuffer[0] = singletonDocID;
freqBuffer[0] = totalTermFreq; freqBuffer[0] = totalTermFreq;
docBuffer[1] = NO_MORE_DOCS; docBuffer[1] = NO_MORE_DOCS;
accum = NO_MORE_DOCS;
blockUpto++; blockUpto++;
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true); readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
docBuffer[left] = NO_MORE_DOCS; docBuffer[left] = NO_MORE_DOCS;
accum = NO_MORE_DOCS;
blockUpto += left; blockUpto += left;
} }
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
} }
@ -867,7 +845,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
if (target > nextSkipDoc) { if (docFreq > BLOCK_SIZE && target > accum) {
if (skipper == null) { if (skipper == null) {
// Lazy init: first time this enum has ever been used for skipping // Lazy init: first time this enum has ever been used for skipping
skipper = skipper =
@ -901,7 +879,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
lastStartOffset = 0; // new document lastStartOffset = 0; // new document
payloadByteUpto = skipper.getPayloadByteUpto(); payloadByteUpto = skipper.getPayloadByteUpto();
} }
nextSkipDoc = skipper.getNextSkipDoc();
} }
if (docBufferUpto == BLOCK_SIZE) { if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
@ -909,16 +886,12 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
// Now scan: // Now scan:
long doc; long doc;
while (true) { do {
doc = docBuffer[docBufferUpto]; doc = docBuffer[docBufferUpto];
freq = (int) freqBuffer[docBufferUpto]; freq = (int) freqBuffer[docBufferUpto];
posPendingCount += freq; posPendingCount += freq;
docBufferUpto++; docBufferUpto++;
} while (doc < target);
if (doc >= target) {
break;
}
}
position = 0; position = 0;
lastStartOffset = 0; lastStartOffset = 0;
@ -1073,7 +1046,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
private int docFreq; // number of docs in this posting list private int docFreq; // number of docs in this posting list
private int blockUpto; // number of documents in or before the current block private int blockUpto; // number of documents in or before the current block
private int doc; // doc we last read private int doc; // doc we last read
private long accum; // accumulator for doc deltas private int accum; // accumulator for doc deltas
private int nextSkipDoc = -1; private int nextSkipDoc = -1;
@ -1100,7 +1073,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
docIn.seek(termState.docStartFP); docIn.seek(termState.docStartFP);
doc = -1; doc = -1;
accum = 0; accum = -1;
blockUpto = 0; blockUpto = 0;
docBufferUpto = BLOCK_SIZE; docBufferUpto = BLOCK_SIZE;
@ -1153,14 +1126,15 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
if (indexHasFreqs) { if (indexHasFreqs) {
isFreqsRead = false; isFreqsRead = false;
} }
accum = (int) docBuffer[BLOCK_SIZE - 1];
blockUpto += BLOCK_SIZE; blockUpto += BLOCK_SIZE;
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs, true); readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs, true);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
docBuffer[left] = NO_MORE_DOCS; docBuffer[left] = NO_MORE_DOCS;
accum = NO_MORE_DOCS;
blockUpto += left; blockUpto += left;
} }
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS; assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
} }
@ -1208,10 +1182,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
if (target > nextSkipDoc) { if (target > accum) {
advanceShallow(target); advanceShallow(target);
}
if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
} }
@ -1271,7 +1243,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
private long totalTermFreq; // number of positions in this posting list private long totalTermFreq; // number of positions in this posting list
private int docUpto; // how many docs we've read private int docUpto; // how many docs we've read
private int doc; // doc we last read private int doc; // doc we last read
private long accum; // accumulator for doc deltas private int accum; // accumulator for doc deltas
private int freq; // freq we last read private int freq; // freq we last read
private int position; // current position private int position; // current position
@ -1330,7 +1302,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
} }
doc = -1; doc = -1;
accum = 0; accum = -1;
docUpto = 0; docUpto = 0;
docBufferUpto = BLOCK_SIZE; docBufferUpto = BLOCK_SIZE;
@ -1362,12 +1334,14 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
if (left >= BLOCK_SIZE) { if (left >= BLOCK_SIZE) {
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer); forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
pforUtil.decode(docIn, freqBuffer); pforUtil.decode(docIn, freqBuffer);
accum = (int) docBuffer[BLOCK_SIZE - 1];
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true); readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
docBuffer[left] = NO_MORE_DOCS; docBuffer[left] = NO_MORE_DOCS;
accum = NO_MORE_DOCS;
} }
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;
} }
@ -1439,10 +1413,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
if (target > nextSkipDoc) { if (target > accum) {
advanceShallow(target); advanceShallow(target);
}
if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
} }
@ -1576,7 +1548,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
private int docUpto; // how many docs we've read private int docUpto; // how many docs we've read
private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads
private int doc; // doc we last read private int doc; // doc we last read
private long accum; // accumulator for doc deltas private int accum; // accumulator for doc deltas
private int position; // current position private int position; // current position
// how many positions "behind" we are; nextPosition must // how many positions "behind" we are; nextPosition must
@ -1685,7 +1657,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
} }
doc = -1; doc = -1;
accum = 0; accum = -1;
docUpto = 0; docUpto = 0;
posDocUpTo = 0; posDocUpTo = 0;
isFreqsRead = true; isFreqsRead = true;
@ -1752,12 +1724,14 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
isFreqsRead = isFreqsRead =
false; // freq block will be loaded lazily when necessary, we don't load it here false; // freq block will be loaded lazily when necessary, we don't load it here
} }
accum = (int) docBuffer[BLOCK_SIZE - 1];
} else { } else {
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true); readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
prefixSum(docBuffer, left, accum); prefixSum(docBuffer, left, accum);
docBuffer[left] = NO_MORE_DOCS; docBuffer[left] = NO_MORE_DOCS;
accum = NO_MORE_DOCS;
} }
accum = docBuffer[BLOCK_SIZE - 1];
docBufferUpto = 0; docBufferUpto = 0;
} }
@ -1875,10 +1849,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
@Override @Override
public int advance(int target) throws IOException { public int advance(int target) throws IOException {
if (target > nextSkipDoc) { if (target > accum) {
advanceShallow(target); advanceShallow(target);
}
if (docBufferUpto == BLOCK_SIZE) {
if (seekTo >= 0) { if (seekTo >= 0) {
docIn.seek(seekTo); docIn.seek(seekTo);
seekTo = -1; seekTo = -1;

View File

@ -205,7 +205,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
payStartFP = payOut.getFilePointer(); payStartFP = payOut.getFilePointer();
} }
} }
lastDocID = 0; lastDocID = -1;
lastBlockDocID = -1; lastBlockDocID = -1;
skipWriter.resetSkip(); skipWriter.resetSkip();
this.norms = norms; this.norms = norms;
@ -369,7 +369,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
final int singletonDocID; final int singletonDocID;
if (state.docFreq == 1) { if (state.docFreq == 1) {
// pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq // pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
singletonDocID = (int) docDeltaBuffer[0]; singletonDocID = (int) docDeltaBuffer[0] - 1;
} else { } else {
singletonDocID = -1; singletonDocID = -1;
// Group vInt encode the remaining doc deltas and freqs: // Group vInt encode the remaining doc deltas and freqs:
@ -468,7 +468,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
state.lastPosBlockOffset = lastPosBlockOffset; state.lastPosBlockOffset = lastPosBlockOffset;
docBufferUpto = 0; docBufferUpto = 0;
posBufferUpto = 0; posBufferUpto = 0;
lastDocID = 0; lastDocID = -1;
docCount = 0; docCount = 0;
} }

View File

@ -18,7 +18,7 @@ package org.apache.lucene.codecs.lucene99;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.MultiLevelSkipListReader; import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
/** /**
@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput;
* *
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int) * <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
*/ */
public class Lucene99SkipReader extends MultiLevelSkipListReader { public class Lucene99SkipReader extends Lucene99MultiLevelSkipListReader {
private long[] docPointer; private long[] docPointer;
private long[] posPointer; private long[] posPointer;
private long[] payPointer; private long[] payPointer;
@ -119,7 +119,7 @@ public class Lucene99SkipReader extends MultiLevelSkipListReader {
/** /**
* Returns the doc pointer of the doc to which the last call of {@link * Returns the doc pointer of the doc to which the last call of {@link
* MultiLevelSkipListReader#skipTo(int)} has skipped. * Lucene99MultiLevelSkipListReader#skipTo(int)} has skipped.
*/ */
public long getDocPointer() { public long getDocPointer() {
return lastDocPointer; return lastDocPointer;

View File

@ -131,7 +131,7 @@ public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
private void initSkip() { private void initSkip() {
if (!initialized) { if (!initialized) {
super.resetSkip(); super.resetSkip();
Arrays.fill(lastSkipDoc, 0); Arrays.fill(lastSkipDoc, -1);
Arrays.fill(lastSkipDocPointer, lastDocFP); Arrays.fill(lastSkipDocPointer, lastDocFP);
if (fieldHasPositions) { if (fieldHasPositions) {
Arrays.fill(lastSkipPosPointer, lastPosFP); Arrays.fill(lastSkipPosPointer, lastPosFP);