mirror of https://github.com/apache/lucene.git
Simplify advancing on postings/impacts enums (#12810)
This commit is contained in:
parent
f7cab16450
commit
5aa401e7d8
|
@ -0,0 +1,263 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.backward_codecs;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
|
||||
import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader;
|
||||
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.MathUtil;
|
||||
|
||||
/**
|
||||
* Legacy variant of {@link Lucene99MultiLevelSkipListReader} for Lucene postings formats up to
|
||||
* {@link Lucene90PostingsFormat} included. It starts postings at 0 rather than -1.
|
||||
*/
|
||||
public abstract class Lucene50MultiLevelSkipListReader implements Closeable {
|
||||
/** the maximum number of skip levels possible for this index */
|
||||
protected int maxNumberOfSkipLevels;
|
||||
|
||||
/** number of levels in this skip list */
|
||||
protected int numberOfSkipLevels;
|
||||
|
||||
private int docCount;
|
||||
|
||||
/** skipStream for each level. */
|
||||
private IndexInput[] skipStream;
|
||||
|
||||
/** The start pointer of each skip level. */
|
||||
private long[] skipPointer;
|
||||
|
||||
/** skipInterval of each level. */
|
||||
private int[] skipInterval;
|
||||
|
||||
/**
|
||||
* Number of docs skipped per level. It's possible for some values to overflow a signed int, but
|
||||
* this has been accounted for.
|
||||
*/
|
||||
private int[] numSkipped;
|
||||
|
||||
/** Doc id of current skip entry per level. */
|
||||
protected int[] skipDoc;
|
||||
|
||||
/** Doc id of last read skip entry with docId <= target. */
|
||||
private int lastDoc;
|
||||
|
||||
/** Child pointer of current skip entry per level. */
|
||||
private long[] childPointer;
|
||||
|
||||
/** childPointer of last read skip entry with docId <= target. */
|
||||
private long lastChildPointer;
|
||||
|
||||
private final int skipMultiplier;
|
||||
|
||||
/** Creates a {@code MultiLevelSkipListReader}. */
|
||||
protected Lucene50MultiLevelSkipListReader(
|
||||
IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) {
|
||||
this.skipStream = new IndexInput[maxSkipLevels];
|
||||
this.skipPointer = new long[maxSkipLevels];
|
||||
this.childPointer = new long[maxSkipLevels];
|
||||
this.numSkipped = new int[maxSkipLevels];
|
||||
this.maxNumberOfSkipLevels = maxSkipLevels;
|
||||
this.skipInterval = new int[maxSkipLevels];
|
||||
this.skipMultiplier = skipMultiplier;
|
||||
this.skipStream[0] = skipStream;
|
||||
this.skipInterval[0] = skipInterval;
|
||||
for (int i = 1; i < maxSkipLevels; i++) {
|
||||
// cache skip intervals
|
||||
this.skipInterval[i] = this.skipInterval[i - 1] * skipMultiplier;
|
||||
}
|
||||
skipDoc = new int[maxSkipLevels];
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@code MultiLevelSkipListReader}, where {@code skipInterval} and {@code
|
||||
* skipMultiplier} are the same.
|
||||
*/
|
||||
protected Lucene50MultiLevelSkipListReader(
|
||||
IndexInput skipStream, int maxSkipLevels, int skipInterval) {
|
||||
this(skipStream, maxSkipLevels, skipInterval, skipInterval);
|
||||
}
|
||||
|
||||
/** Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. */
|
||||
public int getDoc() {
|
||||
return lastDoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skips entries to the first beyond the current whose document number is greater than or equal to
|
||||
* <i>target</i>. Returns the current doc count.
|
||||
*/
|
||||
public int skipTo(int target) throws IOException {
|
||||
|
||||
// walk up the levels until highest level is found that has a skip
|
||||
// for this target
|
||||
int level = 0;
|
||||
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
|
||||
level++;
|
||||
}
|
||||
|
||||
while (level >= 0) {
|
||||
if (target > skipDoc[level]) {
|
||||
if (!loadNextSkip(level)) {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// no more skips on this level, go down one level
|
||||
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
|
||||
seekChild(level - 1);
|
||||
}
|
||||
level--;
|
||||
}
|
||||
}
|
||||
|
||||
return numSkipped[0] - skipInterval[0] - 1;
|
||||
}
|
||||
|
||||
private boolean loadNextSkip(int level) throws IOException {
|
||||
// we have to skip, the target document is greater than the current
|
||||
// skip list entry
|
||||
setLastSkipData(level);
|
||||
|
||||
numSkipped[level] += skipInterval[level];
|
||||
|
||||
// numSkipped may overflow a signed int, so compare as unsigned.
|
||||
if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
|
||||
// this skip list is exhausted
|
||||
skipDoc[level] = Integer.MAX_VALUE;
|
||||
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
|
||||
return false;
|
||||
}
|
||||
|
||||
// read next skip entry
|
||||
skipDoc[level] += readSkipData(level, skipStream[level]);
|
||||
|
||||
if (level != 0) {
|
||||
// read the child pointer if we are not on the leaf level
|
||||
childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Seeks the skip entry on the given level */
|
||||
protected void seekChild(int level) throws IOException {
|
||||
skipStream[level].seek(lastChildPointer);
|
||||
numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
|
||||
skipDoc[level] = lastDoc;
|
||||
if (level > 0) {
|
||||
childPointer[level] = readChildPointer(skipStream[level]) + skipPointer[level - 1];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
for (int i = 1; i < skipStream.length; i++) {
|
||||
if (skipStream[i] != null) {
|
||||
skipStream[i].close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Initializes the reader, for reuse on a new term. */
|
||||
public void init(long skipPointer, int df) throws IOException {
|
||||
this.skipPointer[0] = skipPointer;
|
||||
this.docCount = df;
|
||||
assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
|
||||
: "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
|
||||
Arrays.fill(skipDoc, 0);
|
||||
Arrays.fill(numSkipped, 0);
|
||||
Arrays.fill(childPointer, 0);
|
||||
|
||||
for (int i = 1; i < numberOfSkipLevels; i++) {
|
||||
skipStream[i] = null;
|
||||
}
|
||||
loadSkipLevels();
|
||||
}
|
||||
|
||||
/** Loads the skip levels */
|
||||
private void loadSkipLevels() throws IOException {
|
||||
if (docCount <= skipInterval[0]) {
|
||||
numberOfSkipLevels = 1;
|
||||
} else {
|
||||
numberOfSkipLevels = 1 + MathUtil.log(docCount / skipInterval[0], skipMultiplier);
|
||||
}
|
||||
|
||||
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
|
||||
numberOfSkipLevels = maxNumberOfSkipLevels;
|
||||
}
|
||||
|
||||
skipStream[0].seek(skipPointer[0]);
|
||||
|
||||
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
|
||||
// the length of the current level
|
||||
long length = readLevelLength(skipStream[0]);
|
||||
|
||||
// the start pointer of the current level
|
||||
skipPointer[i] = skipStream[0].getFilePointer();
|
||||
|
||||
// clone this stream, it is already at the start of the current level
|
||||
skipStream[i] = skipStream[0].clone();
|
||||
|
||||
// move base stream beyond the current level
|
||||
skipStream[0].seek(skipStream[0].getFilePointer() + length);
|
||||
}
|
||||
|
||||
// use base stream for the lowest level
|
||||
skipPointer[0] = skipStream[0].getFilePointer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Subclasses must implement the actual skip data encoding in this method.
|
||||
*
|
||||
* @param level the level skip data shall be read from
|
||||
* @param skipStream the skip stream to read from
|
||||
*/
|
||||
protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;
|
||||
|
||||
/**
|
||||
* read the length of the current level written via {@link
|
||||
* MultiLevelSkipListWriter#writeLevelLength(long, IndexOutput)}.
|
||||
*
|
||||
* @param skipStream the IndexInput the length shall be read from
|
||||
* @return level length
|
||||
*/
|
||||
protected long readLevelLength(IndexInput skipStream) throws IOException {
|
||||
return skipStream.readVLong();
|
||||
}
|
||||
|
||||
/**
|
||||
* read the child pointer written via {@link MultiLevelSkipListWriter#writeChildPointer(long,
|
||||
* DataOutput)}.
|
||||
*
|
||||
* @param skipStream the IndexInput the child pointer shall be read from
|
||||
* @return child pointer
|
||||
*/
|
||||
protected long readChildPointer(IndexInput skipStream) throws IOException {
|
||||
return skipStream.readVLong();
|
||||
}
|
||||
|
||||
/** Copies the values of the last read skip entry on this level */
|
||||
protected void setLastSkipData(int level) {
|
||||
lastDoc = skipDoc[level];
|
||||
lastChildPointer = childPointer[level];
|
||||
}
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.backward_codecs;
|
||||
|
||||
/** Remove this file when adding back compat codecs */
public class Placeholder {
  private Placeholder() {
    // no instance
  }
}
|
|
@ -20,7 +20,7 @@ import static org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat.
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.codecs.MultiLevelSkipListReader;
|
||||
import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
|
@ -50,7 +50,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
*
|
||||
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
|
||||
*/
|
||||
class Lucene50SkipReader extends MultiLevelSkipListReader {
|
||||
class Lucene50SkipReader extends Lucene50MultiLevelSkipListReader {
|
||||
private final int version;
|
||||
private long[] docPointer;
|
||||
private long[] posPointer;
|
||||
|
@ -124,7 +124,7 @@ class Lucene50SkipReader extends MultiLevelSkipListReader {
|
|||
|
||||
/**
|
||||
* Returns the doc pointer of the doc to which the last call of {@link
|
||||
* MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
* Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
*/
|
||||
public long getDocPointer() {
|
||||
return lastDocPointer;
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.backward_codecs.lucene84;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.codecs.MultiLevelSkipListReader;
|
||||
import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
|
@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
*
|
||||
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
|
||||
*/
|
||||
class Lucene84SkipReader extends MultiLevelSkipListReader {
|
||||
class Lucene84SkipReader extends Lucene50MultiLevelSkipListReader {
|
||||
private long[] docPointer;
|
||||
private long[] posPointer;
|
||||
private long[] payPointer;
|
||||
|
@ -119,7 +119,7 @@ class Lucene84SkipReader extends MultiLevelSkipListReader {
|
|||
|
||||
/**
|
||||
* Returns the doc pointer of the doc to which the last call of {@link
|
||||
* MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
* Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
*/
|
||||
public long getDocPointer() {
|
||||
return lastDocPointer;
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.backward_codecs.lucene90;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.codecs.MultiLevelSkipListReader;
|
||||
import org.apache.lucene.backward_codecs.Lucene50MultiLevelSkipListReader;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
|
@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
*
|
||||
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
|
||||
*/
|
||||
class Lucene90SkipReader extends MultiLevelSkipListReader {
|
||||
class Lucene90SkipReader extends Lucene50MultiLevelSkipListReader {
|
||||
private long[] docPointer;
|
||||
private long[] posPointer;
|
||||
private long[] payPointer;
|
||||
|
@ -119,7 +119,7 @@ class Lucene90SkipReader extends MultiLevelSkipListReader {
|
|||
|
||||
/**
|
||||
* Returns the doc pointer of the doc to which the last call of {@link
|
||||
* MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
* Lucene50MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
*/
|
||||
public long getDocPointer() {
|
||||
return lastDocPointer;
|
||||
|
|
|
@ -284,7 +284,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
// for skip list data
|
||||
private SimpleTextSkipReader skipReader;
|
||||
private int nextSkipDoc = 0;
|
||||
private int nextSkipDoc = -1;
|
||||
private long seekTo = -1;
|
||||
|
||||
public SimpleTextDocsEnum() {
|
||||
|
@ -305,7 +305,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
tf = 1;
|
||||
cost = docFreq;
|
||||
skipReader.reset(skipPointer, docFreq);
|
||||
nextSkipDoc = 0;
|
||||
nextSkipDoc = -1;
|
||||
seekTo = -1;
|
||||
return this;
|
||||
}
|
||||
|
@ -458,7 +458,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
// for skip list data
|
||||
private SimpleTextSkipReader skipReader;
|
||||
private int nextSkipDoc = 0;
|
||||
private int nextSkipDoc = -1;
|
||||
private long seekTo = -1;
|
||||
|
||||
public SimpleTextPostingsEnum() {
|
||||
|
@ -484,7 +484,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
}
|
||||
cost = docFreq;
|
||||
skipReader.reset(skipPointer, docFreq);
|
||||
nextSkipDoc = 0;
|
||||
nextSkipDoc = -1;
|
||||
seekTo = -1;
|
||||
return this;
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.codecs.MultiLevelSkipListReader;
|
||||
import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader;
|
||||
import org.apache.lucene.index.Impact;
|
||||
import org.apache.lucene.index.Impacts;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
@ -51,7 +51,7 @@ import org.apache.lucene.util.StringHelper;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class SimpleTextSkipReader extends MultiLevelSkipListReader {
|
||||
class SimpleTextSkipReader extends Lucene99MultiLevelSkipListReader {
|
||||
|
||||
private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
|
||||
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
|
|
@ -41,7 +41,7 @@ class SimpleTextSkipWriter extends MultiLevelSkipListWriter {
|
|||
|
||||
static final int BLOCK_SIZE = 8;
|
||||
private Map<Integer, Boolean> wroteHeaderPerLevelMap = new HashMap<>();
|
||||
private int curDoc;
|
||||
private int curDoc = -1;
|
||||
private long curDocFilePointer;
|
||||
private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
|
||||
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.lucene.util.MathUtil;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class MultiLevelSkipListReader implements Closeable {
|
||||
public abstract class Lucene99MultiLevelSkipListReader implements Closeable {
|
||||
/** the maximum number of skip levels possible for this index */
|
||||
protected int maxNumberOfSkipLevels;
|
||||
|
||||
|
@ -63,7 +63,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
|
|||
protected int[] skipDoc;
|
||||
|
||||
/** Doc id of last read skip entry with docId <= target. */
|
||||
private int lastDoc;
|
||||
private int lastDoc = -1;
|
||||
|
||||
/** Child pointer of current skip entry per level. */
|
||||
private long[] childPointer;
|
||||
|
@ -74,7 +74,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
|
|||
private final int skipMultiplier;
|
||||
|
||||
/** Creates a {@code MultiLevelSkipListReader}. */
|
||||
protected MultiLevelSkipListReader(
|
||||
protected Lucene99MultiLevelSkipListReader(
|
||||
IndexInput skipStream, int maxSkipLevels, int skipInterval, int skipMultiplier) {
|
||||
this.skipStream = new IndexInput[maxSkipLevels];
|
||||
this.skipPointer = new long[maxSkipLevels];
|
||||
|
@ -96,7 +96,8 @@ public abstract class MultiLevelSkipListReader implements Closeable {
|
|||
* Creates a {@code MultiLevelSkipListReader}, where {@code skipInterval} and {@code
|
||||
* skipMultiplier} are the same.
|
||||
*/
|
||||
protected MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
|
||||
protected Lucene99MultiLevelSkipListReader(
|
||||
IndexInput skipStream, int maxSkipLevels, int skipInterval) {
|
||||
this(skipStream, maxSkipLevels, skipInterval, skipInterval);
|
||||
}
|
||||
|
||||
|
@ -186,7 +187,7 @@ public abstract class MultiLevelSkipListReader implements Closeable {
|
|||
this.docCount = df;
|
||||
assert skipPointer >= 0 && skipPointer <= skipStream[0].length()
|
||||
: "invalid skip pointer: " + skipPointer + ", length=" + skipStream[0].length();
|
||||
Arrays.fill(skipDoc, 0);
|
||||
Arrays.fill(skipDoc, -1);
|
||||
Arrays.fill(numSkipped, 0);
|
||||
Arrays.fill(childPointer, 0);
|
||||
|
|
@ -334,7 +334,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
|
||||
private int blockUpto; // number of docs in or before the current block
|
||||
private int doc; // doc we last read
|
||||
private long accum; // accumulator for doc deltas
|
||||
private int accum; // accumulator for doc deltas
|
||||
|
||||
// Where this term's postings start in the .doc file:
|
||||
private long docTermStartFP;
|
||||
|
@ -344,10 +344,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
// no skip data for this term):
|
||||
private long skipOffset;
|
||||
|
||||
// docID for next skip point, we won't use skipper if
|
||||
// target docID is not larger than this
|
||||
private int nextSkipDoc;
|
||||
|
||||
private boolean needsFreq; // true if the caller actually needs frequencies
|
||||
// as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block
|
||||
// always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer
|
||||
|
@ -404,9 +400,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
freqBuffer[i] = 1;
|
||||
}
|
||||
}
|
||||
accum = 0;
|
||||
accum = -1;
|
||||
blockUpto = 0;
|
||||
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
skipped = false;
|
||||
return this;
|
||||
|
@ -467,20 +462,22 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
pforUtil.skip(docIn); // skip over freqBuffer if we don't need them at all
|
||||
}
|
||||
}
|
||||
accum = (int) docBuffer[BLOCK_SIZE - 1];
|
||||
blockUpto += BLOCK_SIZE;
|
||||
} else if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = totalTermFreq;
|
||||
docBuffer[1] = NO_MORE_DOCS;
|
||||
accum = NO_MORE_DOCS;
|
||||
blockUpto++;
|
||||
} else {
|
||||
// Read vInts:
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
accum = NO_MORE_DOCS;
|
||||
blockUpto += left;
|
||||
}
|
||||
accum = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
}
|
||||
|
@ -501,8 +498,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
public int advance(int target) throws IOException {
|
||||
// current skip docID < docIDs generated from current buffer <= next skip docID
|
||||
// we don't need to skip if target is buffered already
|
||||
if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
|
||||
|
||||
if (docFreq > BLOCK_SIZE && target > accum) {
|
||||
if (skipper == null) {
|
||||
// Lazy init: first time this enum has ever been used for skipping
|
||||
skipper =
|
||||
|
@ -536,28 +532,15 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
// as we have already positioned docIn where in needs to be.
|
||||
isFreqsRead = true;
|
||||
}
|
||||
// next time we call advance, this is used to
|
||||
// foresee whether skipper is necessary.
|
||||
nextSkipDoc = skipper.getNextSkipDoc();
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
// Now scan... this is an inlined/pared down version
|
||||
// of nextDoc():
|
||||
long doc;
|
||||
while (true) {
|
||||
doc = docBuffer[docBufferUpto];
|
||||
|
||||
if (doc >= target) {
|
||||
break;
|
||||
}
|
||||
++docBufferUpto;
|
||||
}
|
||||
|
||||
docBufferUpto++;
|
||||
return this.doc = (int) doc;
|
||||
int next = findFirstGreater(docBuffer, target, docBufferUpto);
|
||||
this.doc = (int) docBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -609,7 +592,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
private long totalTermFreq; // number of positions in this posting list
|
||||
private int blockUpto; // number of docs in or before the current block
|
||||
private int doc; // doc we last read
|
||||
private long accum; // accumulator for doc deltas
|
||||
private int accum; // accumulator for doc deltas
|
||||
private int freq; // freq we last read
|
||||
private int position; // current position
|
||||
|
||||
|
@ -645,8 +628,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
// no skip data for this term):
|
||||
private long skipOffset;
|
||||
|
||||
private int nextSkipDoc;
|
||||
|
||||
private boolean needsOffsets; // true if we actually need offsets
|
||||
private boolean needsPayloads; // true if we actually need payloads
|
||||
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
|
||||
|
@ -732,13 +713,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
|
||||
|
||||
doc = -1;
|
||||
accum = 0;
|
||||
accum = -1;
|
||||
blockUpto = 0;
|
||||
if (docFreq > BLOCK_SIZE) {
|
||||
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
||||
} else {
|
||||
nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
|
||||
}
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
skipped = false;
|
||||
return this;
|
||||
|
@ -761,19 +737,21 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decode(docIn, freqBuffer);
|
||||
accum = (int) docBuffer[BLOCK_SIZE - 1];
|
||||
blockUpto += BLOCK_SIZE;
|
||||
} else if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = totalTermFreq;
|
||||
docBuffer[1] = NO_MORE_DOCS;
|
||||
accum = NO_MORE_DOCS;
|
||||
blockUpto++;
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
accum = NO_MORE_DOCS;
|
||||
blockUpto += left;
|
||||
}
|
||||
accum = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
}
|
||||
|
@ -867,7 +845,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target > nextSkipDoc) {
|
||||
if (docFreq > BLOCK_SIZE && target > accum) {
|
||||
if (skipper == null) {
|
||||
// Lazy init: first time this enum has ever been used for skipping
|
||||
skipper =
|
||||
|
@ -901,7 +879,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
lastStartOffset = 0; // new document
|
||||
payloadByteUpto = skipper.getPayloadByteUpto();
|
||||
}
|
||||
nextSkipDoc = skipper.getNextSkipDoc();
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
refillDocs();
|
||||
|
@ -909,16 +886,12 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
// Now scan:
|
||||
long doc;
|
||||
while (true) {
|
||||
do {
|
||||
doc = docBuffer[docBufferUpto];
|
||||
freq = (int) freqBuffer[docBufferUpto];
|
||||
posPendingCount += freq;
|
||||
docBufferUpto++;
|
||||
|
||||
if (doc >= target) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (doc < target);
|
||||
|
||||
position = 0;
|
||||
lastStartOffset = 0;
|
||||
|
@ -1073,7 +1046,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
private int docFreq; // number of docs in this posting list
|
||||
private int blockUpto; // number of documents in or before the current block
|
||||
private int doc; // doc we last read
|
||||
private long accum; // accumulator for doc deltas
|
||||
private int accum; // accumulator for doc deltas
|
||||
|
||||
private int nextSkipDoc = -1;
|
||||
|
||||
|
@ -1100,7 +1073,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
docIn.seek(termState.docStartFP);
|
||||
|
||||
doc = -1;
|
||||
accum = 0;
|
||||
accum = -1;
|
||||
blockUpto = 0;
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
|
||||
|
@ -1153,14 +1126,15 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
if (indexHasFreqs) {
|
||||
isFreqsRead = false;
|
||||
}
|
||||
accum = (int) docBuffer[BLOCK_SIZE - 1];
|
||||
blockUpto += BLOCK_SIZE;
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
accum = NO_MORE_DOCS;
|
||||
blockUpto += left;
|
||||
}
|
||||
accum = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
}
|
||||
|
@ -1208,10 +1182,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target > nextSkipDoc) {
|
||||
if (target > accum) {
|
||||
advanceShallow(target);
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
|
@ -1271,7 +1243,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
private long totalTermFreq; // number of positions in this posting list
|
||||
private int docUpto; // how many docs we've read
|
||||
private int doc; // doc we last read
|
||||
private long accum; // accumulator for doc deltas
|
||||
private int accum; // accumulator for doc deltas
|
||||
private int freq; // freq we last read
|
||||
private int position; // current position
|
||||
|
||||
|
@ -1330,7 +1302,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
doc = -1;
|
||||
accum = 0;
|
||||
accum = -1;
|
||||
docUpto = 0;
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
|
||||
|
@ -1362,12 +1334,14 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decode(docIn, freqBuffer);
|
||||
accum = (int) docBuffer[BLOCK_SIZE - 1];
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
accum = NO_MORE_DOCS;
|
||||
}
|
||||
accum = docBuffer[BLOCK_SIZE - 1];
|
||||
|
||||
docBufferUpto = 0;
|
||||
}
|
||||
|
||||
|
@ -1439,10 +1413,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target > nextSkipDoc) {
|
||||
if (target > accum) {
|
||||
advanceShallow(target);
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
|
@ -1576,7 +1548,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
private int docUpto; // how many docs we've read
|
||||
private int posDocUpTo; // for how many docs we've read positions, offsets, and payloads
|
||||
private int doc; // doc we last read
|
||||
private long accum; // accumulator for doc deltas
|
||||
private int accum; // accumulator for doc deltas
|
||||
private int position; // current position
|
||||
|
||||
// how many positions "behind" we are; nextPosition must
|
||||
|
@ -1685,7 +1657,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
doc = -1;
|
||||
accum = 0;
|
||||
accum = -1;
|
||||
docUpto = 0;
|
||||
posDocUpTo = 0;
|
||||
isFreqsRead = true;
|
||||
|
@ -1752,12 +1724,14 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
isFreqsRead =
|
||||
false; // freq block will be loaded lazily when necessary, we don't load it here
|
||||
}
|
||||
accum = (int) docBuffer[BLOCK_SIZE - 1];
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
accum = NO_MORE_DOCS;
|
||||
}
|
||||
accum = docBuffer[BLOCK_SIZE - 1];
|
||||
|
||||
docBufferUpto = 0;
|
||||
}
|
||||
|
||||
|
@ -1875,10 +1849,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target > nextSkipDoc) {
|
||||
if (target > accum) {
|
||||
advanceShallow(target);
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
if (seekTo >= 0) {
|
||||
docIn.seek(seekTo);
|
||||
seekTo = -1;
|
||||
|
|
|
@ -205,7 +205,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
|
|||
payStartFP = payOut.getFilePointer();
|
||||
}
|
||||
}
|
||||
lastDocID = 0;
|
||||
lastDocID = -1;
|
||||
lastBlockDocID = -1;
|
||||
skipWriter.resetSkip();
|
||||
this.norms = norms;
|
||||
|
@ -369,7 +369,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
|
|||
final int singletonDocID;
|
||||
if (state.docFreq == 1) {
|
||||
// pulse the singleton docid into the term dictionary, freq is implicitly totalTermFreq
|
||||
singletonDocID = (int) docDeltaBuffer[0];
|
||||
singletonDocID = (int) docDeltaBuffer[0] - 1;
|
||||
} else {
|
||||
singletonDocID = -1;
|
||||
// Group vInt encode the remaining doc deltas and freqs:
|
||||
|
@ -468,7 +468,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
|
|||
state.lastPosBlockOffset = lastPosBlockOffset;
|
||||
docBufferUpto = 0;
|
||||
posBufferUpto = 0;
|
||||
lastDocID = 0;
|
||||
lastDocID = -1;
|
||||
docCount = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ package org.apache.lucene.codecs.lucene99;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import org.apache.lucene.codecs.MultiLevelSkipListReader;
|
||||
import org.apache.lucene.codecs.Lucene99MultiLevelSkipListReader;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
|
@ -48,7 +48,7 @@ import org.apache.lucene.store.IndexInput;
|
|||
*
|
||||
* <p>Therefore, we'll trim df before passing it to the interface. see trim(int)
|
||||
*/
|
||||
public class Lucene99SkipReader extends MultiLevelSkipListReader {
|
||||
public class Lucene99SkipReader extends Lucene99MultiLevelSkipListReader {
|
||||
private long[] docPointer;
|
||||
private long[] posPointer;
|
||||
private long[] payPointer;
|
||||
|
@ -119,7 +119,7 @@ public class Lucene99SkipReader extends MultiLevelSkipListReader {
|
|||
|
||||
/**
|
||||
* Returns the doc pointer of the doc to which the last call of {@link
|
||||
* MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
* Lucene99MultiLevelSkipListReader#skipTo(int)} has skipped.
|
||||
*/
|
||||
public long getDocPointer() {
|
||||
return lastDocPointer;
|
||||
|
|
|
@ -131,7 +131,7 @@ public final class Lucene99SkipWriter extends MultiLevelSkipListWriter {
|
|||
private void initSkip() {
|
||||
if (!initialized) {
|
||||
super.resetSkip();
|
||||
Arrays.fill(lastSkipDoc, 0);
|
||||
Arrays.fill(lastSkipDoc, -1);
|
||||
Arrays.fill(lastSkipDocPointer, lastDocFP);
|
||||
if (fieldHasPositions) {
|
||||
Arrays.fill(lastSkipPosPointer, lastPosFP);
|
||||
|
|
Loading…
Reference in New Issue