mirror of https://github.com/apache/lucene.git
LUCENE-866: Adds multi-level skip lists to the posting lists.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@543076 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f86b74623d
commit
87ba4a1258
|
@ -210,6 +210,12 @@ Optimizations
|
||||||
these increases yield 10-18% overall performance gain vs the
|
these increases yield 10-18% overall performance gain vs the
|
||||||
previous 1K defaults. (Mike McCandless)
|
previous 1K defaults. (Mike McCandless)
|
||||||
|
|
||||||
|
7. LUCENE-866: Adds multi-level skip lists to the posting lists. This speeds
|
||||||
|
up most queries that use skipTo(), especially on big indexes with large posting
|
||||||
|
lists. For average AND queries the speedup is about 20%, for queries that
|
||||||
|
contain very frequent and very unique terms the speedup can be over 80%.
|
||||||
|
(Michael Busch)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
|
|
||||||
1. LUCENE 791 && INFRA-1173: Infrastructure moved the Wiki to
|
1. LUCENE 791 && INFRA-1173: Infrastructure moved the Wiki to
|
||||||
|
|
|
@ -0,0 +1,114 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements the skip list reader for the default posting list format
|
||||||
|
* that stores positions and payloads.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class DefaultSkipListReader extends MultiLevelSkipListReader {
|
||||||
|
private boolean currentFieldStoresPayloads;
|
||||||
|
private long freqPointer[];
|
||||||
|
private long proxPointer[];
|
||||||
|
private int payloadLength[];
|
||||||
|
|
||||||
|
private long lastFreqPointer;
|
||||||
|
private long lastProxPointer;
|
||||||
|
private int lastPayloadLength;
|
||||||
|
|
||||||
|
|
||||||
|
DefaultSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
|
||||||
|
super(skipStream, maxSkipLevels, skipInterval);
|
||||||
|
freqPointer = new long[maxSkipLevels];
|
||||||
|
proxPointer = new long[maxSkipLevels];
|
||||||
|
payloadLength = new int[maxSkipLevels];
|
||||||
|
}
|
||||||
|
|
||||||
|
void init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, boolean storesPayloads) {
|
||||||
|
super.init(skipPointer, df);
|
||||||
|
this.currentFieldStoresPayloads = storesPayloads;
|
||||||
|
lastFreqPointer = freqBasePointer;
|
||||||
|
lastProxPointer = proxBasePointer;
|
||||||
|
|
||||||
|
Arrays.fill(freqPointer, freqBasePointer);
|
||||||
|
Arrays.fill(proxPointer, proxBasePointer);
|
||||||
|
Arrays.fill(payloadLength, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the freq pointer of the doc to which the last call of
|
||||||
|
* {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */
|
||||||
|
long getFreqPointer() {
|
||||||
|
return lastFreqPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the prox pointer of the doc to which the last call of
|
||||||
|
* {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */
|
||||||
|
long getProxPointer() {
|
||||||
|
return lastProxPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the payload length of the payload stored just before
|
||||||
|
* the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)}
|
||||||
|
* has skipped. */
|
||||||
|
int getPayloadLength() {
|
||||||
|
return lastPayloadLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void seekChild(int level) throws IOException {
|
||||||
|
super.seekChild(level);
|
||||||
|
freqPointer[level] = lastFreqPointer;
|
||||||
|
proxPointer[level] = lastProxPointer;
|
||||||
|
payloadLength[level] = lastPayloadLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void setLastSkipData(int level) {
|
||||||
|
super.setLastSkipData(level);
|
||||||
|
lastFreqPointer = freqPointer[level];
|
||||||
|
lastProxPointer = proxPointer[level];
|
||||||
|
lastPayloadLength = payloadLength[level];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
|
||||||
|
int delta;
|
||||||
|
if (currentFieldStoresPayloads) {
|
||||||
|
// the current field stores payloads.
|
||||||
|
// if the doc delta is odd then we have
|
||||||
|
// to read the current payload length
|
||||||
|
// because it differs from the length of the
|
||||||
|
// previous payload
|
||||||
|
delta = skipStream.readVInt();
|
||||||
|
if ((delta & 1) != 0) {
|
||||||
|
payloadLength[level] = skipStream.readVInt();
|
||||||
|
}
|
||||||
|
delta >>>= 1;
|
||||||
|
} else {
|
||||||
|
delta = skipStream.readVInt();
|
||||||
|
}
|
||||||
|
freqPointer[level] += skipStream.readVInt();
|
||||||
|
proxPointer[level] += skipStream.readVInt();
|
||||||
|
|
||||||
|
return delta;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,124 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements the skip list writer for the default posting list format
|
||||||
|
* that stores positions and payloads.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class DefaultSkipListWriter extends MultiLevelSkipListWriter {
|
||||||
|
private int[] lastSkipDoc;
|
||||||
|
private int[] lastSkipPayloadLength;
|
||||||
|
private long[] lastSkipFreqPointer;
|
||||||
|
private long[] lastSkipProxPointer;
|
||||||
|
|
||||||
|
private IndexOutput freqOutput;
|
||||||
|
private IndexOutput proxOutput;
|
||||||
|
|
||||||
|
private int curDoc;
|
||||||
|
private boolean curStorePayloads;
|
||||||
|
private int curPayloadLength;
|
||||||
|
private long curFreqPointer;
|
||||||
|
private long curProxPointer;
|
||||||
|
|
||||||
|
DefaultSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) {
|
||||||
|
super(skipInterval, numberOfSkipLevels, docCount);
|
||||||
|
this.freqOutput = freqOutput;
|
||||||
|
this.proxOutput = proxOutput;
|
||||||
|
|
||||||
|
lastSkipDoc = new int[numberOfSkipLevels];
|
||||||
|
lastSkipPayloadLength = new int[numberOfSkipLevels];
|
||||||
|
lastSkipFreqPointer = new long[numberOfSkipLevels];
|
||||||
|
lastSkipProxPointer = new long[numberOfSkipLevels];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the values for the current skip data.
|
||||||
|
*/
|
||||||
|
void setSkipData(int doc, boolean storePayloads, int payloadLength) {
|
||||||
|
this.curDoc = doc;
|
||||||
|
this.curStorePayloads = storePayloads;
|
||||||
|
this.curPayloadLength = payloadLength;
|
||||||
|
this.curFreqPointer = freqOutput.getFilePointer();
|
||||||
|
this.curProxPointer = proxOutput.getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void resetSkip() {
|
||||||
|
super.resetSkip();
|
||||||
|
Arrays.fill(lastSkipDoc, 0);
|
||||||
|
Arrays.fill(lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list
|
||||||
|
Arrays.fill(lastSkipFreqPointer, freqOutput.getFilePointer());
|
||||||
|
Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
|
||||||
|
// To efficiently store payloads in the posting lists we do not store the length of
|
||||||
|
// every payload. Instead we omit the length for a payload if the previous payload had
|
||||||
|
// the same length.
|
||||||
|
// However, in order to support skipping the payload length at every skip point must be known.
|
||||||
|
// So we use the same length encoding that we use for the posting lists for the skip data as well:
|
||||||
|
// Case 1: current field does not store payloads
|
||||||
|
// SkipDatum --> DocSkip, FreqSkip, ProxSkip
|
||||||
|
// DocSkip,FreqSkip,ProxSkip --> VInt
|
||||||
|
// DocSkip records the document number before every SkipInterval th document in TermFreqs.
|
||||||
|
// Document numbers are represented as differences from the previous value in the sequence.
|
||||||
|
// Case 2: current field stores payloads
|
||||||
|
// SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
|
||||||
|
// DocSkip,FreqSkip,ProxSkip --> VInt
|
||||||
|
// PayloadLength --> VInt
|
||||||
|
// In this case DocSkip/2 is the difference between
|
||||||
|
// the current and the previous value. If DocSkip
|
||||||
|
// is odd, then a PayloadLength encoded as VInt follows,
|
||||||
|
// if DocSkip is even, then it is assumed that the
|
||||||
|
// current payload length equals the length at the previous
|
||||||
|
// skip point
|
||||||
|
if (curStorePayloads) {
|
||||||
|
int delta = curDoc - lastSkipDoc[level];
|
||||||
|
if (curPayloadLength == lastSkipPayloadLength[level]) {
|
||||||
|
// the current payload length equals the length at the previous skip point,
|
||||||
|
// so we don't store the length again
|
||||||
|
skipBuffer.writeVInt(delta * 2);
|
||||||
|
} else {
|
||||||
|
// the payload length is different from the previous one. We shift the DocSkip,
|
||||||
|
// set the lowest bit and store the current payload length as VInt.
|
||||||
|
skipBuffer.writeVInt(delta * 2 + 1);
|
||||||
|
skipBuffer.writeVInt(curPayloadLength);
|
||||||
|
lastSkipPayloadLength[level] = curPayloadLength;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// current field does not store payloads
|
||||||
|
skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
|
||||||
|
}
|
||||||
|
skipBuffer.writeVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
|
||||||
|
skipBuffer.writeVInt((int) (curProxPointer - lastSkipProxPointer[level]));
|
||||||
|
|
||||||
|
lastSkipDoc[level] = curDoc;
|
||||||
|
//System.out.println("write doc at level " + level + ": " + curDoc);
|
||||||
|
|
||||||
|
lastSkipFreqPointer[level] = curFreqPointer;
|
||||||
|
lastSkipProxPointer[level] = curProxPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,271 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.BufferedIndexInput;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This abstract class reads skip lists with multiple levels.
|
||||||
|
*
|
||||||
|
* See {@link MultiLevelSkipListWriter} for the information about the encoding
|
||||||
|
* of the multi level skip lists.
|
||||||
|
*
|
||||||
|
* Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)}
|
||||||
|
* which defines the actual format of the skip data.
|
||||||
|
*/
|
||||||
|
abstract class MultiLevelSkipListReader {
|
||||||
|
// the maximum number of skip levels possible for this index
|
||||||
|
private int maxNumberOfSkipLevels;
|
||||||
|
|
||||||
|
// number of levels in this skip list
|
||||||
|
private int numberOfSkipLevels;
|
||||||
|
|
||||||
|
// Expert: defines the number of top skip levels to buffer in memory.
|
||||||
|
// Reducing this number results in less memory usage, but possibly
|
||||||
|
// slower performance due to more random I/Os.
|
||||||
|
// Please notice that the space each level occupies is limited by
|
||||||
|
// the skipInterval. The top level can not contain more than
|
||||||
|
// skipLevel entries, the second top level can not contain more
|
||||||
|
// than skipLevel^2 entries and so forth.
|
||||||
|
private int numberOfLevelsToBuffer = 1;
|
||||||
|
|
||||||
|
private int docCount;
|
||||||
|
private boolean haveSkipped;
|
||||||
|
|
||||||
|
private IndexInput[] skipStream; // skipStream for each level
|
||||||
|
private long skipPointer[]; // the start pointer of each skip level
|
||||||
|
private int skipInterval[]; // skipInterval of each level
|
||||||
|
private int[] numSkipped; // number of docs skipped per level
|
||||||
|
|
||||||
|
private int[] skipDoc; // doc id of current skip entry per level
|
||||||
|
private int lastDoc; // doc id of last read skip entry with docId <= target
|
||||||
|
private long[] childPointer; // child pointer of current skip entry per level
|
||||||
|
private long lastChildPointer; // childPointer of last read skip entry with docId <= target
|
||||||
|
|
||||||
|
private boolean inputIsBuffered;
|
||||||
|
|
||||||
|
public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
|
||||||
|
this.skipStream = new IndexInput[maxSkipLevels];
|
||||||
|
this.skipPointer = new long[maxSkipLevels];
|
||||||
|
this.childPointer = new long[maxSkipLevels];
|
||||||
|
this.numSkipped = new int[maxSkipLevels];
|
||||||
|
this.maxNumberOfSkipLevels = maxSkipLevels;
|
||||||
|
this.skipInterval = new int[maxSkipLevels];
|
||||||
|
this.skipStream [0]= skipStream;
|
||||||
|
this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
|
||||||
|
this.skipInterval[0] = skipInterval;
|
||||||
|
for (int i = 1; i < maxSkipLevels; i++) {
|
||||||
|
// cache skip intervals
|
||||||
|
this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
|
||||||
|
}
|
||||||
|
skipDoc = new int[maxSkipLevels];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Returns the id of the doc to which the last call of {@link #skipTo(int)}
|
||||||
|
* has skipped. */
|
||||||
|
int getDoc() {
|
||||||
|
return lastDoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Skips entries to the first beyond the current whose document number is
|
||||||
|
* greater than or equal to <i>target</i>. Returns the current doc count.
|
||||||
|
*/
|
||||||
|
int skipTo(int target) throws IOException {
|
||||||
|
if (!haveSkipped) {
|
||||||
|
// first time, load skip levels
|
||||||
|
loadSkipLevels();
|
||||||
|
haveSkipped = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// walk up the levels until highest level is found that has a skip
|
||||||
|
// for this target
|
||||||
|
int level = 0;
|
||||||
|
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
|
||||||
|
level++;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (level >= 0) {
|
||||||
|
if (target > skipDoc[level]) {
|
||||||
|
if (!loadNextSkip(level)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// no more skips on this level, go down one level
|
||||||
|
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
|
||||||
|
seekChild(level - 1);
|
||||||
|
}
|
||||||
|
level--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return numSkipped[0] - skipInterval[0] - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean loadNextSkip(int level) throws IOException {
|
||||||
|
// we have to skip, the target document is greater than the current
|
||||||
|
// skip list entry
|
||||||
|
setLastSkipData(level);
|
||||||
|
|
||||||
|
numSkipped[level] += skipInterval[level];
|
||||||
|
|
||||||
|
if (numSkipped[level] > docCount) {
|
||||||
|
// this skip list is exhausted
|
||||||
|
skipDoc[level] = Integer.MAX_VALUE;
|
||||||
|
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read next skip entry
|
||||||
|
skipDoc[level] += readSkipData(level, skipStream[level]);
|
||||||
|
|
||||||
|
if (level != 0) {
|
||||||
|
// read the child pointer if we are not on the leaf level
|
||||||
|
childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Seeks the skip entry on the given level */
|
||||||
|
protected void seekChild(int level) throws IOException {
|
||||||
|
skipStream[level].seek(lastChildPointer);
|
||||||
|
numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
|
||||||
|
skipDoc[level] = lastDoc;
|
||||||
|
if (level > 0) {
|
||||||
|
childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void close() throws IOException {
|
||||||
|
for (int i = 1; i < skipStream.length; i++) {
|
||||||
|
if (skipStream[i] != null) {
|
||||||
|
skipStream[i].close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** initializes the reader */
|
||||||
|
void init(long skipPointer, int df) {
|
||||||
|
this.skipPointer[0] = skipPointer;
|
||||||
|
this.docCount = df;
|
||||||
|
Arrays.fill(skipDoc, 0);
|
||||||
|
Arrays.fill(numSkipped, 0);
|
||||||
|
haveSkipped = false;
|
||||||
|
for (int i = 1; i < numberOfSkipLevels; i++) {
|
||||||
|
skipStream[0] = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Loads the skip levels */
|
||||||
|
private void loadSkipLevels() throws IOException {
|
||||||
|
numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
|
||||||
|
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
|
||||||
|
numberOfSkipLevels = maxNumberOfSkipLevels;
|
||||||
|
}
|
||||||
|
|
||||||
|
skipStream[0].seek(skipPointer[0]);
|
||||||
|
|
||||||
|
int toBuffer = numberOfLevelsToBuffer;
|
||||||
|
|
||||||
|
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
|
||||||
|
// the length of the current level
|
||||||
|
long length = skipStream[0].readVLong();
|
||||||
|
|
||||||
|
// the start pointer of the current level
|
||||||
|
skipPointer[i] = skipStream[0].getFilePointer();
|
||||||
|
if (toBuffer > 0) {
|
||||||
|
// buffer this level
|
||||||
|
skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
|
||||||
|
toBuffer--;
|
||||||
|
} else {
|
||||||
|
// clone this stream, it is already at the start of the current level
|
||||||
|
skipStream[i] = (IndexInput) skipStream[0].clone();
|
||||||
|
if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
|
||||||
|
((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
|
||||||
|
}
|
||||||
|
|
||||||
|
// move base stream beyond the current level
|
||||||
|
skipStream[0].seek(skipStream[0].getFilePointer() + length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// use base stream for the lowest level
|
||||||
|
skipPointer[0] = skipStream[0].getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subclasses must implement the actual skip data encoding in this method.
|
||||||
|
*
|
||||||
|
* @param level the level skip data shall be read from
|
||||||
|
* @param skipStream the skip stream to read from
|
||||||
|
*/
|
||||||
|
protected abstract int readSkipData(int level, IndexInput skipStream) throws IOException;
|
||||||
|
|
||||||
|
/** Copies the values of the last read skip entry on this level */
|
||||||
|
protected void setLastSkipData(int level) {
|
||||||
|
lastDoc = skipDoc[level];
|
||||||
|
lastChildPointer = childPointer[level];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** used to buffer the top skip levels */
|
||||||
|
private final static class SkipBuffer extends IndexInput {
|
||||||
|
private byte[] data;
|
||||||
|
private long pointer;
|
||||||
|
private int pos;
|
||||||
|
|
||||||
|
SkipBuffer(IndexInput input, int length) throws IOException {
|
||||||
|
data = new byte[length];
|
||||||
|
pointer = input.getFilePointer();
|
||||||
|
input.readBytes(data, 0, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
data = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getFilePointer() {
|
||||||
|
return pointer + pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long length() {
|
||||||
|
return data.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte readByte() throws IOException {
|
||||||
|
return data[pos++];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void readBytes(byte[] b, int offset, int len) throws IOException {
|
||||||
|
System.arraycopy(data, pos, b, offset, len);
|
||||||
|
pos += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void seek(long pos) throws IOException {
|
||||||
|
this.pos = (int) (pos - pointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,151 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This abstract class writes skip lists with multiple levels.
|
||||||
|
*
|
||||||
|
* Example for skipInterval = 3:
|
||||||
|
* c (skip level 2)
|
||||||
|
* c c c (skip level 1)
|
||||||
|
* x x x x x x x x x x (skip level 0)
|
||||||
|
* d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
|
||||||
|
* 3 6 9 12 15 18 21 24 27 30 (df)
|
||||||
|
*
|
||||||
|
* d - document
|
||||||
|
* x - skip data
|
||||||
|
* c - skip data with child pointer
|
||||||
|
*
|
||||||
|
* Skip level i contains every skipInterval-th entry from skip level i-1.
|
||||||
|
* Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))).
|
||||||
|
*
|
||||||
|
* Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1.
|
||||||
|
* This guarantess a logarithmic amount of skips to find the target document.
|
||||||
|
*
|
||||||
|
* While this class takes care of writing the different skip levels,
|
||||||
|
* subclasses must define the actual format of the skip data.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
abstract class MultiLevelSkipListWriter {
|
||||||
|
// number of levels in this skip list
|
||||||
|
private int numberOfSkipLevels;
|
||||||
|
|
||||||
|
// the skip interval in the list with level = 0
|
||||||
|
private int skipInterval;
|
||||||
|
|
||||||
|
// for every skip level a different buffer is used
|
||||||
|
private RAMOutputStream[] skipBuffer;
|
||||||
|
|
||||||
|
protected MultiLevelSkipListWriter(int skipInterval, int maxSkipLevels, int df) {
|
||||||
|
this.skipInterval = skipInterval;
|
||||||
|
|
||||||
|
// calculate the maximum number of skip levels for this document frequency
|
||||||
|
numberOfSkipLevels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval));
|
||||||
|
|
||||||
|
// make sure it does not exceed maxSkipLevels
|
||||||
|
if (numberOfSkipLevels > maxSkipLevels) {
|
||||||
|
numberOfSkipLevels = maxSkipLevels;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void init() {
|
||||||
|
skipBuffer = new RAMOutputStream[numberOfSkipLevels];
|
||||||
|
for (int i = 0; i < numberOfSkipLevels; i++) {
|
||||||
|
skipBuffer[i] = new RAMOutputStream();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void resetSkip() {
|
||||||
|
// creates new buffers or empties the existing ones
|
||||||
|
if (skipBuffer == null) {
|
||||||
|
init();
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < skipBuffer.length; i++) {
|
||||||
|
skipBuffer[i].reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subclasses must implement the actual skip data encoding in this method.
|
||||||
|
*
|
||||||
|
* @param level the level skip data shall be writting for
|
||||||
|
* @param skipBuffer the skip buffer to write to
|
||||||
|
*/
|
||||||
|
protected abstract void writeSkipData(int level, IndexOutput skipBuffer) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the current skip data to the buffers. The current document frequency determines
|
||||||
|
* the max level is skip data is to be written to.
|
||||||
|
*
|
||||||
|
* @param df the current document frequency
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
void bufferSkip(int df) throws IOException {
|
||||||
|
int numLevels;
|
||||||
|
|
||||||
|
// determine max level
|
||||||
|
for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) {
|
||||||
|
numLevels++;
|
||||||
|
}
|
||||||
|
|
||||||
|
long childPointer = 0;
|
||||||
|
|
||||||
|
for (int level = 0; level < numLevels; level++) {
|
||||||
|
writeSkipData(level, skipBuffer[level]);
|
||||||
|
|
||||||
|
long newChildPointer = skipBuffer[level].getFilePointer();
|
||||||
|
|
||||||
|
if (level != 0) {
|
||||||
|
// store child pointers for all levels except the lowest
|
||||||
|
skipBuffer[level].writeVLong(childPointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
//remember the childPointer for the next level
|
||||||
|
childPointer = newChildPointer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes the buffered skip lists to the given output.
|
||||||
|
*
|
||||||
|
* @param output the IndexOutput the skip lists shall be written to
|
||||||
|
* @return the pointer the skip list starts
|
||||||
|
*/
|
||||||
|
long writeSkip(IndexOutput output) throws IOException {
|
||||||
|
long skipPointer = output.getFilePointer();
|
||||||
|
if (skipBuffer == null || skipBuffer.length == 0) return skipPointer;
|
||||||
|
|
||||||
|
for (int level = numberOfSkipLevels - 1; level > 0; level--) {
|
||||||
|
long length = skipBuffer[level].getFilePointer();
|
||||||
|
if (length > 0) {
|
||||||
|
output.writeVLong(length);
|
||||||
|
skipBuffer[level].writeTo(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
skipBuffer[0].writeTo(output);
|
||||||
|
|
||||||
|
return skipPointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.document.FieldSelector;
|
||||||
import org.apache.lucene.document.FieldSelectorResult;
|
import org.apache.lucene.document.FieldSelectorResult;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.RAMOutputStream;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
|
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add},
|
||||||
|
@ -51,6 +50,8 @@ final class SegmentMerger {
|
||||||
private Vector readers = new Vector();
|
private Vector readers = new Vector();
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
|
|
||||||
|
private int mergedDocs;
|
||||||
|
|
||||||
/** This ctor used only by test code.
|
/** This ctor used only by test code.
|
||||||
*
|
*
|
||||||
* @param dir The Directory to merge the other segments into
|
* @param dir The Directory to merge the other segments into
|
||||||
|
@ -93,14 +94,14 @@ final class SegmentMerger {
|
||||||
final int merge() throws CorruptIndexException, IOException {
|
final int merge() throws CorruptIndexException, IOException {
|
||||||
int value;
|
int value;
|
||||||
|
|
||||||
value = mergeFields();
|
mergedDocs = mergeFields();
|
||||||
mergeTerms();
|
mergeTerms();
|
||||||
mergeNorms();
|
mergeNorms();
|
||||||
|
|
||||||
if (fieldInfos.hasVectors())
|
if (fieldInfos.hasVectors())
|
||||||
mergeVectors();
|
mergeVectors();
|
||||||
|
|
||||||
return value;
|
return mergedDocs;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -241,7 +242,9 @@ final class SegmentMerger {
|
||||||
private IndexOutput proxOutput = null;
|
private IndexOutput proxOutput = null;
|
||||||
private TermInfosWriter termInfosWriter = null;
|
private TermInfosWriter termInfosWriter = null;
|
||||||
private int skipInterval;
|
private int skipInterval;
|
||||||
|
private int maxSkipLevels;
|
||||||
private SegmentMergeQueue queue = null;
|
private SegmentMergeQueue queue = null;
|
||||||
|
private DefaultSkipListWriter skipListWriter = null;
|
||||||
|
|
||||||
private final void mergeTerms() throws CorruptIndexException, IOException {
|
private final void mergeTerms() throws CorruptIndexException, IOException {
|
||||||
try {
|
try {
|
||||||
|
@ -251,6 +254,8 @@ final class SegmentMerger {
|
||||||
new TermInfosWriter(directory, segment, fieldInfos,
|
new TermInfosWriter(directory, segment, fieldInfos,
|
||||||
termIndexInterval);
|
termIndexInterval);
|
||||||
skipInterval = termInfosWriter.skipInterval;
|
skipInterval = termInfosWriter.skipInterval;
|
||||||
|
maxSkipLevels = termInfosWriter.maxSkipLevels;
|
||||||
|
skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput);
|
||||||
queue = new SegmentMergeQueue(readers.size());
|
queue = new SegmentMergeQueue(readers.size());
|
||||||
|
|
||||||
mergeTermInfos();
|
mergeTermInfos();
|
||||||
|
@ -319,7 +324,7 @@ final class SegmentMerger {
|
||||||
|
|
||||||
int df = appendPostings(smis, n); // append posting data
|
int df = appendPostings(smis, n); // append posting data
|
||||||
|
|
||||||
long skipPointer = writeSkip();
|
long skipPointer = skipListWriter.writeSkip(freqOutput);
|
||||||
|
|
||||||
if (df > 0) {
|
if (df > 0) {
|
||||||
// add an entry to the dictionary with pointers to prox and freq files
|
// add an entry to the dictionary with pointers to prox and freq files
|
||||||
|
@ -344,7 +349,7 @@ final class SegmentMerger {
|
||||||
throws CorruptIndexException, IOException {
|
throws CorruptIndexException, IOException {
|
||||||
int lastDoc = 0;
|
int lastDoc = 0;
|
||||||
int df = 0; // number of docs w/ term
|
int df = 0; // number of docs w/ term
|
||||||
resetSkip();
|
skipListWriter.resetSkip();
|
||||||
boolean storePayloads = fieldInfos.fieldInfo(smis[0].term.field).storePayloads;
|
boolean storePayloads = fieldInfos.fieldInfo(smis[0].term.field).storePayloads;
|
||||||
int lastPayloadLength = -1; // ensures that we write the first length
|
int lastPayloadLength = -1; // ensures that we write the first length
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
|
@ -366,7 +371,8 @@ final class SegmentMerger {
|
||||||
df++;
|
df++;
|
||||||
|
|
||||||
if ((df % skipInterval) == 0) {
|
if ((df % skipInterval) == 0) {
|
||||||
bufferSkip(lastDoc, storePayloads, lastPayloadLength);
|
skipListWriter.setSkipData(lastDoc, storePayloads, lastPayloadLength);
|
||||||
|
skipListWriter.bufferSkip(df);
|
||||||
}
|
}
|
||||||
|
|
||||||
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
|
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
|
||||||
|
@ -413,75 +419,6 @@ final class SegmentMerger {
|
||||||
return df;
|
return df;
|
||||||
}
|
}
|
||||||
|
|
||||||
private RAMOutputStream skipBuffer = new RAMOutputStream();
|
|
||||||
private int lastSkipDoc;
|
|
||||||
private int lastSkipPayloadLength;
|
|
||||||
private long lastSkipFreqPointer;
|
|
||||||
private long lastSkipProxPointer;
|
|
||||||
|
|
||||||
private void resetSkip() {
|
|
||||||
skipBuffer.reset();
|
|
||||||
lastSkipDoc = 0;
|
|
||||||
lastSkipPayloadLength = -1; // we don't have to write the first length in the skip list
|
|
||||||
lastSkipFreqPointer = freqOutput.getFilePointer();
|
|
||||||
lastSkipProxPointer = proxOutput.getFilePointer();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void bufferSkip(int doc, boolean storePayloads, int payloadLength) throws IOException {
|
|
||||||
long freqPointer = freqOutput.getFilePointer();
|
|
||||||
long proxPointer = proxOutput.getFilePointer();
|
|
||||||
|
|
||||||
// To efficiently store payloads in the posting lists we do not store the length of
|
|
||||||
// every payload. Instead we omit the length for a payload if the previous payload had
|
|
||||||
// the same length.
|
|
||||||
// However, in order to support skipping the payload length at every skip point must be known.
|
|
||||||
// So we use the same length encoding that we use for the posting lists for the skip data as well:
|
|
||||||
// Case 1: current field does not store payloads
|
|
||||||
// SkipDatum --> DocSkip, FreqSkip, ProxSkip
|
|
||||||
// DocSkip,FreqSkip,ProxSkip --> VInt
|
|
||||||
// DocSkip records the document number before every SkipInterval th document in TermFreqs.
|
|
||||||
// Document numbers are represented as differences from the previous value in the sequence.
|
|
||||||
// Case 2: current field stores payloads
|
|
||||||
// SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
|
|
||||||
// DocSkip,FreqSkip,ProxSkip --> VInt
|
|
||||||
// PayloadLength --> VInt
|
|
||||||
// In this case DocSkip/2 is the difference between
|
|
||||||
// the current and the previous value. If DocSkip
|
|
||||||
// is odd, then a PayloadLength encoded as VInt follows,
|
|
||||||
// if DocSkip is even, then it is assumed that the
|
|
||||||
// current payload length equals the length at the previous
|
|
||||||
// skip point
|
|
||||||
if (storePayloads) {
|
|
||||||
int delta = doc - lastSkipDoc;
|
|
||||||
if (payloadLength == lastSkipPayloadLength) {
|
|
||||||
// the current payload length equals the length at the previous skip point,
|
|
||||||
// so we don't store the length again
|
|
||||||
skipBuffer.writeVInt(delta * 2);
|
|
||||||
} else {
|
|
||||||
// the payload length is different from the previous one. We shift the DocSkip,
|
|
||||||
// set the lowest bit and store the current payload length as VInt.
|
|
||||||
skipBuffer.writeVInt(delta * 2 + 1);
|
|
||||||
skipBuffer.writeVInt(payloadLength);
|
|
||||||
lastSkipPayloadLength = payloadLength;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// current field does not store payloads
|
|
||||||
skipBuffer.writeVInt(doc - lastSkipDoc);
|
|
||||||
}
|
|
||||||
skipBuffer.writeVInt((int) (freqPointer - lastSkipFreqPointer));
|
|
||||||
skipBuffer.writeVInt((int) (proxPointer - lastSkipProxPointer));
|
|
||||||
|
|
||||||
lastSkipDoc = doc;
|
|
||||||
lastSkipFreqPointer = freqPointer;
|
|
||||||
lastSkipProxPointer = proxPointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
private long writeSkip() throws IOException {
|
|
||||||
long skipPointer = freqOutput.getFilePointer();
|
|
||||||
skipBuffer.writeTo(freqOutput);
|
|
||||||
return skipPointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void mergeNorms() throws IOException {
|
private void mergeNorms() throws IOException {
|
||||||
byte[] normBuffer = null;
|
byte[] normBuffer = null;
|
||||||
IndexOutput output = null;
|
IndexOutput output = null;
|
||||||
|
|
|
@ -31,16 +31,15 @@ class SegmentTermDocs implements TermDocs {
|
||||||
int freq;
|
int freq;
|
||||||
|
|
||||||
private int skipInterval;
|
private int skipInterval;
|
||||||
private int numSkips;
|
private int maxSkipLevels;
|
||||||
private int skipCount;
|
private DefaultSkipListReader skipListReader;
|
||||||
private IndexInput skipStream;
|
|
||||||
private int skipDoc;
|
private long freqBasePointer;
|
||||||
private long freqPointer;
|
private long proxBasePointer;
|
||||||
private long proxPointer;
|
|
||||||
private long skipPointer;
|
private long skipPointer;
|
||||||
private boolean haveSkipped;
|
private boolean haveSkipped;
|
||||||
|
|
||||||
private int payloadLengthAtLastSkip;
|
|
||||||
protected boolean currentFieldStoresPayloads;
|
protected boolean currentFieldStoresPayloads;
|
||||||
|
|
||||||
protected SegmentTermDocs(SegmentReader parent) {
|
protected SegmentTermDocs(SegmentReader parent) {
|
||||||
|
@ -48,6 +47,7 @@ class SegmentTermDocs implements TermDocs {
|
||||||
this.freqStream = (IndexInput) parent.freqStream.clone();
|
this.freqStream = (IndexInput) parent.freqStream.clone();
|
||||||
this.deletedDocs = parent.deletedDocs;
|
this.deletedDocs = parent.deletedDocs;
|
||||||
this.skipInterval = parent.tis.getSkipInterval();
|
this.skipInterval = parent.tis.getSkipInterval();
|
||||||
|
this.maxSkipLevels = parent.tis.getMaxSkipLevels();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void seek(Term term) throws IOException {
|
public void seek(Term term) throws IOException {
|
||||||
|
@ -74,7 +74,6 @@ class SegmentTermDocs implements TermDocs {
|
||||||
|
|
||||||
void seek(TermInfo ti, Term term) throws IOException {
|
void seek(TermInfo ti, Term term) throws IOException {
|
||||||
count = 0;
|
count = 0;
|
||||||
payloadLengthAtLastSkip = 0;
|
|
||||||
FieldInfo fi = parent.fieldInfos.fieldInfo(term.field);
|
FieldInfo fi = parent.fieldInfos.fieldInfo(term.field);
|
||||||
currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
|
currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
|
||||||
if (ti == null) {
|
if (ti == null) {
|
||||||
|
@ -82,21 +81,18 @@ class SegmentTermDocs implements TermDocs {
|
||||||
} else {
|
} else {
|
||||||
df = ti.docFreq;
|
df = ti.docFreq;
|
||||||
doc = 0;
|
doc = 0;
|
||||||
skipDoc = 0;
|
freqBasePointer = ti.freqPointer;
|
||||||
skipCount = 0;
|
proxBasePointer = ti.proxPointer;
|
||||||
numSkips = df / skipInterval;
|
skipPointer = freqBasePointer + ti.skipOffset;
|
||||||
freqPointer = ti.freqPointer;
|
freqStream.seek(freqBasePointer);
|
||||||
proxPointer = ti.proxPointer;
|
|
||||||
skipPointer = freqPointer + ti.skipOffset;
|
|
||||||
freqStream.seek(freqPointer);
|
|
||||||
haveSkipped = false;
|
haveSkipped = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
freqStream.close();
|
freqStream.close();
|
||||||
if (skipStream != null)
|
if (skipListReader != null)
|
||||||
skipStream.close();
|
skipListReader.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int doc() { return doc; }
|
public final int doc() { return doc; }
|
||||||
|
@ -111,11 +107,11 @@ class SegmentTermDocs implements TermDocs {
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int docCode = freqStream.readVInt();
|
int docCode = freqStream.readVInt();
|
||||||
doc += docCode >>> 1; // shift off low bit
|
doc += docCode >>> 1; // shift off low bit
|
||||||
if ((docCode & 1) != 0) // if low bit is set
|
if ((docCode & 1) != 0) // if low bit is set
|
||||||
freq = 1; // freq is one
|
freq = 1; // freq is one
|
||||||
else
|
else
|
||||||
freq = freqStream.readVInt(); // else read freq
|
freq = freqStream.readVInt(); // else read freq
|
||||||
|
|
||||||
count++;
|
count++;
|
||||||
|
|
||||||
|
@ -135,11 +131,11 @@ class SegmentTermDocs implements TermDocs {
|
||||||
|
|
||||||
// manually inlined call to next() for speed
|
// manually inlined call to next() for speed
|
||||||
final int docCode = freqStream.readVInt();
|
final int docCode = freqStream.readVInt();
|
||||||
doc += docCode >>> 1; // shift off low bit
|
doc += docCode >>> 1; // shift off low bit
|
||||||
if ((docCode & 1) != 0) // if low bit is set
|
if ((docCode & 1) != 0) // if low bit is set
|
||||||
freq = 1; // freq is one
|
freq = 1; // freq is one
|
||||||
else
|
else
|
||||||
freq = freqStream.readVInt(); // else read freq
|
freq = freqStream.readVInt(); // else read freq
|
||||||
count++;
|
count++;
|
||||||
|
|
||||||
if (deletedDocs == null || !deletedDocs.get(doc)) {
|
if (deletedDocs == null || !deletedDocs.get(doc)) {
|
||||||
|
@ -157,64 +153,22 @@ class SegmentTermDocs implements TermDocs {
|
||||||
/** Optimized implementation. */
|
/** Optimized implementation. */
|
||||||
public boolean skipTo(int target) throws IOException {
|
public boolean skipTo(int target) throws IOException {
|
||||||
if (df >= skipInterval) { // optimized case
|
if (df >= skipInterval) { // optimized case
|
||||||
|
if (skipListReader == null)
|
||||||
|
skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
|
||||||
|
|
||||||
if (skipStream == null)
|
if (!haveSkipped) { // lazily initialize skip stream
|
||||||
skipStream = (IndexInput) freqStream.clone(); // lazily clone
|
skipListReader.init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
|
||||||
|
|
||||||
if (!haveSkipped) { // lazily seek skip stream
|
|
||||||
skipStream.seek(skipPointer);
|
|
||||||
haveSkipped = true;
|
haveSkipped = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// scan skip data
|
int newCount = skipListReader.skipTo(target);
|
||||||
int lastSkipDoc = skipDoc;
|
if (newCount > count) {
|
||||||
int lastPayloadLength = 0;
|
freqStream.seek(skipListReader.getFreqPointer());
|
||||||
long lastFreqPointer = freqStream.getFilePointer();
|
skipProx(skipListReader.getProxPointer(), skipListReader.getPayloadLength());
|
||||||
long lastProxPointer = -1;
|
|
||||||
int numSkipped = -1 - (count % skipInterval);
|
|
||||||
|
|
||||||
while (target > skipDoc) {
|
doc = skipListReader.getDoc();
|
||||||
lastSkipDoc = skipDoc;
|
count = newCount;
|
||||||
lastFreqPointer = freqPointer;
|
|
||||||
lastProxPointer = proxPointer;
|
|
||||||
lastPayloadLength = payloadLengthAtLastSkip;
|
|
||||||
|
|
||||||
if (skipDoc != 0 && skipDoc >= doc)
|
|
||||||
numSkipped += skipInterval;
|
|
||||||
|
|
||||||
if(skipCount >= numSkips)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (currentFieldStoresPayloads) {
|
|
||||||
// the current field stores payloads.
|
|
||||||
// if the doc delta is odd then we have
|
|
||||||
// to read the current payload length
|
|
||||||
// because it differs from the length of the
|
|
||||||
// previous payload
|
|
||||||
int delta = skipStream.readVInt();
|
|
||||||
if ((delta & 1) != 0) {
|
|
||||||
payloadLengthAtLastSkip = skipStream.readVInt();
|
|
||||||
}
|
|
||||||
delta >>>= 1;
|
|
||||||
skipDoc += delta;
|
|
||||||
} else {
|
|
||||||
skipDoc += skipStream.readVInt();
|
|
||||||
}
|
|
||||||
freqPointer += skipStream.readVInt();
|
|
||||||
proxPointer += skipStream.readVInt();
|
|
||||||
|
|
||||||
skipCount++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we found something to skip, then skip it
|
|
||||||
if (lastFreqPointer > freqStream.getFilePointer()) {
|
|
||||||
freqStream.seek(lastFreqPointer);
|
|
||||||
skipProx(lastProxPointer, lastPayloadLength);
|
|
||||||
|
|
||||||
doc = lastSkipDoc;
|
|
||||||
count += numSkipped;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// done skipping, now just scan
|
// done skipping, now just scan
|
||||||
|
@ -224,5 +178,4 @@ class SegmentTermDocs implements TermDocs {
|
||||||
} while (target > doc);
|
} while (target > doc);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
long indexPointer = 0;
|
long indexPointer = 0;
|
||||||
int indexInterval;
|
int indexInterval;
|
||||||
int skipInterval;
|
int skipInterval;
|
||||||
|
int maxSkipLevels;
|
||||||
private int formatM1SkipInterval;
|
private int formatM1SkipInterval;
|
||||||
|
|
||||||
SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
|
SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
|
||||||
|
@ -44,6 +45,7 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
input = i;
|
input = i;
|
||||||
fieldInfos = fis;
|
fieldInfos = fis;
|
||||||
isIndex = isi;
|
isIndex = isi;
|
||||||
|
maxSkipLevels = 1; // use single-level skip lists for formats > -3
|
||||||
|
|
||||||
int firstInt = input.readInt();
|
int firstInt = input.readInt();
|
||||||
if (firstInt >= 0) {
|
if (firstInt >= 0) {
|
||||||
|
@ -54,7 +56,6 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
// back-compatible settings
|
// back-compatible settings
|
||||||
indexInterval = 128;
|
indexInterval = 128;
|
||||||
skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
|
skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// we have a format version number
|
// we have a format version number
|
||||||
format = firstInt;
|
format = firstInt;
|
||||||
|
@ -73,10 +74,13 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
|
||||||
// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
|
// switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in
|
||||||
// skipTo implementation of these versions
|
// skipTo implementation of these versions
|
||||||
skipInterval = Integer.MAX_VALUE;
|
skipInterval = Integer.MAX_VALUE;
|
||||||
}
|
} else {
|
||||||
else{
|
|
||||||
indexInterval = input.readInt();
|
indexInterval = input.readInt();
|
||||||
skipInterval = input.readInt();
|
skipInterval = input.readInt();
|
||||||
|
if (format == -3) {
|
||||||
|
// this new format introduces multi-level skipping
|
||||||
|
maxSkipLevels = input.readInt();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,10 @@ final class TermInfosReader {
|
||||||
return origEnum.skipInterval;
|
return origEnum.skipInterval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getMaxSkipLevels() {
|
||||||
|
return origEnum.maxSkipLevels;
|
||||||
|
}
|
||||||
|
|
||||||
final void close() throws IOException {
|
final void close() throws IOException {
|
||||||
if (origEnum != null)
|
if (origEnum != null)
|
||||||
origEnum.close();
|
origEnum.close();
|
||||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
final class TermInfosWriter {
|
final class TermInfosWriter {
|
||||||
/** The file format version, a negative number. */
|
/** The file format version, a negative number. */
|
||||||
public static final int FORMAT = -2;
|
public static final int FORMAT = -3;
|
||||||
|
|
||||||
private FieldInfos fieldInfos;
|
private FieldInfos fieldInfos;
|
||||||
private IndexOutput output;
|
private IndexOutput output;
|
||||||
|
@ -57,6 +57,11 @@ final class TermInfosWriter {
|
||||||
* accelerable cases. More detailed experiments would be useful here. */
|
* accelerable cases. More detailed experiments would be useful here. */
|
||||||
int skipInterval = 16;
|
int skipInterval = 16;
|
||||||
|
|
||||||
|
/** Expert: The maximum number of skip levels. Smaller values result in
|
||||||
|
* slightly smaller indexes, but slower skipping in big posting lists.
|
||||||
|
*/
|
||||||
|
int maxSkipLevels = 10;
|
||||||
|
|
||||||
private long lastIndexPointer = 0;
|
private long lastIndexPointer = 0;
|
||||||
private boolean isIndex = false;
|
private boolean isIndex = false;
|
||||||
|
|
||||||
|
@ -85,6 +90,7 @@ final class TermInfosWriter {
|
||||||
output.writeLong(0); // leave space for size
|
output.writeLong(0); // leave space for size
|
||||||
output.writeInt(indexInterval); // write indexInterval
|
output.writeInt(indexInterval); // write indexInterval
|
||||||
output.writeInt(skipInterval); // write skipInterval
|
output.writeInt(skipInterval); // write skipInterval
|
||||||
|
output.writeInt(maxSkipLevels); // write maxSkipLevels
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Adds a new <Term, TermInfo> pair to the set.
|
/** Adds a new <Term, TermInfo> pair to the set.
|
||||||
|
|
|
@ -1139,7 +1139,7 @@
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
TermInfoFile (.tis)-->
|
TermInfoFile (.tis)-->
|
||||||
TIVersion, TermCount, IndexInterval, SkipInterval, TermInfos
|
TIVersion, TermCount, IndexInterval, SkipInterval, MaxSkipLevels, TermInfos
|
||||||
</p>
|
</p>
|
||||||
<p>TIVersion -->
|
<p>TIVersion -->
|
||||||
UInt32
|
UInt32
|
||||||
|
@ -1153,6 +1153,9 @@
|
||||||
<p>SkipInterval -->
|
<p>SkipInterval -->
|
||||||
UInt32
|
UInt32
|
||||||
</p>
|
</p>
|
||||||
|
<p>MaxSkipLevels -->
|
||||||
|
UInt32
|
||||||
|
</p>
|
||||||
<p>TermInfos -->
|
<p>TermInfos -->
|
||||||
<TermInfo>
|
<TermInfo>
|
||||||
<sup>TermCount</sup>
|
<sup>TermCount</sup>
|
||||||
|
@ -1209,7 +1212,8 @@
|
||||||
particular, it is the number of bytes
|
particular, it is the number of bytes
|
||||||
after TermFreqs that the SkipData starts.
|
after TermFreqs that the SkipData starts.
|
||||||
In other words, it is the length of the
|
In other words, it is the length of the
|
||||||
TermFreq data.
|
TermFreq data. SkipDelta is only stored
|
||||||
|
if DocFreq is not smaller than SkipInterval.
|
||||||
</p>
|
</p>
|
||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
|
@ -1233,7 +1237,7 @@
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
TermInfoIndex (.tii)-->
|
TermInfoIndex (.tii)-->
|
||||||
TIVersion, IndexTermCount, IndexInterval, SkipInterval, TermIndices
|
TIVersion, IndexTermCount, IndexInterval, SkipInterval, MaxSkipLevels, TermIndices
|
||||||
</p>
|
</p>
|
||||||
<p>TIVersion -->
|
<p>TIVersion -->
|
||||||
UInt32
|
UInt32
|
||||||
|
@ -1261,8 +1265,11 @@
|
||||||
</p>
|
</p>
|
||||||
<p>SkipInterval is the fraction of TermDocs stored in skip tables. It is used to accelerate TermDocs.skipTo(int).
|
<p>SkipInterval is the fraction of TermDocs stored in skip tables. It is used to accelerate TermDocs.skipTo(int).
|
||||||
Larger values result in smaller indexes, greater acceleration, but fewer accelerable cases, while
|
Larger values result in smaller indexes, greater acceleration, but fewer accelerable cases, while
|
||||||
smaller values result in bigger indexes, less acceleration and more
|
smaller values result in bigger indexes, less acceleration (in case of a small value for MaxSkipLevels) and more
|
||||||
accelerable cases.</p>
|
accelerable cases.</p>
|
||||||
|
<p>MaxSkipLevels is the max. number of skip levels stored for each term in the .frq file. A low value results in
|
||||||
|
smaller indexes but less acceleration, a larger value results in slightly larger indexes but greater acceleration.
|
||||||
|
See format of .frq file for more information about skip levels.</p>
|
||||||
</li>
|
</li>
|
||||||
</ol>
|
</ol>
|
||||||
</section>
|
</section>
|
||||||
|
@ -1286,15 +1293,23 @@
|
||||||
DocDelta, Freq?
|
DocDelta, Freq?
|
||||||
</p>
|
</p>
|
||||||
<p>SkipData -->
|
<p>SkipData -->
|
||||||
|
<<SkipLevelLength, SkipLevel>
|
||||||
|
<sup>NumSkipLevels-1</sup>, SkipLevel>
|
||||||
<SkipDatum>
|
<SkipDatum>
|
||||||
<sup>DocFreq/SkipInterval</sup>
|
</p>
|
||||||
|
<p>SkipLevel -->
|
||||||
|
<SkipDatum>
|
||||||
|
<sup>DocFreq/(SkipInterval^(Level + 1))</sup>
|
||||||
</p>
|
</p>
|
||||||
<p>SkipDatum -->
|
<p>SkipDatum -->
|
||||||
DocSkip,PayloadLength?,FreqSkip,ProxSkip
|
DocSkip,PayloadLength?,FreqSkip,ProxSkip,SkipChildLevelPointer?
|
||||||
</p>
|
</p>
|
||||||
<p>DocDelta,Freq,DocSkip,PayloadLength,FreqSkip,ProxSkip -->
|
<p>DocDelta,Freq,DocSkip,PayloadLength,FreqSkip,ProxSkip -->
|
||||||
VInt
|
VInt
|
||||||
</p>
|
</p>
|
||||||
|
<p>SkipChildLevelPointer -->
|
||||||
|
VLong
|
||||||
|
</p>
|
||||||
<p>TermFreqs
|
<p>TermFreqs
|
||||||
are ordered by term (the term is implicit, from the .tis file).
|
are ordered by term (the term is implicit, from the .tis file).
|
||||||
</p>
|
</p>
|
||||||
|
@ -1362,6 +1377,18 @@
|
||||||
<sup>nd</sup>
|
<sup>nd</sup>
|
||||||
starts.
|
starts.
|
||||||
</p>
|
</p>
|
||||||
|
<p>Lucene 2.2 introduces the notion of skip levels. Each term can have multiple skip levels.
|
||||||
|
The number of skip levels for a term is NumSkipLevels = Min(MaxSkipLevels, floor(log(DocFreq)/log(SkipInterval))).
|
||||||
|
The number of SkipData entries for a skip level is DocFreq/(SkipInterval^(Level + 1)), where the lowest skip
|
||||||
|
level is Level=0. <br></br>
|
||||||
|
Example: SkipInterval = 4, MaxSkipLevels = 2, DocFreq = 35. Then skip level 0 has 8 SkipData entries,
|
||||||
|
containing the 3<sup>rd</sup>, 7<sup>th</sup>, 11<sup>th</sup>, 15<sup>th</sup>, 19<sup>th</sup>, 23<sup>rd</sup>,
|
||||||
|
27<sup>th</sup>, and 31<sup>st</sup> document numbers in TermFreqs. Skip level 1 has 2 SkipData entries, containing the
|
||||||
|
15<sup>th</sup> and 31<sup>st</sup> document numbers in TermFreqs. <br></br>
|
||||||
|
The SkipData entries on all upper levels > 0 contain a SkipChildLevelPointer referencing the corresponding SkipData
|
||||||
|
entry in level-1. In the example, entry 15 on level 1 has a pointer to entry 15 on level 0, and entry 31 on level 1 has a pointer
|
||||||
|
to entry 31 on level 0.
|
||||||
|
</p>
|
||||||
|
|
||||||
</section>
|
</section>
|
||||||
<section id="Positions"><title>Positions</title>
|
<section id="Positions"><title>Positions</title>
|
||||||
|
|
|
@ -0,0 +1,154 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.LowerCaseTokenizer;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.Field.Index;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This testcase tests whether multi-level skipping is being used
|
||||||
|
* to reduce I/O while skipping through posting lists.
|
||||||
|
*
|
||||||
|
* Skipping in general is already covered by several other
|
||||||
|
* testcases.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class TestMultiLevelSkipList extends TestCase {
|
||||||
|
public void testSimpleSkip() throws IOException {
|
||||||
|
RAMDirectory dir = new RAMDirectory();
|
||||||
|
IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true);
|
||||||
|
Term term = new Term("test", "a");
|
||||||
|
for (int i = 0; i < 5000; i++) {
|
||||||
|
Document d1 = new Document();
|
||||||
|
d1.add(new Field(term.field(), term.text(), Store.NO, Index.TOKENIZED));
|
||||||
|
writer.addDocument(d1);
|
||||||
|
}
|
||||||
|
writer.flush();
|
||||||
|
writer.optimize();
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexReader reader = IndexReader.open(dir);
|
||||||
|
SegmentTermPositions tp = (SegmentTermPositions) reader.termPositions(term);
|
||||||
|
tp.freqStream = new CountingStream(tp.freqStream);
|
||||||
|
|
||||||
|
tp.next();
|
||||||
|
|
||||||
|
checkSkipTo(tp, 14, 185); // no skips
|
||||||
|
checkSkipTo(tp, 17, 190); // one skip on level 0
|
||||||
|
checkSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
|
||||||
|
|
||||||
|
// this test would fail if we had only one skip level,
|
||||||
|
// because then more bytes would be read from the freqStream
|
||||||
|
checkSkipTo(tp, 4800, 250);// one skip on level 2
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkSkipTo(TermPositions tp, int target, int maxCounter) throws IOException {
|
||||||
|
tp.skipTo(target);
|
||||||
|
if (maxCounter < counter) {
|
||||||
|
fail("Too many bytes read: " + counter);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals("Wrong document " + tp.doc() + " after skipTo target " + target, target, tp.doc());
|
||||||
|
assertEquals("Frequency is not 1: " + tp.freq(), 1,tp.freq());
|
||||||
|
tp.nextPosition();
|
||||||
|
byte[] b = new byte[1];
|
||||||
|
tp.getPayload(b, 0);
|
||||||
|
assertEquals("Wrong payload for the target " + target + ": " + b[0], (byte) target, b[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class PayloadAnalyzer extends Analyzer {
|
||||||
|
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||||
|
return new PayloadFilter(new LowerCaseTokenizer(reader));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class PayloadFilter extends TokenFilter {
|
||||||
|
static int count = 0;
|
||||||
|
|
||||||
|
protected PayloadFilter(TokenStream input) {
|
||||||
|
super(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Token next() throws IOException {
|
||||||
|
Token t = input.next();
|
||||||
|
if (t != null) {
|
||||||
|
t.setPayload(new Payload(new byte[] { (byte) count++ }));
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private int counter = 0;
|
||||||
|
|
||||||
|
// Simply extends IndexInput in a way that we are able to count the number
|
||||||
|
// of bytes read
|
||||||
|
class CountingStream extends IndexInput {
|
||||||
|
private IndexInput input;
|
||||||
|
|
||||||
|
CountingStream(IndexInput input) {
|
||||||
|
this.input = input;
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte readByte() throws IOException {
|
||||||
|
TestMultiLevelSkipList.this.counter++;
|
||||||
|
return this.input.readByte();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void readBytes(byte[] b, int offset, int len) throws IOException {
|
||||||
|
TestMultiLevelSkipList.this.counter += len;
|
||||||
|
this.input.readBytes(b, offset, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() throws IOException {
|
||||||
|
this.input.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getFilePointer() {
|
||||||
|
return this.input.getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void seek(long pos) throws IOException {
|
||||||
|
this.input.seek(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
public long length() {
|
||||||
|
return this.input.length();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object clone() {
|
||||||
|
return new CountingStream((IndexInput) this.input.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue