diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSkipListReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSkipListReader.java new file mode 100644 index 00000000000..5d3db46fb2d --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xSkipListReader.java @@ -0,0 +1,117 @@ +package org.apache.lucene.codecs.lucene3x; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.codecs.MultiLevelSkipListReader; +import org.apache.lucene.store.IndexInput; + +/** + * @deprecated (4.0) This is only used to read indexes created + * before 4.0. + */ +@Deprecated +final class Lucene3xSkipListReader extends MultiLevelSkipListReader { + private boolean currentFieldStoresPayloads; + private long freqPointer[]; + private long proxPointer[]; + private int payloadLength[]; + + private long lastFreqPointer; + private long lastProxPointer; + private int lastPayloadLength; + + public Lucene3xSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) { + super(skipStream, maxSkipLevels, skipInterval); + freqPointer = new long[maxSkipLevels]; + proxPointer = new long[maxSkipLevels]; + payloadLength = new int[maxSkipLevels]; + } + + public void init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, boolean storesPayloads) { + super.init(skipPointer, df); + this.currentFieldStoresPayloads = storesPayloads; + lastFreqPointer = freqBasePointer; + lastProxPointer = proxBasePointer; + + Arrays.fill(freqPointer, freqBasePointer); + Arrays.fill(proxPointer, proxBasePointer); + Arrays.fill(payloadLength, 0); + } + + /** Returns the freq pointer of the doc to which the last call of + * {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */ + public long getFreqPointer() { + return lastFreqPointer; + } + + /** Returns the prox pointer of the doc to which the last call of + * {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */ + public long getProxPointer() { + return lastProxPointer; + } + + /** Returns the payload length of the payload stored just before + * the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} + * has skipped. */ + public int getPayloadLength() { + return lastPayloadLength; + } + + @Override + protected void seekChild(int level) throws IOException { + super.seekChild(level); + freqPointer[level] = lastFreqPointer; + proxPointer[level] = lastProxPointer; + payloadLength[level] = lastPayloadLength; + } + + @Override + protected void setLastSkipData(int level) { + super.setLastSkipData(level); + lastFreqPointer = freqPointer[level]; + lastProxPointer = proxPointer[level]; + lastPayloadLength = payloadLength[level]; + } + + @Override + protected int readSkipData(int level, IndexInput skipStream) throws IOException { + int delta; + if (currentFieldStoresPayloads) { + // the current field stores payloads. + // if the doc delta is odd then we have + // to read the current payload length + // because it differs from the length of the + // previous payload + delta = skipStream.readVInt(); + if ((delta & 1) != 0) { + payloadLength[level] = skipStream.readVInt(); + } + delta >>>= 1; + } else { + delta = skipStream.readVInt(); + } + + freqPointer[level] += skipStream.readVInt(); + proxPointer[level] += skipStream.readVInt(); + + return delta; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java index 15ea83267aa..4eb3b69d7a2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java @@ -19,7 +19,6 @@ package org.apache.lucene.codecs.lucene3x; import java.io.IOException; -import org.apache.lucene.codecs.lucene40.Lucene40SkipListReader; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; @@ -43,7 +42,7 @@ class SegmentTermDocs { private int skipInterval; private int maxSkipLevels; - private Lucene40SkipListReader skipListReader; + private Lucene3xSkipListReader skipListReader; private long freqBasePointer; private long proxBasePointer; @@ -203,10 +202,10 @@ class SegmentTermDocs { // don't skip if the target is close (within skipInterval docs away) if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case if (skipListReader == null) - skipListReader = new Lucene40SkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone + skipListReader = new Lucene3xSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone if (!haveSkipped) { // lazily initialize skip stream - skipListReader.init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads, false); + skipListReader.init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); haveSkipped = true; } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldsWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldsWriter.java index d0701a11bf9..246cd7db4b5 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldsWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWFieldsWriter.java @@ -24,7 +24,6 @@ import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.PostingsConsumer; import org.apache.lucene.codecs.TermStats; import org.apache.lucene.codecs.TermsConsumer; -import org.apache.lucene.codecs.lucene40.Lucene40SkipListWriter; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -39,7 +38,7 @@ class PreFlexRWFieldsWriter extends FieldsConsumer { private final TermInfosWriter termsOut; private final IndexOutput freqOut; private final IndexOutput proxOut; - private final Lucene40SkipListWriter skipListWriter; + private final PreFlexRWSkipListWriter skipListWriter; private final int totalNumDocs; public PreFlexRWFieldsWriter(SegmentWriteState state) throws IOException { @@ -75,7 +74,7 @@ class PreFlexRWFieldsWriter extends FieldsConsumer { } } - skipListWriter = new Lucene40SkipListWriter(termsOut.skipInterval, + skipListWriter = new PreFlexRWSkipListWriter(termsOut.skipInterval, termsOut.maxSkipLevels, totalNumDocs, freqOut, @@ -135,7 +134,7 @@ class PreFlexRWFieldsWriter extends FieldsConsumer { } if ((++df % termsOut.skipInterval) == 0) { - skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength, false, 0); + skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength); skipListWriter.bufferSkip(df); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSkipListWriter.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSkipListWriter.java new file mode 100644 index 00000000000..f6c8a7d4a0f --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene3x/PreFlexRWSkipListWriter.java @@ -0,0 +1,127 @@ +package org.apache.lucene.codecs.lucene3x; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.codecs.MultiLevelSkipListWriter; + + +/** + * PreFlexRW skiplist implementation. + * @lucene.experimental + */ +public class PreFlexRWSkipListWriter extends MultiLevelSkipListWriter { + private int[] lastSkipDoc; + private int[] lastSkipPayloadLength; + private long[] lastSkipFreqPointer; + private long[] lastSkipProxPointer; + + private IndexOutput freqOutput; + private IndexOutput proxOutput; + + private int curDoc; + private boolean curStorePayloads; + private int curPayloadLength; + private long curFreqPointer; + private long curProxPointer; + + public PreFlexRWSkipListWriter(int skipInterval, int numberOfSkipLevels, int docCount, IndexOutput freqOutput, IndexOutput proxOutput) { + super(skipInterval, numberOfSkipLevels, docCount); + this.freqOutput = freqOutput; + this.proxOutput = proxOutput; + + lastSkipDoc = new int[numberOfSkipLevels]; + lastSkipPayloadLength = new int[numberOfSkipLevels]; + lastSkipFreqPointer = new long[numberOfSkipLevels]; + lastSkipProxPointer = new long[numberOfSkipLevels]; + } + + /** + * Sets the values for the current skip data. + */ + public void setSkipData(int doc, boolean storePayloads, int payloadLength) { + this.curDoc = doc; + this.curStorePayloads = storePayloads; + this.curPayloadLength = payloadLength; + this.curFreqPointer = freqOutput.getFilePointer(); + if (proxOutput != null) + this.curProxPointer = proxOutput.getFilePointer(); + } + + @Override + public void resetSkip() { + super.resetSkip(); + Arrays.fill(lastSkipDoc, 0); + Arrays.fill(lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list + Arrays.fill(lastSkipFreqPointer, freqOutput.getFilePointer()); + if (proxOutput != null) + Arrays.fill(lastSkipProxPointer, proxOutput.getFilePointer()); + } + + @Override + protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException { + // To efficiently store payloads in the posting lists we do not store the length of + // every payload. Instead we omit the length for a payload if the previous payload had + // the same length. + // However, in order to support skipping the payload length at every skip point must be known. + // So we use the same length encoding that we use for the posting lists for the skip data as well: + // Case 1: current field does not store payloads + // SkipDatum --> DocSkip, FreqSkip, ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // DocSkip records the document number before every SkipInterval th document in TermFreqs. + // Document numbers are represented as differences from the previous value in the sequence. + // Case 2: current field stores payloads + // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // PayloadLength --> VInt + // In this case DocSkip/2 is the difference between + // the current and the previous value. If DocSkip + // is odd, then a PayloadLength encoded as VInt follows, + // if DocSkip is even, then it is assumed that the + // current payload length equals the length at the previous + // skip point + if (curStorePayloads) { + int delta = curDoc - lastSkipDoc[level]; + if (curPayloadLength == lastSkipPayloadLength[level]) { + // the current payload length equals the length at the previous skip point, + // so we don't store the length again + skipBuffer.writeVInt(delta * 2); + } else { + // the payload length is different from the previous one. We shift the DocSkip, + // set the lowest bit and store the current payload length as VInt. + skipBuffer.writeVInt(delta * 2 + 1); + skipBuffer.writeVInt(curPayloadLength); + lastSkipPayloadLength[level] = curPayloadLength; + } + } else { + // current field does not store payloads + skipBuffer.writeVInt(curDoc - lastSkipDoc[level]); + } + + skipBuffer.writeVInt((int) (curFreqPointer - lastSkipFreqPointer[level])); + skipBuffer.writeVInt((int) (curProxPointer - lastSkipProxPointer[level])); + + lastSkipDoc[level] = curDoc; + + lastSkipFreqPointer[level] = curFreqPointer; + lastSkipProxPointer[level] = curProxPointer; + } +}