mirror of https://github.com/apache/lucene.git
LUCENE-3069: API refactoring on Sep/IntBlock PF
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3069@1516365 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9b18f12d6e
commit
ab238efd37
|
@ -24,6 +24,7 @@ package org.apache.lucene.codecs.intblock;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.sep.IntIndexOutput;
|
import org.apache.lucene.codecs.sep.IntIndexOutput;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
/** Abstract base class that writes fixed-size blocks of ints
|
/** Abstract base class that writes fixed-size blocks of ints
|
||||||
|
@ -51,7 +52,7 @@ public abstract class FixedIntBlockIndexOutput extends IntIndexOutput {
|
||||||
protected abstract void flushBlock() throws IOException;
|
protected abstract void flushBlock() throws IOException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IntIndexOutput.Index index() throws IOException {
|
public IntIndexOutput.Index index() {
|
||||||
return new Index();
|
return new Index();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,7 +80,7 @@ public abstract class FixedIntBlockIndexOutput extends IntIndexOutput {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
|
public void write(DataOutput indexOut, boolean absolute) throws IOException {
|
||||||
if (absolute) {
|
if (absolute) {
|
||||||
indexOut.writeVInt(upto);
|
indexOut.writeVInt(upto);
|
||||||
indexOut.writeVLong(fp);
|
indexOut.writeVLong(fp);
|
||||||
|
|
|
@ -24,6 +24,7 @@ package org.apache.lucene.codecs.intblock;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.sep.IntIndexOutput;
|
import org.apache.lucene.codecs.sep.IntIndexOutput;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
// TODO: much of this can be shared code w/ the fixed case
|
// TODO: much of this can be shared code w/ the fixed case
|
||||||
|
@ -60,7 +61,7 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
|
||||||
protected abstract int add(int value) throws IOException;
|
protected abstract int add(int value) throws IOException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IntIndexOutput.Index index() throws IOException {
|
public IntIndexOutput.Index index() {
|
||||||
return new Index();
|
return new Index();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,7 +89,7 @@ public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
|
public void write(DataOutput indexOut, boolean absolute) throws IOException {
|
||||||
assert upto >= 0;
|
assert upto >= 0;
|
||||||
if (absolute) {
|
if (absolute) {
|
||||||
indexOut.writeVInt(upto);
|
indexOut.writeVInt(upto);
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.codecs.sep;
|
||||||
// TODO: we may want tighter integration w/ IndexOutput --
|
// TODO: we may want tighter integration w/ IndexOutput --
|
||||||
// may give better perf:
|
// may give better perf:
|
||||||
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
|
@ -49,12 +49,12 @@ public abstract class IntIndexOutput implements Closeable {
|
||||||
|
|
||||||
/** Writes "location" of current output pointer of primary
|
/** Writes "location" of current output pointer of primary
|
||||||
* output to different output (out) */
|
* output to different output (out) */
|
||||||
public abstract void write(IndexOutput indexOut, boolean absolute) throws IOException;
|
public abstract void write(DataOutput indexOut, boolean absolute) throws IOException;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** If you are indexing the primary output file, call
|
/** If you are indexing the primary output file, call
|
||||||
* this and interact with the returned IndexWriter. */
|
* this and interact with the returned IndexWriter. */
|
||||||
public abstract Index index() throws IOException;
|
public abstract Index index();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public abstract void close() throws IOException;
|
public abstract void close() throws IOException;
|
||||||
|
|
|
@ -87,6 +87,8 @@ public final class SepPostingsWriter extends PostingsWriterBase {
|
||||||
|
|
||||||
final int totalNumDocs;
|
final int totalNumDocs;
|
||||||
|
|
||||||
|
PendingTerm lastState;
|
||||||
|
|
||||||
boolean storePayloads;
|
boolean storePayloads;
|
||||||
IndexOptions indexOptions;
|
IndexOptions indexOptions;
|
||||||
|
|
||||||
|
@ -116,9 +118,9 @@ public final class SepPostingsWriter extends PostingsWriterBase {
|
||||||
this.skipInterval = skipInterval;
|
this.skipInterval = skipInterval;
|
||||||
this.skipMinimum = skipInterval; /* set to the same for now */
|
this.skipMinimum = skipInterval; /* set to the same for now */
|
||||||
final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION);
|
final String docFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, DOC_EXTENSION);
|
||||||
|
|
||||||
docOut = factory.createOutput(state.directory, docFileName, state.context);
|
docOut = factory.createOutput(state.directory, docFileName, state.context);
|
||||||
docIndex = docOut.index();
|
docIndex = docOut.index();
|
||||||
|
|
||||||
if (state.fieldInfos.hasFreq()) {
|
if (state.fieldInfos.hasFreq()) {
|
||||||
final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION);
|
final String frqFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FREQ_EXTENSION);
|
||||||
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
|
freqOut = factory.createOutput(state.directory, frqFileName, state.context);
|
||||||
|
@ -264,8 +266,8 @@ public final class SepPostingsWriter extends PostingsWriterBase {
|
||||||
public final IntIndexOutput.Index docIndex;
|
public final IntIndexOutput.Index docIndex;
|
||||||
public final IntIndexOutput.Index freqIndex;
|
public final IntIndexOutput.Index freqIndex;
|
||||||
public final IntIndexOutput.Index posIndex;
|
public final IntIndexOutput.Index posIndex;
|
||||||
public final long payloadFP;
|
public long payloadFP;
|
||||||
public final long skipFP;
|
public long skipFP;
|
||||||
|
|
||||||
public PendingTerm(IntIndexOutput.Index docIndex, IntIndexOutput.Index freqIndex, IntIndexOutput.Index posIndex, long payloadFP, long skipFP) {
|
public PendingTerm(IntIndexOutput.Index docIndex, IntIndexOutput.Index freqIndex, IntIndexOutput.Index posIndex, long payloadFP, long skipFP) {
|
||||||
this.docIndex = docIndex;
|
this.docIndex = docIndex;
|
||||||
|
@ -331,57 +333,60 @@ public final class SepPostingsWriter extends PostingsWriterBase {
|
||||||
final int absStart = pendingTerms.size() - start;
|
final int absStart = pendingTerms.size() - start;
|
||||||
final List<PendingTerm> slice = pendingTerms.subList(absStart, absStart+count);
|
final List<PendingTerm> slice = pendingTerms.subList(absStart, absStart+count);
|
||||||
|
|
||||||
long lastPayloadFP = 0;
|
|
||||||
long lastSkipFP = 0;
|
|
||||||
|
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
termsOut.writeByte((byte) 0);
|
termsOut.writeByte((byte) 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
final PendingTerm firstTerm = slice.get(0);
|
long lastSkipFP = 0;
|
||||||
final IntIndexOutput.Index docIndexFlush = firstTerm.docIndex;
|
long lastPayloadFP = 0;
|
||||||
final IntIndexOutput.Index freqIndexFlush = firstTerm.freqIndex;
|
|
||||||
final IntIndexOutput.Index posIndexFlush = firstTerm.posIndex;
|
boolean isFirstTerm = true;
|
||||||
|
|
||||||
for(int idx=0;idx<slice.size();idx++) {
|
for(int idx=0;idx<slice.size();idx++) {
|
||||||
final boolean isFirstTerm = idx == 0;
|
|
||||||
final PendingTerm t = slice.get(idx);
|
|
||||||
//System.out.println(" write idx=" + idx + " docIndex=" + t.docIndex);
|
|
||||||
docIndexFlush.copyFrom(t.docIndex, false);
|
|
||||||
docIndexFlush.write(indexBytesWriter, isFirstTerm);
|
|
||||||
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
|
||||||
freqIndexFlush.copyFrom(t.freqIndex, false);
|
|
||||||
freqIndexFlush.write(indexBytesWriter, isFirstTerm);
|
|
||||||
//System.out.println(" freqIndex=" + t.freqIndex);
|
|
||||||
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
|
||||||
posIndexFlush.copyFrom(t.posIndex, false);
|
|
||||||
posIndexFlush.write(indexBytesWriter, isFirstTerm);
|
|
||||||
//System.out.println(" posIndex=" + t.posIndex);
|
|
||||||
if (storePayloads) {
|
|
||||||
//System.out.println(" payloadFP=" + t.payloadFP);
|
|
||||||
if (isFirstTerm) {
|
if (isFirstTerm) {
|
||||||
|
lastState = slice.get(idx);
|
||||||
|
}
|
||||||
|
final PendingTerm t = slice.get(idx);
|
||||||
|
//System.out.println(" last(pure): doc="+lastState.docIndex +" frq=" + lastState.freqIndex+" pos="+lastState.posIndex);
|
||||||
|
lastState.docIndex.copyFrom(t.docIndex, false);
|
||||||
|
lastState.docIndex.write(indexBytesWriter, isFirstTerm);
|
||||||
|
//System.out.print(" doc=" + lastState.docIndex + " 1FP=" + indexBytesWriter.getFilePointer());
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
lastState.freqIndex.copyFrom(t.freqIndex, false);
|
||||||
|
lastState.freqIndex.write(indexBytesWriter, isFirstTerm);
|
||||||
|
//System.out.print(" frq=" + lastState.freqIndex + " 2FP=" + indexBytesWriter.getFilePointer());
|
||||||
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
lastState.posIndex.copyFrom(t.posIndex, false);
|
||||||
|
lastState.posIndex.write(indexBytesWriter, isFirstTerm);
|
||||||
|
//System.out.print(" pos=" + lastState.posIndex + " 3FP=" + indexBytesWriter.getFilePointer());
|
||||||
|
if (storePayloads) {
|
||||||
|
if (isFirstTerm) {
|
||||||
|
//System.out.print(" payFP=" + (t.payloadFP));
|
||||||
indexBytesWriter.writeVLong(t.payloadFP);
|
indexBytesWriter.writeVLong(t.payloadFP);
|
||||||
} else {
|
} else {
|
||||||
|
//System.out.print(" payFP=" + (t.payloadFP - lastPayloadFP));
|
||||||
indexBytesWriter.writeVLong(t.payloadFP - lastPayloadFP);
|
indexBytesWriter.writeVLong(t.payloadFP - lastPayloadFP);
|
||||||
}
|
}
|
||||||
lastPayloadFP = t.payloadFP;
|
lastPayloadFP = t.payloadFP;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (t.skipFP != -1) {
|
if (t.skipFP != -1) {
|
||||||
if (isFirstTerm) {
|
if (isFirstTerm) {
|
||||||
|
//System.out.print(" a.skipFP=" + (t.skipFP));
|
||||||
indexBytesWriter.writeVLong(t.skipFP);
|
indexBytesWriter.writeVLong(t.skipFP);
|
||||||
} else {
|
} else {
|
||||||
|
//System.out.print(" b.skipFP=" + (t.skipFP - lastSkipFP));
|
||||||
indexBytesWriter.writeVLong(t.skipFP - lastSkipFP);
|
indexBytesWriter.writeVLong(t.skipFP - lastSkipFP);
|
||||||
}
|
}
|
||||||
lastSkipFP = t.skipFP;
|
lastSkipFP = t.skipFP;
|
||||||
//System.out.println(" skipFP=" + t.skipFP);
|
|
||||||
}
|
}
|
||||||
|
//System.out.println();
|
||||||
|
//System.out.println(" last(copy): doc="+lastState.docIndex +" frq=" + lastState.freqIndex+" pos="+lastState.posIndex);
|
||||||
|
isFirstTerm = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
//System.out.println(" numBytes=" + indexBytesWriter.getFilePointer());
|
|
||||||
termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
|
termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
|
||||||
indexBytesWriter.writeTo(termsOut);
|
indexBytesWriter.writeTo(termsOut);
|
||||||
indexBytesWriter.reset();
|
indexBytesWriter.reset();
|
||||||
|
|
|
@ -0,0 +1,704 @@
|
||||||
|
package org.apache.lucene.codecs.sep;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.BlockTermState;
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsReaderBase;
|
||||||
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.index.TermState;
|
||||||
|
import org.apache.lucene.store.ByteArrayDataInput;
|
||||||
|
import org.apache.lucene.store.DataInput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
|
/** Concrete class that reads the current doc/freq/skip
|
||||||
|
* postings format.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
|
||||||
|
// TODO: -- should we switch "hasProx" higher up? and
|
||||||
|
// create two separate docs readers, one that also reads
|
||||||
|
// prox and one that doesn't?
|
||||||
|
|
||||||
|
public class TempSepPostingsReader extends TempPostingsReaderBase {
|
||||||
|
|
||||||
|
final IntIndexInput freqIn;
|
||||||
|
final IntIndexInput docIn;
|
||||||
|
final IntIndexInput posIn;
|
||||||
|
final IndexInput payloadIn;
|
||||||
|
final IndexInput skipIn;
|
||||||
|
|
||||||
|
int skipInterval;
|
||||||
|
int maxSkipLevels;
|
||||||
|
int skipMinimum;
|
||||||
|
|
||||||
|
/**
 * Opens the per-segment postings inputs.
 *
 * <p>The doc and skip inputs are always opened. The freq input is opened
 * only when {@code fieldInfos.hasFreq()} is true, and the pos and payload
 * inputs only when {@code fieldInfos.hasProx()} is true; otherwise the
 * corresponding fields are set to {@code null}.
 *
 * <p>If anything throws before every input has been opened, {@link #close()}
 * is invoked so that inputs opened so far are released.
 *
 * @throws IOException if opening any of the inputs fails
 */
public TempSepPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context, IntStreamFactory intFactory, String segmentSuffix) throws IOException {
  boolean allOpened = false;
  try {
    final String segment = segmentInfo.name;

    docIn = intFactory.openInput(
        dir,
        IndexFileNames.segmentFileName(segment, segmentSuffix, TempSepPostingsWriter.DOC_EXTENSION),
        context);

    skipIn = dir.openInput(
        IndexFileNames.segmentFileName(segment, segmentSuffix, TempSepPostingsWriter.SKIP_EXTENSION),
        context);

    // Frequencies live in their own file and exist only if some field indexes them.
    freqIn = fieldInfos.hasFreq()
        ? intFactory.openInput(
            dir,
            IndexFileNames.segmentFileName(segment, segmentSuffix, TempSepPostingsWriter.FREQ_EXTENSION),
            context)
        : null;

    // Positions and payloads are opened (or skipped) together.
    if (!fieldInfos.hasProx()) {
      posIn = null;
      payloadIn = null;
    } else {
      posIn = intFactory.openInput(
          dir,
          IndexFileNames.segmentFileName(segment, segmentSuffix, TempSepPostingsWriter.POS_EXTENSION),
          context);
      payloadIn = dir.openInput(
          IndexFileNames.segmentFileName(segment, segmentSuffix, TempSepPostingsWriter.PAYLOAD_EXTENSION),
          context);
    }

    allOpened = true;
  } finally {
    if (!allOpened) {
      // Fields not yet assigned are still null at this point.
      close();
    }
  }
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(IndexInput termsIn) throws IOException {
|
||||||
|
// Make sure we are talking to the matching past writer
|
||||||
|
CodecUtil.checkHeader(termsIn, TempSepPostingsWriter.CODEC,
|
||||||
|
TempSepPostingsWriter.VERSION_START, TempSepPostingsWriter.VERSION_START);
|
||||||
|
skipInterval = termsIn.readInt();
|
||||||
|
maxSkipLevels = termsIn.readInt();
|
||||||
|
skipMinimum = termsIn.readInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
IOUtils.close(freqIn, docIn, skipIn, posIn, payloadIn);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class SepTermState extends BlockTermState {
|
||||||
|
// We store only the seek point to the docs file because
|
||||||
|
// the rest of the info (freqIndex, posIndex, etc.) is
|
||||||
|
// stored in the docs file:
|
||||||
|
IntIndexInput.Index docIndex;
|
||||||
|
IntIndexInput.Index posIndex;
|
||||||
|
IntIndexInput.Index freqIndex;
|
||||||
|
long payloadFP;
|
||||||
|
long skipFP;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SepTermState clone() {
|
||||||
|
SepTermState other = new SepTermState();
|
||||||
|
other.copyFrom(this);
|
||||||
|
return other;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void copyFrom(TermState _other) {
|
||||||
|
super.copyFrom(_other);
|
||||||
|
SepTermState other = (SepTermState) _other;
|
||||||
|
if (docIndex == null) {
|
||||||
|
docIndex = other.docIndex.clone();
|
||||||
|
} else {
|
||||||
|
docIndex.copyFrom(other.docIndex);
|
||||||
|
}
|
||||||
|
if (other.freqIndex != null) {
|
||||||
|
if (freqIndex == null) {
|
||||||
|
freqIndex = other.freqIndex.clone();
|
||||||
|
} else {
|
||||||
|
freqIndex.copyFrom(other.freqIndex);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
freqIndex = null;
|
||||||
|
}
|
||||||
|
if (other.posIndex != null) {
|
||||||
|
if (posIndex == null) {
|
||||||
|
posIndex = other.posIndex.clone();
|
||||||
|
} else {
|
||||||
|
posIndex.copyFrom(other.posIndex);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
posIndex = null;
|
||||||
|
}
|
||||||
|
payloadFP = other.payloadFP;
|
||||||
|
skipFP = other.skipFP;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return super.toString() + " docIndex=" + docIndex + " freqIndex=" + freqIndex + " posIndex=" + posIndex + " payloadFP=" + payloadFP + " skipFP=" + skipFP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BlockTermState newTermState() throws IOException {
|
||||||
|
final SepTermState state = new SepTermState();
|
||||||
|
state.docIndex = docIn.index();
|
||||||
|
if (freqIn != null) {
|
||||||
|
state.freqIndex = freqIn.index();
|
||||||
|
}
|
||||||
|
if (posIn != null) {
|
||||||
|
state.posIndex = posIn.index();
|
||||||
|
}
|
||||||
|
return state;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void decodeTerm(long[] empty, DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
|
||||||
|
throws IOException {
|
||||||
|
final SepTermState termState = (SepTermState) _termState;
|
||||||
|
termState.docIndex.read(in, absolute);
|
||||||
|
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
|
||||||
|
termState.freqIndex.read(in, absolute);
|
||||||
|
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
//System.out.println(" freqIndex=" + termState.freqIndex);
|
||||||
|
termState.posIndex.read(in, absolute);
|
||||||
|
//System.out.println(" posIndex=" + termState.posIndex);
|
||||||
|
if (fieldInfo.hasPayloads()) {
|
||||||
|
if (absolute) {
|
||||||
|
termState.payloadFP = in.readVLong();
|
||||||
|
} else {
|
||||||
|
termState.payloadFP += in.readVLong();
|
||||||
|
}
|
||||||
|
//System.out.println(" payloadFP=" + termState.payloadFP);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (termState.docFreq >= skipMinimum) {
|
||||||
|
//System.out.println(" readSkip @ " + in.getPosition());
|
||||||
|
if (absolute) {
|
||||||
|
termState.skipFP = in.readVLong();
|
||||||
|
} else {
|
||||||
|
termState.skipFP += in.readVLong();
|
||||||
|
}
|
||||||
|
//System.out.println(" skipFP=" + termState.skipFP);
|
||||||
|
} else if (absolute) {
|
||||||
|
termState.skipFP = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
|
||||||
|
final SepTermState termState = (SepTermState) _termState;
|
||||||
|
SepDocsEnum docsEnum;
|
||||||
|
if (reuse == null || !(reuse instanceof SepDocsEnum)) {
|
||||||
|
docsEnum = new SepDocsEnum();
|
||||||
|
} else {
|
||||||
|
docsEnum = (SepDocsEnum) reuse;
|
||||||
|
if (docsEnum.startDocIn != docIn) {
|
||||||
|
// If you are using ParellelReader, and pass in a
|
||||||
|
// reused DocsAndPositionsEnum, it could have come
|
||||||
|
// from another reader also using sep codec
|
||||||
|
docsEnum = new SepDocsEnum();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return docsEnum.init(fieldInfo, termState, liveDocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs,
|
||||||
|
DocsAndPositionsEnum reuse, int flags)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
final SepTermState termState = (SepTermState) _termState;
|
||||||
|
SepDocsAndPositionsEnum postingsEnum;
|
||||||
|
if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
|
||||||
|
postingsEnum = new SepDocsAndPositionsEnum();
|
||||||
|
} else {
|
||||||
|
postingsEnum = (SepDocsAndPositionsEnum) reuse;
|
||||||
|
if (postingsEnum.startDocIn != docIn) {
|
||||||
|
// If you are using ParellelReader, and pass in a
|
||||||
|
// reused DocsAndPositionsEnum, it could have come
|
||||||
|
// from another reader also using sep codec
|
||||||
|
postingsEnum = new SepDocsAndPositionsEnum();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return postingsEnum.init(fieldInfo, termState, liveDocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
class SepDocsEnum extends DocsEnum {
|
||||||
|
int docFreq;
|
||||||
|
int doc = -1;
|
||||||
|
int accum;
|
||||||
|
int count;
|
||||||
|
int freq;
|
||||||
|
long freqStart;
|
||||||
|
|
||||||
|
// TODO: -- should we do omitTF with 2 different enum classes?
|
||||||
|
private boolean omitTF;
|
||||||
|
private IndexOptions indexOptions;
|
||||||
|
private boolean storePayloads;
|
||||||
|
private Bits liveDocs;
|
||||||
|
private final IntIndexInput.Reader docReader;
|
||||||
|
private final IntIndexInput.Reader freqReader;
|
||||||
|
private long skipFP;
|
||||||
|
|
||||||
|
private final IntIndexInput.Index docIndex;
|
||||||
|
private final IntIndexInput.Index freqIndex;
|
||||||
|
private final IntIndexInput.Index posIndex;
|
||||||
|
private final IntIndexInput startDocIn;
|
||||||
|
|
||||||
|
// TODO: -- should we do hasProx with 2 different enum classes?
|
||||||
|
|
||||||
|
boolean skipped;
|
||||||
|
SepSkipListReader skipper;
|
||||||
|
|
||||||
|
SepDocsEnum() throws IOException {
|
||||||
|
startDocIn = docIn;
|
||||||
|
docReader = docIn.reader();
|
||||||
|
docIndex = docIn.index();
|
||||||
|
if (freqIn != null) {
|
||||||
|
freqReader = freqIn.reader();
|
||||||
|
freqIndex = freqIn.index();
|
||||||
|
} else {
|
||||||
|
freqReader = null;
|
||||||
|
freqIndex = null;
|
||||||
|
}
|
||||||
|
if (posIn != null) {
|
||||||
|
posIndex = posIn.index(); // only init this so skipper can read it
|
||||||
|
} else {
|
||||||
|
posIndex = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
|
||||||
|
this.liveDocs = liveDocs;
|
||||||
|
this.indexOptions = fieldInfo.getIndexOptions();
|
||||||
|
omitTF = indexOptions == IndexOptions.DOCS_ONLY;
|
||||||
|
storePayloads = fieldInfo.hasPayloads();
|
||||||
|
|
||||||
|
// TODO: can't we only do this if consumer
|
||||||
|
// skipped consuming the previous docs?
|
||||||
|
docIndex.copyFrom(termState.docIndex);
|
||||||
|
docIndex.seek(docReader);
|
||||||
|
|
||||||
|
if (!omitTF) {
|
||||||
|
freqIndex.copyFrom(termState.freqIndex);
|
||||||
|
freqIndex.seek(freqReader);
|
||||||
|
}
|
||||||
|
|
||||||
|
docFreq = termState.docFreq;
|
||||||
|
// NOTE: unused if docFreq < skipMinimum:
|
||||||
|
skipFP = termState.skipFP;
|
||||||
|
count = 0;
|
||||||
|
doc = -1;
|
||||||
|
accum = 0;
|
||||||
|
freq = 1;
|
||||||
|
skipped = false;
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
|
||||||
|
while(true) {
|
||||||
|
if (count == docFreq) {
|
||||||
|
return doc = NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
count++;
|
||||||
|
|
||||||
|
// Decode next doc
|
||||||
|
//System.out.println("decode docDelta:");
|
||||||
|
accum += docReader.next();
|
||||||
|
|
||||||
|
if (!omitTF) {
|
||||||
|
//System.out.println("decode freq:");
|
||||||
|
freq = freqReader.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (liveDocs == null || liveDocs.get(accum)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (doc = accum);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int freq() throws IOException {
|
||||||
|
return freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
|
||||||
|
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
|
||||||
|
|
||||||
|
// There are enough docs in the posting to have
|
||||||
|
// skip data, and its not too close
|
||||||
|
|
||||||
|
if (skipper == null) {
|
||||||
|
// This DocsEnum has never done any skipping
|
||||||
|
skipper = new SepSkipListReader(skipIn.clone(),
|
||||||
|
freqIn,
|
||||||
|
docIn,
|
||||||
|
posIn,
|
||||||
|
maxSkipLevels, skipInterval);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!skipped) {
|
||||||
|
// We haven't yet skipped for this posting
|
||||||
|
skipper.init(skipFP,
|
||||||
|
docIndex,
|
||||||
|
freqIndex,
|
||||||
|
posIndex,
|
||||||
|
0,
|
||||||
|
docFreq,
|
||||||
|
storePayloads);
|
||||||
|
skipper.setIndexOptions(indexOptions);
|
||||||
|
|
||||||
|
skipped = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int newCount = skipper.skipTo(target);
|
||||||
|
|
||||||
|
if (newCount > count) {
|
||||||
|
|
||||||
|
// Skipper did move
|
||||||
|
if (!omitTF) {
|
||||||
|
skipper.getFreqIndex().seek(freqReader);
|
||||||
|
}
|
||||||
|
skipper.getDocIndex().seek(docReader);
|
||||||
|
count = newCount;
|
||||||
|
doc = accum = skipper.getDoc();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now, linear scan for the rest:
|
||||||
|
do {
|
||||||
|
if (nextDoc() == NO_MORE_DOCS) {
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
} while (target > doc);
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return docFreq;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SepDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
||||||
|
int docFreq;
|
||||||
|
int doc = -1;
|
||||||
|
int accum;
|
||||||
|
int count;
|
||||||
|
int freq;
|
||||||
|
long freqStart;
|
||||||
|
|
||||||
|
private boolean storePayloads;
|
||||||
|
private Bits liveDocs;
|
||||||
|
private final IntIndexInput.Reader docReader;
|
||||||
|
private final IntIndexInput.Reader freqReader;
|
||||||
|
private final IntIndexInput.Reader posReader;
|
||||||
|
private final IndexInput payloadIn;
|
||||||
|
private long skipFP;
|
||||||
|
|
||||||
|
private final IntIndexInput.Index docIndex;
|
||||||
|
private final IntIndexInput.Index freqIndex;
|
||||||
|
private final IntIndexInput.Index posIndex;
|
||||||
|
private final IntIndexInput startDocIn;
|
||||||
|
|
||||||
|
private long payloadFP;
|
||||||
|
|
||||||
|
private int pendingPosCount;
|
||||||
|
private int position;
|
||||||
|
private int payloadLength;
|
||||||
|
private long pendingPayloadBytes;
|
||||||
|
|
||||||
|
private boolean skipped;
|
||||||
|
private SepSkipListReader skipper;
|
||||||
|
private boolean payloadPending;
|
||||||
|
private boolean posSeekPending;
|
||||||
|
|
||||||
|
SepDocsAndPositionsEnum() throws IOException {
|
||||||
|
startDocIn = docIn;
|
||||||
|
docReader = docIn.reader();
|
||||||
|
docIndex = docIn.index();
|
||||||
|
freqReader = freqIn.reader();
|
||||||
|
freqIndex = freqIn.index();
|
||||||
|
posReader = posIn.reader();
|
||||||
|
posIndex = posIn.index();
|
||||||
|
payloadIn = TempSepPostingsReader.this.payloadIn.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
|
||||||
|
this.liveDocs = liveDocs;
|
||||||
|
storePayloads = fieldInfo.hasPayloads();
|
||||||
|
//System.out.println("Sep D&P init");
|
||||||
|
|
||||||
|
// TODO: can't we only do this if consumer
|
||||||
|
// skipped consuming the previous docs?
|
||||||
|
docIndex.copyFrom(termState.docIndex);
|
||||||
|
docIndex.seek(docReader);
|
||||||
|
//System.out.println(" docIndex=" + docIndex);
|
||||||
|
|
||||||
|
freqIndex.copyFrom(termState.freqIndex);
|
||||||
|
freqIndex.seek(freqReader);
|
||||||
|
//System.out.println(" freqIndex=" + freqIndex);
|
||||||
|
|
||||||
|
posIndex.copyFrom(termState.posIndex);
|
||||||
|
//System.out.println(" posIndex=" + posIndex);
|
||||||
|
posSeekPending = true;
|
||||||
|
payloadPending = false;
|
||||||
|
|
||||||
|
payloadFP = termState.payloadFP;
|
||||||
|
skipFP = termState.skipFP;
|
||||||
|
//System.out.println(" skipFP=" + skipFP);
|
||||||
|
|
||||||
|
docFreq = termState.docFreq;
|
||||||
|
count = 0;
|
||||||
|
doc = -1;
|
||||||
|
accum = 0;
|
||||||
|
pendingPosCount = 0;
|
||||||
|
pendingPayloadBytes = 0;
|
||||||
|
skipped = false;
|
||||||
|
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
|
||||||
|
while(true) {
|
||||||
|
if (count == docFreq) {
|
||||||
|
return doc = NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
count++;
|
||||||
|
|
||||||
|
// TODO: maybe we should do the 1-bit trick for encoding
|
||||||
|
// freq=1 case?
|
||||||
|
|
||||||
|
// Decode next doc
|
||||||
|
//System.out.println(" sep d&p read doc");
|
||||||
|
accum += docReader.next();
|
||||||
|
|
||||||
|
//System.out.println(" sep d&p read freq");
|
||||||
|
freq = freqReader.next();
|
||||||
|
|
||||||
|
pendingPosCount += freq;
|
||||||
|
|
||||||
|
if (liveDocs == null || liveDocs.get(accum)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
position = 0;
|
||||||
|
return (doc = accum);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int freq() throws IOException {
|
||||||
|
return freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int docID() {
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
|
||||||
|
|
||||||
|
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
|
||||||
|
|
||||||
|
// There are enough docs in the posting to have
|
||||||
|
// skip data, and its not too close
|
||||||
|
|
||||||
|
if (skipper == null) {
|
||||||
|
//System.out.println(" create skipper");
|
||||||
|
// This DocsEnum has never done any skipping
|
||||||
|
skipper = new SepSkipListReader(skipIn.clone(),
|
||||||
|
freqIn,
|
||||||
|
docIn,
|
||||||
|
posIn,
|
||||||
|
maxSkipLevels, skipInterval);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!skipped) {
|
||||||
|
//System.out.println(" init skip data skipFP=" + skipFP);
|
||||||
|
// We haven't yet skipped for this posting
|
||||||
|
skipper.init(skipFP,
|
||||||
|
docIndex,
|
||||||
|
freqIndex,
|
||||||
|
posIndex,
|
||||||
|
payloadFP,
|
||||||
|
docFreq,
|
||||||
|
storePayloads);
|
||||||
|
skipper.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
skipped = true;
|
||||||
|
}
|
||||||
|
final int newCount = skipper.skipTo(target);
|
||||||
|
//System.out.println(" skip newCount=" + newCount + " vs " + count);
|
||||||
|
|
||||||
|
if (newCount > count) {
|
||||||
|
|
||||||
|
// Skipper did move
|
||||||
|
skipper.getFreqIndex().seek(freqReader);
|
||||||
|
skipper.getDocIndex().seek(docReader);
|
||||||
|
//System.out.println(" doc seek'd to " + skipper.getDocIndex());
|
||||||
|
// NOTE: don't seek pos here; do it lazily
|
||||||
|
// instead. Eg a PhraseQuery may skip to many
|
||||||
|
// docs before finally asking for positions...
|
||||||
|
posIndex.copyFrom(skipper.getPosIndex());
|
||||||
|
posSeekPending = true;
|
||||||
|
count = newCount;
|
||||||
|
doc = accum = skipper.getDoc();
|
||||||
|
//System.out.println(" moved to doc=" + doc);
|
||||||
|
//payloadIn.seek(skipper.getPayloadPointer());
|
||||||
|
payloadFP = skipper.getPayloadPointer();
|
||||||
|
pendingPosCount = 0;
|
||||||
|
pendingPayloadBytes = 0;
|
||||||
|
payloadPending = false;
|
||||||
|
payloadLength = skipper.getPayloadLength();
|
||||||
|
//System.out.println(" move payloadLen=" + payloadLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now, linear scan for the rest:
|
||||||
|
do {
|
||||||
|
if (nextDoc() == NO_MORE_DOCS) {
|
||||||
|
//System.out.println(" advance nextDoc=END");
|
||||||
|
return NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
//System.out.println(" advance nextDoc=" + doc);
|
||||||
|
} while (target > doc);
|
||||||
|
|
||||||
|
//System.out.println(" return doc=" + doc);
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int nextPosition() throws IOException {
|
||||||
|
if (posSeekPending) {
|
||||||
|
posIndex.seek(posReader);
|
||||||
|
payloadIn.seek(payloadFP);
|
||||||
|
posSeekPending = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// scan over any docs that were iterated without their
|
||||||
|
// positions
|
||||||
|
while (pendingPosCount > freq) {
|
||||||
|
final int code = posReader.next();
|
||||||
|
if (storePayloads && (code & 1) != 0) {
|
||||||
|
// Payload length has changed
|
||||||
|
payloadLength = posReader.next();
|
||||||
|
assert payloadLength >= 0;
|
||||||
|
}
|
||||||
|
pendingPosCount--;
|
||||||
|
position = 0;
|
||||||
|
pendingPayloadBytes += payloadLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int code = posReader.next();
|
||||||
|
|
||||||
|
if (storePayloads) {
|
||||||
|
if ((code & 1) != 0) {
|
||||||
|
// Payload length has changed
|
||||||
|
payloadLength = posReader.next();
|
||||||
|
assert payloadLength >= 0;
|
||||||
|
}
|
||||||
|
position += code >>> 1;
|
||||||
|
pendingPayloadBytes += payloadLength;
|
||||||
|
payloadPending = payloadLength > 0;
|
||||||
|
} else {
|
||||||
|
position += code;
|
||||||
|
}
|
||||||
|
|
||||||
|
pendingPosCount--;
|
||||||
|
assert pendingPosCount >= 0;
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int startOffset() {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int endOffset() {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BytesRef payload;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef getPayload() throws IOException {
|
||||||
|
if (!payloadPending) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pendingPayloadBytes == 0) {
|
||||||
|
return payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert pendingPayloadBytes >= payloadLength;
|
||||||
|
|
||||||
|
if (pendingPayloadBytes > payloadLength) {
|
||||||
|
payloadIn.seek(payloadIn.getFilePointer() + (pendingPayloadBytes - payloadLength));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payload == null) {
|
||||||
|
payload = new BytesRef();
|
||||||
|
payload.bytes = new byte[payloadLength];
|
||||||
|
} else if (payload.bytes.length < payloadLength) {
|
||||||
|
payload.grow(payloadLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
payloadIn.readBytes(payload.bytes, 0, payloadLength);
|
||||||
|
payload.length = payloadLength;
|
||||||
|
pendingPayloadBytes = 0;
|
||||||
|
return payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long cost() {
|
||||||
|
return docFreq;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,375 @@
|
||||||
|
package org.apache.lucene.codecs.sep;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.BlockTermState;
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsWriterBase;
|
||||||
|
import org.apache.lucene.codecs.TermStats;
|
||||||
|
import org.apache.lucene.codecs.sep.*;
|
||||||
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
|
/** Writes frq to .frq, docs to .doc, pos to .pos, payloads
|
||||||
|
* to .pyl, skip data to .skp
|
||||||
|
*
|
||||||
|
* @lucene.experimental */
|
||||||
|
public final class TempSepPostingsWriter extends TempPostingsWriterBase {
|
||||||
|
final static String CODEC = "TempSepPostingsWriter";
|
||||||
|
|
||||||
|
final static String DOC_EXTENSION = "doc";
|
||||||
|
final static String SKIP_EXTENSION = "skp";
|
||||||
|
final static String FREQ_EXTENSION = "frq";
|
||||||
|
final static String POS_EXTENSION = "pos";
|
||||||
|
final static String PAYLOAD_EXTENSION = "pyl";
|
||||||
|
|
||||||
|
// Increment version to change it:
|
||||||
|
final static int VERSION_START = 0;
|
||||||
|
final static int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
|
IntIndexOutput freqOut;
|
||||||
|
IntIndexOutput.Index freqIndex;
|
||||||
|
|
||||||
|
IntIndexOutput posOut;
|
||||||
|
IntIndexOutput.Index posIndex;
|
||||||
|
|
||||||
|
IntIndexOutput docOut;
|
||||||
|
IntIndexOutput.Index docIndex;
|
||||||
|
|
||||||
|
IndexOutput payloadOut;
|
||||||
|
|
||||||
|
IndexOutput skipOut;
|
||||||
|
|
||||||
|
final SepSkipListWriter skipListWriter;
|
||||||
|
/** Expert: The fraction of TermDocs entries stored in skip tables,
|
||||||
|
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
|
||||||
|
* smaller indexes, greater acceleration, but fewer accelerable cases, while
|
||||||
|
* smaller values result in bigger indexes, less acceleration and more
|
||||||
|
* accelerable cases. More detailed experiments would be useful here. */
|
||||||
|
final int skipInterval;
|
||||||
|
static final int DEFAULT_SKIP_INTERVAL = 16;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: minimum docFreq to write any skip data at all
|
||||||
|
*/
|
||||||
|
final int skipMinimum;
|
||||||
|
|
||||||
|
/** Expert: The maximum number of skip levels. Smaller values result in
|
||||||
|
* slightly smaller indexes, but slower skipping in big posting lists.
|
||||||
|
*/
|
||||||
|
final int maxSkipLevels = 10;
|
||||||
|
|
||||||
|
final int totalNumDocs;
|
||||||
|
|
||||||
|
boolean storePayloads;
|
||||||
|
IndexOptions indexOptions;
|
||||||
|
|
||||||
|
FieldInfo fieldInfo;
|
||||||
|
|
||||||
|
int lastPayloadLength;
|
||||||
|
int lastPosition;
|
||||||
|
long payloadStart;
|
||||||
|
int lastDocID;
|
||||||
|
int df;
|
||||||
|
|
||||||
|
SepTermState lastState;
|
||||||
|
long lastPayloadFP;
|
||||||
|
long lastSkipFP;
|
||||||
|
|
||||||
|
/**
 * Creates a writer that uses {@link #DEFAULT_SKIP_INTERVAL} as its skip interval.
 *
 * @param state   segment being written
 * @param factory factory used to create the int-stream outputs
 */
public TempSepPostingsWriter(SegmentWriteState state, IntStreamFactory factory) throws IOException {
  this(state, factory, DEFAULT_SKIP_INTERVAL);
}
|
||||||
|
|
||||||
|
/**
 * Creates the writer and opens its output files: always the doc and skip
 * files, the freq file when any field has freqs, and the pos and payload
 * files when any field has prox data. If anything fails, every output
 * opened so far is closed before the exception propagates.
 *
 * @param state        segment being written
 * @param factory      factory used to create the int-stream outputs
 * @param skipInterval skip interval; also used as {@code skipMinimum}
 */
public TempSepPostingsWriter(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException {
  freqOut = posOut = null;
  freqIndex = posIndex = null;
  payloadOut = null;

  boolean success = false;
  try {
    this.skipInterval = skipInterval;
    this.skipMinimum = skipInterval; /* set to the same for now */

    final String segment = state.segmentInfo.name;
    final String suffix = state.segmentSuffix;

    docOut = factory.createOutput(state.directory, IndexFileNames.segmentFileName(segment, suffix, DOC_EXTENSION), state.context);
    docIndex = docOut.index();

    if (state.fieldInfos.hasFreq()) {
      freqOut = factory.createOutput(state.directory, IndexFileNames.segmentFileName(segment, suffix, FREQ_EXTENSION), state.context);
      freqIndex = freqOut.index();
    }

    if (state.fieldInfos.hasProx()) {
      posOut = factory.createOutput(state.directory, IndexFileNames.segmentFileName(segment, suffix, POS_EXTENSION), state.context);
      posIndex = posOut.index();

      // TODO: -- only if at least one field stores payloads?
      payloadOut = state.directory.createOutput(IndexFileNames.segmentFileName(segment, suffix, PAYLOAD_EXTENSION), state.context);
    }

    skipOut = state.directory.createOutput(IndexFileNames.segmentFileName(segment, suffix, SKIP_EXTENSION), state.context);

    totalNumDocs = state.segmentInfo.getDocCount();

    skipListWriter = new SepSkipListWriter(skipInterval, maxSkipLevels, totalNumDocs,
                                           freqOut, docOut, posOut, payloadOut);

    success = true;
  } finally {
    if (!success) {
      // Release whatever was opened; outputs never created are still null.
      IOUtils.closeWhileHandlingException(docOut, skipOut, freqOut, posOut, payloadOut);
    }
  }
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(IndexOutput termsOut) throws IOException {
|
||||||
|
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
|
||||||
|
// TODO: -- just ask skipper to "start" here
|
||||||
|
termsOut.writeInt(skipInterval); // write skipInterval
|
||||||
|
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
|
||||||
|
termsOut.writeInt(skipMinimum); // write skipMinimum
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SepTermState newTermState() {
|
||||||
|
return new SepTermState();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startTerm() throws IOException {
|
||||||
|
docIndex.mark();
|
||||||
|
//System.out.println("SEPW: startTerm docIndex=" + docIndex);
|
||||||
|
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
freqIndex.mark();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
posIndex.mark();
|
||||||
|
payloadStart = payloadOut.getFilePointer();
|
||||||
|
lastPayloadLength = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently, this instance is re-used across fields, so
|
||||||
|
// our parent calls setField whenever the field changes
|
||||||
|
@Override
|
||||||
|
public int setField(FieldInfo fieldInfo) {
|
||||||
|
this.fieldInfo = fieldInfo;
|
||||||
|
this.indexOptions = fieldInfo.getIndexOptions();
|
||||||
|
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
|
||||||
|
throw new UnsupportedOperationException("this codec cannot index offsets");
|
||||||
|
}
|
||||||
|
skipListWriter.setIndexOptions(indexOptions);
|
||||||
|
storePayloads = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && fieldInfo.hasPayloads();
|
||||||
|
lastPayloadFP = 0;
|
||||||
|
lastSkipFP = 0;
|
||||||
|
lastState = setEmptyState();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SepTermState setEmptyState() {
|
||||||
|
SepTermState emptyState = new SepTermState();
|
||||||
|
emptyState.docIndex = docOut.index();
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
emptyState.freqIndex = freqOut.index();
|
||||||
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
emptyState.posIndex = posOut.index();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
emptyState.payloadFP = 0;
|
||||||
|
emptyState.skipFP = 0;
|
||||||
|
return emptyState;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Adds a new doc in this term. If this returns null
|
||||||
|
* then we just skip consuming positions/payloads. */
|
||||||
|
@Override
|
||||||
|
public void startDoc(int docID, int termDocFreq) throws IOException {
|
||||||
|
|
||||||
|
final int delta = docID - lastDocID;
|
||||||
|
//System.out.println("SEPW: startDoc: write doc=" + docID + " delta=" + delta + " out.fp=" + docOut);
|
||||||
|
|
||||||
|
if (docID < 0 || (df > 0 && delta <= 0)) {
|
||||||
|
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((++df % skipInterval) == 0) {
|
||||||
|
// TODO: -- awkward we have to make these two
|
||||||
|
// separate calls to skipper
|
||||||
|
//System.out.println(" buffer skip lastDocID=" + lastDocID);
|
||||||
|
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
|
||||||
|
skipListWriter.bufferSkip(df);
|
||||||
|
}
|
||||||
|
|
||||||
|
lastDocID = docID;
|
||||||
|
docOut.write(delta);
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
//System.out.println(" sepw startDoc: write freq=" + termDocFreq);
|
||||||
|
freqOut.write(termDocFreq);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Add a new position & payload */
|
||||||
|
@Override
|
||||||
|
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
|
||||||
|
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
|
||||||
|
final int delta = position - lastPosition;
|
||||||
|
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
|
||||||
|
lastPosition = position;
|
||||||
|
|
||||||
|
if (storePayloads) {
|
||||||
|
final int payloadLength = payload == null ? 0 : payload.length;
|
||||||
|
if (payloadLength != lastPayloadLength) {
|
||||||
|
lastPayloadLength = payloadLength;
|
||||||
|
// TODO: explore whether we get better compression
|
||||||
|
// by not storing payloadLength into prox stream?
|
||||||
|
posOut.write((delta<<1)|1);
|
||||||
|
posOut.write(payloadLength);
|
||||||
|
} else {
|
||||||
|
posOut.write(delta << 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (payloadLength > 0) {
|
||||||
|
payloadOut.writeBytes(payload.bytes, payload.offset, payloadLength);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
posOut.write(delta);
|
||||||
|
}
|
||||||
|
|
||||||
|
lastPosition = position;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Called when we are done adding positions & payloads */
|
||||||
|
@Override
|
||||||
|
public void finishDoc() {
|
||||||
|
lastPosition = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class SepTermState extends BlockTermState {
|
||||||
|
public IntIndexOutput.Index docIndex;
|
||||||
|
public IntIndexOutput.Index freqIndex;
|
||||||
|
public IntIndexOutput.Index posIndex;
|
||||||
|
public long payloadFP;
|
||||||
|
public long skipFP;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Called when we are done adding docs to this term */
|
||||||
|
@Override
|
||||||
|
public void finishTerm(BlockTermState _state) throws IOException {
|
||||||
|
SepTermState state = (SepTermState)_state;
|
||||||
|
// TODO: -- wasteful we are counting this in two places?
|
||||||
|
assert state.docFreq > 0;
|
||||||
|
assert state.docFreq == df;
|
||||||
|
|
||||||
|
state.docIndex = docOut.index();
|
||||||
|
state.docIndex.copyFrom(docIndex, false);
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
state.freqIndex = freqOut.index();
|
||||||
|
state.freqIndex.copyFrom(freqIndex, false);
|
||||||
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
state.posIndex = posOut.index();
|
||||||
|
state.posIndex.copyFrom(posIndex, false);
|
||||||
|
} else {
|
||||||
|
state.posIndex = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
state.freqIndex = null;
|
||||||
|
state.posIndex = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (df >= skipMinimum) {
|
||||||
|
state.skipFP = skipOut.getFilePointer();
|
||||||
|
//System.out.println(" skipFP=" + skipFP);
|
||||||
|
skipListWriter.writeSkip(skipOut);
|
||||||
|
//System.out.println(" numBytes=" + (skipOut.getFilePointer()-skipFP));
|
||||||
|
} else {
|
||||||
|
state.skipFP = -1;
|
||||||
|
}
|
||||||
|
state.payloadFP = payloadStart;
|
||||||
|
|
||||||
|
lastDocID = 0;
|
||||||
|
df = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
|
||||||
|
SepTermState state = (SepTermState)_state;
|
||||||
|
if (absolute) {
|
||||||
|
lastSkipFP = 0;
|
||||||
|
lastPayloadFP = 0;
|
||||||
|
lastState = state;
|
||||||
|
}
|
||||||
|
lastState.docIndex.copyFrom(state.docIndex, false);
|
||||||
|
lastState.docIndex.write(out, absolute);
|
||||||
|
if (indexOptions != IndexOptions.DOCS_ONLY) {
|
||||||
|
lastState.freqIndex.copyFrom(state.freqIndex, false);
|
||||||
|
lastState.freqIndex.write(out, absolute);
|
||||||
|
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||||
|
lastState.posIndex.copyFrom(state.posIndex, false);
|
||||||
|
lastState.posIndex.write(out, absolute);
|
||||||
|
if (storePayloads) {
|
||||||
|
if (absolute) {
|
||||||
|
out.writeVLong(state.payloadFP);
|
||||||
|
} else {
|
||||||
|
out.writeVLong(state.payloadFP - lastPayloadFP);
|
||||||
|
}
|
||||||
|
lastPayloadFP = state.payloadFP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (state.skipFP != -1) {
|
||||||
|
if (absolute) {
|
||||||
|
out.writeVLong(state.skipFP);
|
||||||
|
} else {
|
||||||
|
out.writeVLong(state.skipFP - lastSkipFP);
|
||||||
|
}
|
||||||
|
lastSkipFP = state.skipFP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
IOUtils.close(docOut, skipOut, freqOut, posOut, payloadOut);
|
||||||
|
}
|
||||||
|
}
|
|
@ -74,7 +74,7 @@ import org.junit.Ignore;
|
||||||
// we won't even be running the actual code, only the impostor
|
// we won't even be running the actual code, only the impostor
|
||||||
// @SuppressCodecs("Lucene4x")
|
// @SuppressCodecs("Lucene4x")
|
||||||
// Sep codec cannot yet handle the offsets in our 4.x index!
|
// Sep codec cannot yet handle the offsets in our 4.x index!
|
||||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene40", "Lucene41"})
|
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "TempSep", "TempFixedIntBlock", "TempVariableIntBlock"})
|
||||||
public class TestBackwardsCompatibility extends LuceneTestCase {
|
public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||||
|
|
||||||
// Uncomment these cases & run them on an older Lucene version,
|
// Uncomment these cases & run them on an older Lucene version,
|
||||||
|
|
|
@ -49,7 +49,7 @@ import org.apache.lucene.util._TestUtil;
|
||||||
// TODO: we really need to test indexingoffsets, but then getting only docs / docs + freqs.
|
// TODO: we really need to test indexingoffsets, but then getting only docs / docs + freqs.
|
||||||
// not all codecs store prx separate...
|
// not all codecs store prx separate...
|
||||||
// TODO: fix sep codec to index offsets so we can greatly reduce this list!
|
// TODO: fix sep codec to index offsets so we can greatly reduce this list!
|
||||||
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom"})
|
@SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "TempSep", "TempFixedIntBlock", "TempVariableIntBlock"})
|
||||||
public class TestPostingsOffsets extends LuceneTestCase {
|
public class TestPostingsOffsets extends LuceneTestCase {
|
||||||
IndexWriterConfig iwc;
|
IndexWriterConfig iwc;
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.codecs.mocksep;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
@ -86,7 +87,7 @@ public class MockSingleIntIndexOutput extends IntIndexOutput {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public void write(IndexOutput indexOut, boolean absolute)
|
public void write(DataOutput indexOut, boolean absolute)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (absolute) {
|
if (absolute) {
|
||||||
indexOut.writeVLong(fp);
|
indexOut.writeVLong(fp);
|
||||||
|
|
|
@ -0,0 +1,198 @@
|
||||||
|
package org.apache.lucene.codecs.temp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsReaderBase;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsWriterBase;
|
||||||
|
import org.apache.lucene.codecs.blockterms.BlockTermsReader;
|
||||||
|
import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
|
||||||
|
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
|
||||||
|
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
|
||||||
|
import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
|
||||||
|
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
|
||||||
|
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput;
|
||||||
|
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
|
||||||
|
import org.apache.lucene.codecs.sep.*;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
|
import org.apache.lucene.store.*;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A silly test codec to verify core support for fixed
|
||||||
|
* sized int block encoders is working. The int encoder
|
||||||
|
* used here just writes each block as a series of vInt.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public final class TempFixedIntBlockPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
|
private final int blockSize;
|
||||||
|
|
||||||
|
/** Creates the format with a block size of 1. */
public TempFixedIntBlockPostingsFormat() {
  this(1);
}
|
||||||
|
|
||||||
|
/**
 * Creates the format under the name "TempFixedIntBlock".
 *
 * @param blockSize block size handed to every {@link MockIntFactory} this format creates
 */
public TempFixedIntBlockPostingsFormat(int blockSize) {
  super("TempFixedIntBlock");
  this.blockSize = blockSize;
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getName() + "(blockSize=" + blockSize + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
// only for testing
|
||||||
|
public IntStreamFactory getIntFactory() {
|
||||||
|
return new MockIntFactory(blockSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encodes blocks as vInts of a fixed block size.
|
||||||
|
*/
|
||||||
|
public static class MockIntFactory extends IntStreamFactory {
|
||||||
|
private final int blockSize;
|
||||||
|
|
||||||
|
public MockIntFactory(int blockSize) {
|
||||||
|
this.blockSize = blockSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
|
||||||
|
return new FixedIntBlockIndexInput(dir.openInput(fileName, context)) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) {
|
||||||
|
return new BlockReader() {
|
||||||
|
public void seek(long pos) {}
|
||||||
|
@Override
|
||||||
|
public void readBlock() throws IOException {
|
||||||
|
for(int i=0;i<buffer.length;i++) {
|
||||||
|
buffer[i] = in.readVInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
|
||||||
|
IndexOutput out = dir.createOutput(fileName, context);
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
FixedIntBlockIndexOutput ret = new FixedIntBlockIndexOutput(out, blockSize) {
|
||||||
|
@Override
|
||||||
|
protected void flushBlock() throws IOException {
|
||||||
|
for(int i=0;i<buffer.length;i++) {
|
||||||
|
out.writeVInt(buffer[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||||
|
TempPostingsWriterBase postingsWriter = new TempSepPostingsWriter(state, new MockIntFactory(blockSize));
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
TermsIndexWriterBase indexWriter;
|
||||||
|
try {
|
||||||
|
indexWriter = new FixedGapTermsIndexWriter(state);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
postingsWriter.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
try {
|
||||||
|
FieldsConsumer ret = new TempBlockTermsWriter(indexWriter, state, postingsWriter);
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
try {
|
||||||
|
postingsWriter.close();
|
||||||
|
} finally {
|
||||||
|
indexWriter.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||||
|
TempPostingsReaderBase postingsReader = new TempSepPostingsReader(state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo,
|
||||||
|
state.context,
|
||||||
|
new MockIntFactory(blockSize), state.segmentSuffix);
|
||||||
|
|
||||||
|
TermsIndexReaderBase indexReader;
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
indexReader = new FixedGapTermsIndexReader(state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo.name,
|
||||||
|
BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix,
|
||||||
|
IOContext.DEFAULT);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
postingsReader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
try {
|
||||||
|
FieldsProducer ret = new TempBlockTermsReader(indexReader,
|
||||||
|
state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo,
|
||||||
|
postingsReader,
|
||||||
|
state.context,
|
||||||
|
state.segmentSuffix);
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
try {
|
||||||
|
postingsReader.close();
|
||||||
|
} finally {
|
||||||
|
indexReader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,123 @@
|
||||||
|
package org.apache.lucene.codecs.temp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsReaderBase;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsWriterBase;
|
||||||
|
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
|
||||||
|
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
|
||||||
|
import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
|
||||||
|
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
|
||||||
|
import org.apache.lucene.codecs.sep.*;
|
||||||
|
import org.apache.lucene.codecs.mocksep.*;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A silly codec that simply writes each file separately as
|
||||||
|
* single vInts. Don't use this (performance will be poor)!
|
||||||
|
* This is here just to test the core sep codec
|
||||||
|
* classes.
|
||||||
|
*/
|
||||||
|
public final class TempSepPostingsFormat extends PostingsFormat {
|
||||||
|
|
||||||
|
public TempSepPostingsFormat() {
|
||||||
|
super("TempSep");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||||
|
|
||||||
|
TempPostingsWriterBase postingsWriter = new TempSepPostingsWriter(state, new MockSingleIntFactory());
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
TermsIndexWriterBase indexWriter;
|
||||||
|
try {
|
||||||
|
indexWriter = new FixedGapTermsIndexWriter(state);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
postingsWriter.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
try {
|
||||||
|
FieldsConsumer ret = new TempBlockTermsWriter(indexWriter, state, postingsWriter);
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
try {
|
||||||
|
postingsWriter.close();
|
||||||
|
} finally {
|
||||||
|
indexWriter.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||||
|
|
||||||
|
TempPostingsReaderBase postingsReader = new TempSepPostingsReader(state.directory, state.fieldInfos, state.segmentInfo,
|
||||||
|
state.context, new MockSingleIntFactory(), state.segmentSuffix);
|
||||||
|
|
||||||
|
TermsIndexReaderBase indexReader;
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
indexReader = new FixedGapTermsIndexReader(state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo.name,
|
||||||
|
BytesRef.getUTF8SortedAsUnicodeComparator(),
|
||||||
|
state.segmentSuffix, state.context);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
postingsReader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
try {
|
||||||
|
FieldsProducer ret = new TempBlockTermsReader(indexReader,
|
||||||
|
state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo,
|
||||||
|
postingsReader,
|
||||||
|
state.context,
|
||||||
|
state.segmentSuffix);
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
try {
|
||||||
|
postingsReader.close();
|
||||||
|
} finally {
|
||||||
|
indexReader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,223 @@
|
||||||
|
package org.apache.lucene.codecs.temp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
|
import org.apache.lucene.codecs.FieldsProducer;
|
||||||
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsReaderBase;
|
||||||
|
import org.apache.lucene.codecs.TempPostingsWriterBase;
|
||||||
|
import org.apache.lucene.codecs.blockterms.BlockTermsReader;
|
||||||
|
import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
|
||||||
|
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
|
||||||
|
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
|
||||||
|
import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
|
||||||
|
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
|
||||||
|
import org.apache.lucene.codecs.intblock.VariableIntBlockIndexInput;
|
||||||
|
import org.apache.lucene.codecs.intblock.VariableIntBlockIndexOutput;
|
||||||
|
import org.apache.lucene.codecs.sep.*;
|
||||||
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
import org.apache.lucene.index.SegmentReadState;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A silly test codec to verify core support for variable
|
||||||
|
* sized int block encoders is working. The int encoder
|
||||||
|
* used here writes baseBlockSize ints at once, if the first
|
||||||
|
* int is <= 3, else 2*baseBlockSize.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public final class TempVariableIntBlockPostingsFormat extends PostingsFormat {
|
||||||
|
private final int baseBlockSize;
|
||||||
|
|
||||||
|
public TempVariableIntBlockPostingsFormat() {
|
||||||
|
this(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public TempVariableIntBlockPostingsFormat(int baseBlockSize) {
|
||||||
|
super("TempVariableIntBlock");
|
||||||
|
this.baseBlockSize = baseBlockSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return getName() + "(baseBlockSize="+ baseBlockSize + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If the first value is <= 3, writes baseBlockSize vInts at once,
|
||||||
|
* otherwise writes 2*baseBlockSize vInts.
|
||||||
|
*/
|
||||||
|
public static class MockIntFactory extends IntStreamFactory {
|
||||||
|
|
||||||
|
private final int baseBlockSize;
|
||||||
|
|
||||||
|
public MockIntFactory(int baseBlockSize) {
|
||||||
|
this.baseBlockSize = baseBlockSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
|
||||||
|
final IndexInput in = dir.openInput(fileName, context);
|
||||||
|
final int baseBlockSize = in.readInt();
|
||||||
|
return new VariableIntBlockIndexInput(in) {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) {
|
||||||
|
return new BlockReader() {
|
||||||
|
@Override
|
||||||
|
public void seek(long pos) {}
|
||||||
|
@Override
|
||||||
|
public int readBlock() throws IOException {
|
||||||
|
buffer[0] = in.readVInt();
|
||||||
|
final int count = buffer[0] <= 3 ? baseBlockSize-1 : 2*baseBlockSize-1;
|
||||||
|
assert buffer.length >= count: "buffer.length=" + buffer.length + " count=" + count;
|
||||||
|
for(int i=0;i<count;i++) {
|
||||||
|
buffer[i+1] = in.readVInt();
|
||||||
|
}
|
||||||
|
return 1+count;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
|
||||||
|
final IndexOutput out = dir.createOutput(fileName, context);
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
out.writeInt(baseBlockSize);
|
||||||
|
VariableIntBlockIndexOutput ret = new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
|
||||||
|
int pendingCount;
|
||||||
|
final int[] buffer = new int[2+2*baseBlockSize];
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int add(int value) throws IOException {
|
||||||
|
buffer[pendingCount++] = value;
|
||||||
|
// silly variable block length int encoder: if
|
||||||
|
// first value <= 3, we write N vints at once;
|
||||||
|
// else, 2*N
|
||||||
|
final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
|
||||||
|
|
||||||
|
// intentionally be non-causal here:
|
||||||
|
if (pendingCount == flushAt+1) {
|
||||||
|
for(int i=0;i<flushAt;i++) {
|
||||||
|
out.writeVInt(buffer[i]);
|
||||||
|
}
|
||||||
|
buffer[0] = buffer[flushAt];
|
||||||
|
pendingCount = 1;
|
||||||
|
return flushAt;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||||
|
TempPostingsWriterBase postingsWriter = new TempSepPostingsWriter(state, new MockIntFactory(baseBlockSize));
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
TermsIndexWriterBase indexWriter;
|
||||||
|
try {
|
||||||
|
indexWriter = new FixedGapTermsIndexWriter(state);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
postingsWriter.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
try {
|
||||||
|
FieldsConsumer ret = new TempBlockTermsWriter(indexWriter, state, postingsWriter);
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
try {
|
||||||
|
postingsWriter.close();
|
||||||
|
} finally {
|
||||||
|
indexWriter.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||||
|
TempPostingsReaderBase postingsReader = new TempSepPostingsReader(state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo,
|
||||||
|
state.context,
|
||||||
|
new MockIntFactory(baseBlockSize), state.segmentSuffix);
|
||||||
|
|
||||||
|
TermsIndexReaderBase indexReader;
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
indexReader = new FixedGapTermsIndexReader(state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo.name,
|
||||||
|
BytesRef.getUTF8SortedAsUnicodeComparator(),
|
||||||
|
state.segmentSuffix, state.context);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
postingsReader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
try {
|
||||||
|
FieldsProducer ret = new TempBlockTermsReader(indexReader,
|
||||||
|
state.directory,
|
||||||
|
state.fieldInfos,
|
||||||
|
state.segmentInfo,
|
||||||
|
postingsReader,
|
||||||
|
state.context,
|
||||||
|
state.segmentSuffix);
|
||||||
|
success = true;
|
||||||
|
return ret;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
try {
|
||||||
|
postingsReader.close();
|
||||||
|
} finally {
|
||||||
|
indexReader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -324,7 +324,10 @@ public abstract class LuceneTestCase extends Assert {
|
||||||
"MockFixedIntBlock",
|
"MockFixedIntBlock",
|
||||||
"MockVariableIntBlock",
|
"MockVariableIntBlock",
|
||||||
"MockSep",
|
"MockSep",
|
||||||
"MockRandom"
|
"MockRandom",
|
||||||
|
"TempSep",
|
||||||
|
"TempFixedIntBlock",
|
||||||
|
"TempVariableIntBlock"
|
||||||
));
|
));
|
||||||
|
|
||||||
// -----------------------------------------------------------------
|
// -----------------------------------------------------------------
|
||||||
|
|
|
@ -25,3 +25,6 @@ org.apache.lucene.codecs.lucene41vargap.Lucene41VarGapDocFreqInterval
|
||||||
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings
|
org.apache.lucene.codecs.bloom.TestBloomFilteredLucene41Postings
|
||||||
org.apache.lucene.codecs.asserting.AssertingPostingsFormat
|
org.apache.lucene.codecs.asserting.AssertingPostingsFormat
|
||||||
org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat
|
org.apache.lucene.codecs.lucene40.Lucene40RWPostingsFormat
|
||||||
|
org.apache.lucene.codecs.temp.TempSepPostingsFormat
|
||||||
|
org.apache.lucene.codecs.temp.TempFixedIntBlockPostingsFormat
|
||||||
|
org.apache.lucene.codecs.temp.TempVariableIntBlockPostingsFormat
|
||||||
|
|
Loading…
Reference in New Issue