mirror of https://github.com/apache/lucene.git
LUCENE-3892: remove oal.codecs.pfor (it's slower than block); add new BlockPacked postings format (copy of Block postings format except it uses oal.util.packed for packed ints encode/decode)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1367338 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 33f6da286e
commit 94aecff6c3
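Annotation (not part of the commit): the sketch below illustrates the encode path the commit message describes — a 128-value block packed at a fixed bit width via oal.util.packed — using the same LongBuffer-based PackedInts calls that the new blockpacked ForUtil (added further down in this diff) makes. Class and method names here are hypothetical, for illustration only.

    // Hypothetical sketch of packing one 128-value block with oal.util.packed,
    // mirroring ForUtil.compress() as added in this commit.
    import java.nio.LongBuffer;
    import org.apache.lucene.util.packed.PackedInts;

    class PackedBlockSketch {
      static int encodeBlock(LongBuffer data, LongBuffer packed, int numBits) {
        // choose the fastest packed format able to hold numBits bits per value
        PackedInts.Format format = PackedInts.fastestFormatAndBits(128, numBits, PackedInts.FASTEST).format;
        PackedInts.Encoder encoder = PackedInts.getEncoder(format, PackedInts.VERSION_CURRENT, numBits);
        int iters = 128 / encoder.values();  // bulk iterations covering the whole block
        encoder.encode(data, packed, iters); // pack the 128 values into 'packed'
        return encoder.blocks() * iters;     // number of 64-bit blocks written
      }
    }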
@@ -24,7 +24,6 @@ import java.nio.IntBuffer;
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.codecs.pfor.ForUtil;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo.IndexOptions;

@@ -43,8 +42,6 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 
-// nocommit move ForUtil here?
-
 // nocommit javadocs
 public final class BlockPostingsReader extends PostingsReaderBase {
 
@@ -26,7 +26,6 @@ import java.util.List;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsWriterBase;
 import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.pfor.ForUtil; // nocommit move here?
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.pfor;
+package org.apache.lucene.codecs.block;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.pfor;
+package org.apache.lucene.codecs.block;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
@@ -48,7 +48,7 @@ def genDecompress():
   f = open(fileName, 'w')
   w = f.write
   try:
-    w("package org.apache.lucene.codecs.pfor;\n")
+    w("package org.apache.lucene.codecs.block;\n")
     w("""/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.pfor;
+package org.apache.lucene.codecs.blockpacked;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -18,49 +18,41 @@ package org.apache.lucene.codecs.pfor;
  */
 
 import java.io.IOException;
-import java.util.Set;
 
 import org.apache.lucene.codecs.BlockTreeTermsReader;
 import org.apache.lucene.codecs.BlockTreeTermsWriter;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.FixedGapTermsIndexReader;
-import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.PostingsWriterBase;
-import org.apache.lucene.codecs.TermsIndexReaderBase;
-import org.apache.lucene.codecs.TermsIndexWriterBase;
-import org.apache.lucene.codecs.sep.SepPostingsReader;
-import org.apache.lucene.codecs.sep.SepPostingsWriter;
-import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 
 /**
  * Pass ForFactory to a PostingsWriter/ReaderBase, and get
  * customized postings format plugged.
  */
-public final class ForPostingsFormat extends PostingsFormat {
-  private final int minBlockSize;
-  private final int maxBlockSize;
+public final class BlockPackedPostingsFormat extends PostingsFormat {
+  public static final String DOC_EXTENSION = "doc";
+  public static final String POS_EXTENSION = "pos";
+  public static final String PAY_EXTENSION = "pay";
+
+  private final int minTermBlockSize;
+  private final int maxTermBlockSize;
   public final static int DEFAULT_BLOCK_SIZE = 128;
 
-  public ForPostingsFormat() {
-    super("For");
-    this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
-    this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
+  public BlockPackedPostingsFormat() {
+    this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
   }
 
-  public ForPostingsFormat(int minBlockSize, int maxBlockSize) {
-    super("For");
-    this.minBlockSize = minBlockSize;
-    assert minBlockSize > 1;
-    this.maxBlockSize = maxBlockSize;
-    assert minBlockSize <= maxBlockSize;
+  public BlockPackedPostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
+    super("BlockPacked");
+    this.minTermBlockSize = minTermBlockSize;
+    assert minTermBlockSize > 1;
+    this.maxTermBlockSize = maxTermBlockSize;
+    assert minTermBlockSize <= maxTermBlockSize;
   }
 
   @Override
@@ -71,13 +63,14 @@ public final class ForPostingsFormat extends PostingsFormat {
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
     // TODO: implement a new PostingsWriterBase to improve skip-settings
-    PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new ForFactory());
+    PostingsWriterBase postingsWriter = new BlockPackedPostingsWriter(state, 128);
+
     boolean success = false;
     try {
       FieldsConsumer ret = new BlockTreeTermsWriter(state,
                                                     postingsWriter,
-                                                    minBlockSize,
-                                                    maxBlockSize);
+                                                    minTermBlockSize,
+                                                    maxTermBlockSize);
       success = true;
       return ret;
     } finally {
@@ -89,13 +82,12 @@ public final class ForPostingsFormat extends PostingsFormat {
 
   @Override
   public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
-    PostingsReaderBase postingsReader = new SepPostingsReader(state.dir,
+    PostingsReaderBase postingsReader = new BlockPackedPostingsReader(state.dir,
                                                               state.fieldInfos,
                                                               state.segmentInfo,
                                                               state.context,
-                                                              new ForFactory(),
-                                                              state.segmentSuffix);
+                                                              state.segmentSuffix,
+                                                              128);
 
     boolean success = false;
     try {
       FieldsProducer ret = new BlockTreeTermsReader(state.dir,

File diff suppressed because it is too large
@@ -0,0 +1,592 @@
+package org.apache.lucene.codecs.blockpacked;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
+import java.nio.LongBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+// nocommit javadocs
+
+public final class BlockPackedPostingsWriter extends PostingsWriterBase {
+
+  private boolean DEBUG = BlockPackedPostingsReader.DEBUG;
+
+  // nocommit move these constants to the PF:
+
+  static final int maxSkipLevels = 10;
+
+  final static String TERMS_CODEC = "BlockPackedPostingsWriterTerms";
+  final static String DOC_CODEC = "BlockPackedPostingsWriterDoc";
+  final static String POS_CODEC = "BlockPackedPostingsWriterPos";
+  final static String PAY_CODEC = "BlockPackedPostingsWriterPay";
+
+  // Increment version to change it:
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+
+  final IndexOutput docOut;
+  final IndexOutput posOut;
+  final IndexOutput payOut;
+
+  static final int DEFAULT_BLOCK_SIZE = 128;
+
+  final int blockSize;
+
+  private IndexOutput termsOut;
+
+  // How current field indexes postings:
+  private boolean fieldHasFreqs;
+  private boolean fieldHasPositions;
+  private boolean fieldHasOffsets;
+  private boolean fieldHasPayloads;
+
+  // Holds starting file pointers for each term:
+  private long docTermStartFP;
+  private long posTermStartFP;
+  private long payTermStartFP;
+
+  final long[] docDeltaBuffer;
+  final long[] freqBuffer;
+  final LongBuffer docDeltaLBuffer;
+  final LongBuffer freqLBuffer;
+  private int docBufferUpto;
+
+  final long[] posDeltaBuffer;
+  final long[] payloadLengthBuffer;
+  final long[] offsetStartDeltaBuffer;
+  final long[] offsetLengthBuffer;
+  final LongBuffer posDeltaLBuffer;
+  final LongBuffer payloadLengthLBuffer;
+  final LongBuffer offsetStartDeltaLBuffer;
+  final LongBuffer offsetLengthLBuffer;
+  private int posBufferUpto;
+
+  private byte[] payloadBytes;
+  private int payloadByteUpto;
+
+  private int lastBlockDocID;
+  private boolean saveNextPosBlock;
+  private long lastBlockPosFP;
+  private long lastBlockPayFP;
+  private int lastBlockPosBufferUpto;
+  private int lastBlockEndOffset;
+  private int lastBlockPayloadByteUpto;
+  private int lastDocID;
+  private int lastPosition;
+  private int lastEndOffset;
+  private int docCount;
+
+  final byte[] encoded;
+  final LongBuffer encodedBuffer;
+
+  private final BlockPackedSkipWriter skipWriter;
+
+  public BlockPackedPostingsWriter(SegmentWriteState state, int blockSize) throws IOException {
+    super();
+    this.blockSize = blockSize;
+
+    docOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPackedPostingsFormat.DOC_EXTENSION),
+                                          state.context);
+    IndexOutput posOut = null;
+    IndexOutput payOut = null;
+    boolean success = false;
+    try {
+      CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
+      if (state.fieldInfos.hasProx()) {
+        posDeltaBuffer = new long[blockSize];
+        posDeltaLBuffer = LongBuffer.wrap(posDeltaBuffer);
+        posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPackedPostingsFormat.POS_EXTENSION),
+                                              state.context);
+        CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
+
+        if (state.fieldInfos.hasPayloads()) {
+          payloadBytes = new byte[128];
+          payloadLengthBuffer = new long[blockSize];
+          payloadLengthLBuffer = LongBuffer.wrap(payloadLengthBuffer);
+        } else {
+          payloadBytes = null;
+          payloadLengthBuffer = null;
+          payloadLengthLBuffer = null;
+        }
+
+        if (state.fieldInfos.hasOffsets()) {
+          offsetStartDeltaBuffer = new long[blockSize];
+          offsetLengthBuffer = new long[blockSize];
+          offsetStartDeltaLBuffer = LongBuffer.wrap(offsetStartDeltaBuffer);
+          offsetLengthLBuffer = LongBuffer.wrap(offsetLengthBuffer);
+        } else {
+          offsetStartDeltaBuffer = null;
+          offsetLengthBuffer = null;
+          offsetStartDeltaLBuffer = null;
+          offsetLengthLBuffer = null;
+        }
+
+        if (state.fieldInfos.hasPayloads() || state.fieldInfos.hasOffsets()) {
+          payOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPackedPostingsFormat.PAY_EXTENSION),
+                                                state.context);
+          CodecUtil.writeHeader(payOut, PAY_CODEC, VERSION_CURRENT);
+        }
+      } else {
+        posDeltaBuffer = null;
+        payloadLengthBuffer = null;
+        offsetStartDeltaBuffer = null;
+        offsetLengthBuffer = null;
+        payloadBytes = null;
+        posDeltaLBuffer = null;
+        payloadLengthLBuffer = null;
+        offsetStartDeltaLBuffer = null;
+        offsetLengthLBuffer = null;
+      }
+      this.payOut = payOut;
+      this.posOut = posOut;
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(docOut, posOut, payOut);
+      }
+    }
+
+    docDeltaBuffer = new long[blockSize];
+    freqBuffer = new long[blockSize];
+    docDeltaLBuffer = LongBuffer.wrap(docDeltaBuffer);
+    freqLBuffer = LongBuffer.wrap(freqBuffer);
+
+    skipWriter = new BlockPackedSkipWriter(blockSize,
+                                           maxSkipLevels,
+                                           state.segmentInfo.getDocCount(),
+                                           docOut,
+                                           posOut,
+                                           payOut);
+
+    encoded = new byte[blockSize*4];
+    encodedBuffer = ByteBuffer.wrap(encoded).asLongBuffer();
+  }
+
+  @Override
+  public void start(IndexOutput termsOut) throws IOException {
+    this.termsOut = termsOut;
+    CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
+    termsOut.writeVInt(blockSize);
+  }
+
+  @Override
+  public void setField(FieldInfo fieldInfo) {
+    IndexOptions indexOptions = fieldInfo.getIndexOptions();
+    fieldHasFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+    fieldHasPositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    fieldHasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    fieldHasPayloads = fieldInfo.hasPayloads();
+    skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads);
+  }
+
+  @Override
+  public void startTerm() {
+    docTermStartFP = docOut.getFilePointer();
+    if (fieldHasPositions) {
+      posTermStartFP = posOut.getFilePointer();
+      if (fieldHasPayloads || fieldHasOffsets) {
+        payTermStartFP = payOut.getFilePointer();
+      }
+    }
+    lastBlockDocID = -1;
+    lastDocID = 0;
+    if (DEBUG) {
+      System.out.println("FPW.startTerm startFP=" + docTermStartFP);
+    }
+    skipWriter.resetSkip();
+  }
+
+  private void writeBlock(LongBuffer buffer, IndexOutput out) throws IOException {
+    final int header = ForUtil.compress(buffer, encodedBuffer);
+    out.writeVInt(header);
+    out.writeBytes(encoded, ForUtil.getEncodedSize(header));
+  }
+
+  @Override
+  public void startDoc(int docID, int termDocFreq) throws IOException {
+    if (DEBUG) {
+      System.out.println("FPW.startDoc docID=" + docID);
+    }
+
+    // nocommit do this in finishDoc... but does it fail...?
+    // is it not always called...?
+    if (posOut != null && saveNextPosBlock) {
+      lastBlockPosFP = posOut.getFilePointer();
+      if (payOut != null) {
+        lastBlockPayFP = payOut.getFilePointer();
+      }
+      lastBlockPosBufferUpto = posBufferUpto;
+      lastBlockEndOffset = lastEndOffset;
+      lastBlockPayloadByteUpto = payloadByteUpto;
+      saveNextPosBlock = false;
+      if (DEBUG) {
+        System.out.println(" now save lastBlockPosFP=" + lastBlockPosFP + " lastBlockPosBufferUpto=" + lastBlockPosBufferUpto + " lastBlockPayloadByteUpto=" + lastBlockPayloadByteUpto);
+      }
+    }
+
+    final int docDelta = docID - lastDocID;
+    if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
+      throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")");
+    }
+    lastDocID = docID;
+
+    docDeltaBuffer[docBufferUpto] = docDelta;
+    if (DEBUG) {
+      System.out.println(" docDeltaBuffer[" + docBufferUpto + "]=" + docDelta);
+    }
+    if (fieldHasFreqs) {
+      freqBuffer[docBufferUpto] = termDocFreq;
+    }
+
+    docBufferUpto++;
+    docCount++;
+
+    if (docBufferUpto == blockSize) {
+      // nocommit maybe instead of buffering skip before
+      // writing a block based on last block's end data
+      // ... we could buffer after writing the block? only
+      // iffiness with that approach is it could be a
+      // pointlness skip? like we may stop adding docs
+      // right after that, then we have skip point AFTER
+      // last doc. the thing is, in finishTerm we are
+      // already sometimes adding a skip point AFTER the
+      // last doc?
+      if (lastBlockDocID != -1) {
+        if (DEBUG) {
+          System.out.println(" bufferSkip at writeBlock: lastDocID=" + lastBlockDocID + " docCount=" + (docCount-blockSize));
+        }
+        skipWriter.bufferSkip(lastBlockDocID, docCount-blockSize, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockEndOffset, lastBlockPayloadByteUpto);
+      }
+      lastBlockDocID = docID;
+      saveNextPosBlock = true;
+
+      if (DEBUG) {
+        System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer());
+      }
+      writeBlock(docDeltaLBuffer, docOut);
+      if (fieldHasFreqs) {
+        if (DEBUG) {
+          System.out.println(" write freq block @ fp=" + docOut.getFilePointer());
+        }
+        writeBlock(freqLBuffer, docOut);
+      }
+      docBufferUpto = 0;
+    }
+
+    lastPosition = 0;
+    lastEndOffset = 0;
+  }
+
+  /** Add a new position & payload */
+  @Override
+  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+    if (DEBUG) {
+      System.out.println("FPW.addPosition pos=" + position + " posBufferUpto=" + posBufferUpto + (fieldHasPayloads ? " payloadByteUpto=" + payloadByteUpto: ""));
+    }
+    posDeltaBuffer[posBufferUpto] = position - lastPosition;
+    if (fieldHasPayloads) {
+      if (payload == null || payload.length == 0) {
+        // no payload
+        payloadLengthBuffer[posBufferUpto] = 0;
+      } else {
+        payloadLengthBuffer[posBufferUpto] = payload.length;
+        if (payloadByteUpto + payload.length > payloadBytes.length) {
+          payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payload.length);
+        }
+        System.arraycopy(payload.bytes, payload.offset, payloadBytes, payloadByteUpto, payload.length);
+        payloadByteUpto += payload.length;
+      }
+    }
+
+    if (fieldHasOffsets) {
+      assert startOffset >= lastEndOffset;
+      assert endOffset >= startOffset;
+      offsetStartDeltaBuffer[posBufferUpto] = startOffset - lastEndOffset;
+      offsetLengthBuffer[posBufferUpto] = endOffset - startOffset;
+      lastEndOffset = endOffset;
+    }
+
+    posBufferUpto++;
+    lastPosition = position;
+    if (posBufferUpto == blockSize) {
+      if (DEBUG) {
+        System.out.println(" write pos bulk block @ fp=" + posOut.getFilePointer());
+      }
+      writeBlock(posDeltaLBuffer, posOut);
+
+      if (fieldHasPayloads) {
+        writeBlock(payloadLengthLBuffer, payOut);
+        payOut.writeVInt(payloadByteUpto);
+        payOut.writeBytes(payloadBytes, 0, payloadByteUpto);
+        payloadByteUpto = 0;
+      }
+      if (fieldHasOffsets) {
+        writeBlock(offsetStartDeltaLBuffer, payOut);
+        writeBlock(offsetLengthLBuffer, payOut);
+      }
+      posBufferUpto = 0;
+    }
+  }
+
+  @Override
+  public void finishDoc() {
+  }
+
+  private static class PendingTerm {
+    public final long docStartFP;
+    public final long posStartFP;
+    public final long payStartFP;
+    public final int skipOffset;
+    public final int lastPosBlockOffset;
+
+    public PendingTerm(long docStartFP, long posStartFP, long payStartFP, int skipOffset, int lastPosBlockOffset) {
+      this.docStartFP = docStartFP;
+      this.posStartFP = posStartFP;
+      this.payStartFP = payStartFP;
+      this.skipOffset = skipOffset;
+      this.lastPosBlockOffset = lastPosBlockOffset;
+    }
+  }
+
+  private final List<PendingTerm> pendingTerms = new ArrayList<PendingTerm>();
+
+  /** Called when we are done adding docs to this term */
+  @Override
+  public void finishTerm(TermStats stats) throws IOException {
+
+    assert stats.docFreq > 0;
+
+    // TODO: wasteful we are counting this (counting # docs
+    // for this term) in two places?
+    assert stats.docFreq == docCount: stats.docFreq + " vs " + docCount;
+
+    if (DEBUG) {
+      System.out.println("FPW.finishTerm docFreq=" + stats.docFreq);
+    }
+
+    // nocommit silly that skipper must write skip when we no
+    // postings come after it, but if we don't do this, skip
+    // reader incorrectly thinks it can read another level 0
+    // skip entry here!:
+    //if (docCount > blockSize && docBufferUpto > 0) {
+    if (docCount > blockSize) {
+      final int lastDocCount = blockSize*(docCount/blockSize);
+      if (DEBUG) {
+        System.out.println(" bufferSkip at finishTerm: lastDocID=" + lastBlockDocID + " docCount=" + lastDocCount);
+      }
+      skipWriter.bufferSkip(lastBlockDocID, lastDocCount, lastBlockPosFP, lastBlockPayFP, lastBlockPosBufferUpto, lastBlockEndOffset, lastBlockPayloadByteUpto);
+    }
+
+    if (DEBUG) {
+      if (docBufferUpto > 0) {
+        System.out.println(" write doc/freq vInt block (count=" + docBufferUpto + ") at fp=" + docOut.getFilePointer() + " docTermStartFP=" + docTermStartFP);
+      }
+    }
+
+    // vInt encode the remaining doc deltas and freqs:
+    for(int i=0;i<docBufferUpto;i++) {
+      final int docDelta = (int)docDeltaBuffer[i];
+      final int freq = (int)freqBuffer[i];
+      if (!fieldHasFreqs) {
+        docOut.writeVInt(docDelta);
+      } else if (freqBuffer[i] == 1) {
+        docOut.writeVInt((docDelta<<1)|1);
+      } else {
+        docOut.writeVInt(docDelta<<1);
+        docOut.writeVInt(freq);
+      }
+    }
+
+    final int lastPosBlockOffset;
+
+    if (fieldHasPositions) {
+      if (DEBUG) {
+        if (posBufferUpto > 0) {
+          System.out.println(" write pos vInt block (count=" + posBufferUpto + ") at fp=" + posOut.getFilePointer() + " posTermStartFP=" + posTermStartFP + " hasPayloads=" + fieldHasPayloads + " hasOffsets=" + fieldHasOffsets);
+        }
+      }
+
+      assert stats.totalTermFreq != -1;
+      if (stats.totalTermFreq > blockSize) {
+        lastPosBlockOffset = (int) (posOut.getFilePointer() - posTermStartFP);
+      } else {
+        lastPosBlockOffset = -1;
+      }
+      if (posBufferUpto > 0) {
+        posOut.writeVInt(posBufferUpto);
+
+        // nocommit should we send offsets/payloads to
+        // .pay...? seems wasteful (have to store extra
+        // vLong for low (< blockSize) DF terms = vast vast
+        // majority)
+
+        // vInt encode the remaining positions/payloads/offsets:
+        int lastPayloadLength = -1;
+        int payloadBytesReadUpto = 0;
+        for(int i=0;i<posBufferUpto;i++) {
+          final int posDelta = (int)posDeltaBuffer[i];
+          if (fieldHasPayloads) {
+            final int payloadLength = (int)payloadLengthBuffer[i];
+            if (payloadLength != lastPayloadLength) {
+              lastPayloadLength = payloadLength;
+              posOut.writeVInt((posDelta<<1)|1);
+              posOut.writeVInt(payloadLength);
+            } else {
+              posOut.writeVInt(posDelta<<1);
+            }
+
+            if (DEBUG) {
+              System.out.println(" i=" + i + " payloadLen=" + payloadLength);
+            }
+
+            if (payloadLength != 0) {
+              if (DEBUG) {
+                System.out.println(" write payload @ pos.fp=" + posOut.getFilePointer());
+              }
+              posOut.writeBytes(payloadBytes, payloadBytesReadUpto, payloadLength);
+              payloadBytesReadUpto += payloadLength;
+            }
+          } else {
+            posOut.writeVInt(posDelta);
+          }
+
+          if (fieldHasOffsets) {
+            if (DEBUG) {
+              System.out.println(" write offset @ pos.fp=" + posOut.getFilePointer());
+            }
+            posOut.writeVInt((int)offsetStartDeltaBuffer[i]);
+            posOut.writeVInt((int)offsetLengthBuffer[i]);
+          }
+        }
+
+        if (fieldHasPayloads) {
+          assert payloadBytesReadUpto == payloadByteUpto;
+          payloadByteUpto = 0;
+        }
+      }
+      if (DEBUG) {
+        System.out.println(" totalTermFreq=" + stats.totalTermFreq + " lastPosBlockOffset=" + lastPosBlockOffset);
+      }
+    } else {
+      lastPosBlockOffset = -1;
+    }
+
+    int skipOffset;
+    if (docCount > blockSize) {
+      skipOffset = (int) (skipWriter.writeSkip(docOut)-docTermStartFP);
+
+      if (DEBUG) {
+        System.out.println("skip packet " + (docOut.getFilePointer() - (docTermStartFP + skipOffset)) + " bytes");
+      }
+    } else {
+      skipOffset = -1;
+      if (DEBUG) {
+        System.out.println(" no skip: docCount=" + docCount);
+      }
+    }
+
+    long payStartFP;
+    if (stats.totalTermFreq >= blockSize) {
+      payStartFP = payTermStartFP;
+    } else {
+      payStartFP = -1;
+    }
+
+    if (DEBUG) {
+      System.out.println(" payStartFP=" + payStartFP);
+    }
+
+    pendingTerms.add(new PendingTerm(docTermStartFP, posTermStartFP, payStartFP, skipOffset, lastPosBlockOffset));
+    docBufferUpto = 0;
+    posBufferUpto = 0;
+    lastDocID = 0;
+    docCount = 0;
+  }
+
+  private final RAMOutputStream bytesWriter = new RAMOutputStream();
+
+  @Override
+  public void flushTermsBlock(int start, int count) throws IOException {
+
+    if (count == 0) {
+      termsOut.writeByte((byte) 0);
+      return;
+    }
+
+    assert start <= pendingTerms.size();
+    assert count <= start;
+
+    final int limit = pendingTerms.size() - start + count;
+
+    long lastDocStartFP = 0;
+    long lastPosStartFP = 0;
+    long lastPayStartFP = 0;
+    for(int idx=limit-count; idx<limit; idx++) {
+      PendingTerm term = pendingTerms.get(idx);
+
+      bytesWriter.writeVLong(term.docStartFP - lastDocStartFP);
+      lastDocStartFP = term.docStartFP;
+
+      if (fieldHasPositions) {
+        bytesWriter.writeVLong(term.posStartFP - lastPosStartFP);
+        lastPosStartFP = term.posStartFP;
+        if (term.lastPosBlockOffset != -1) {
+          bytesWriter.writeVInt(term.lastPosBlockOffset);
+        }
+        if ((fieldHasPayloads || fieldHasOffsets) && term.payStartFP != -1) {
+          bytesWriter.writeVLong(term.payStartFP - lastPayStartFP);
+          lastPayStartFP = term.payStartFP;
+        }
+      }
+
+      if (term.skipOffset != -1) {
+        bytesWriter.writeVInt(term.skipOffset);
+      }
+    }
+
+    termsOut.writeVInt((int) bytesWriter.getFilePointer());
+    bytesWriter.writeTo(termsOut);
+    bytesWriter.reset();
+
+    // Remove the terms we just wrote:
+    pendingTerms.subList(limit-count, limit).clear();
+  }
+
+  @Override
+  public void close() throws IOException {
+    IOUtils.close(docOut, posOut, payOut);
+  }
+}
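Aside (not part of the commit): the vInt tail encoding in finishTerm above folds the freq == 1 case into the low bit of the doc delta, so the common case costs no extra byte. A worked example, with the values shown being assumptions:

    // docDelta = 5, freq = 1  ->  writeVInt((5 << 1) | 1), i.e. the single value 11
    // docDelta = 5, freq = 3  ->  writeVInt(5 << 1) then writeVInt(3), i.e. 10, 3
    // freqs not indexed       ->  writeVInt(5), the plain delta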
@@ -0,0 +1,205 @@
+package org.apache.lucene.codecs.blockpacked;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.codecs.MultiLevelSkipListReader;
+import org.apache.lucene.store.IndexInput;
+
+/**
+ * Implements the skip list reader for the 4.0 posting list format
+ * that stores positions and payloads.
+ *
+ * @see Lucene40PostingsFormat
+ * @lucene.experimental
+ */
+final class BlockPackedSkipReader extends MultiLevelSkipListReader {
+  private boolean DEBUG = BlockPackedPostingsReader.DEBUG;
+
+  private long docPointer[];
+  private long posPointer[];
+  private long payPointer[];
+  private int posBufferUpto[];
+  private int endOffset[];
+  private int payloadByteUpto[];
+
+  private long lastPosPointer;
+  private long lastPayPointer;
+  private int lastEndOffset;
+  private int lastPayloadByteUpto;
+  private long lastDocPointer;
+  private int lastPosBufferUpto;
+
+  public BlockPackedSkipReader(IndexInput skipStream, int maxSkipLevels, int skipInterval, boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
+    super(skipStream, maxSkipLevels, skipInterval);
+    docPointer = new long[maxSkipLevels];
+    if (hasPos) {
+      posPointer = new long[maxSkipLevels];
+      posBufferUpto = new int[maxSkipLevels];
+      if (hasPayloads) {
+        payloadByteUpto = new int[maxSkipLevels];
+      } else {
+        payloadByteUpto = null;
+      }
+      if (hasOffsets) {
+        endOffset = new int[maxSkipLevels];
+      } else {
+        endOffset = null;
+      }
+      if (hasOffsets || hasPayloads) {
+        payPointer = new long[maxSkipLevels];
+      } else {
+        payPointer = null;
+      }
+    } else {
+      posPointer = null;
+    }
+  }
+
+  public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) {
+    super.init(skipPointer, df);
+    lastDocPointer = docBasePointer;
+    lastPosPointer = posBasePointer;
+    lastPayPointer = payBasePointer;
+
+    Arrays.fill(docPointer, docBasePointer);
+    if (posPointer != null) {
+      Arrays.fill(posPointer, posBasePointer);
+      if (payPointer != null) {
+        Arrays.fill(payPointer, payBasePointer);
+      }
+    } else {
+      assert posBasePointer == 0;
+    }
+  }
+
+  /** Returns the doc pointer of the doc to which the last call of
+   * {@link MultiLevelSkipListReader#skipTo(int)} has skipped. */
+  public long getDocPointer() {
+    return lastDocPointer;
+  }
+
+  public long getPosPointer() {
+    return lastPosPointer;
+  }
+
+  public int getPosBufferUpto() {
+    return lastPosBufferUpto;
+  }
+
+  public long getPayPointer() {
+    return lastPayPointer;
+  }
+
+  public int getEndOffset() {
+    return lastEndOffset;
+  }
+
+  public int getPayloadByteUpto() {
+    return lastPayloadByteUpto;
+  }
+
+  @Override
+  protected void seekChild(int level) throws IOException {
+    super.seekChild(level);
+    if (DEBUG) {
+      System.out.println("seekChild level=" + level);
+    }
+    docPointer[level] = lastDocPointer;
+    if (posPointer != null) {
+      posPointer[level] = lastPosPointer;
+      posBufferUpto[level] = lastPosBufferUpto;
+      if (endOffset != null) {
+        endOffset[level] = lastEndOffset;
+      }
+      if (payloadByteUpto != null) {
+        payloadByteUpto[level] = lastPayloadByteUpto;
+      }
+      if (payPointer != null) {
+        payPointer[level] = lastPayPointer;
+      }
+    }
+  }
+
+  @Override
+  protected void setLastSkipData(int level) {
+    super.setLastSkipData(level);
+    lastDocPointer = docPointer[level];
+    if (DEBUG) {
+      System.out.println("setLastSkipData level=" + level);
+      System.out.println(" lastDocPointer=" + lastDocPointer);
+    }
+    if (posPointer != null) {
+      lastPosPointer = posPointer[level];
+      lastPosBufferUpto = posBufferUpto[level];
+      if (DEBUG) {
+        System.out.println(" lastPosPointer=" + lastPosPointer + " lastPosBUfferUpto=" + lastPosBufferUpto);
+      }
+      if (payPointer != null) {
+        lastPayPointer = payPointer[level];
+      }
+      if (endOffset != null) {
+        lastEndOffset = endOffset[level];
+      }
+      if (payloadByteUpto != null) {
+        lastPayloadByteUpto = payloadByteUpto[level];
+      }
+    }
+  }
+
+  @Override
+  protected int readSkipData(int level, IndexInput skipStream) throws IOException {
+    if (DEBUG) {
+      System.out.println("readSkipData level=" + level);
+    }
+    int delta = skipStream.readVInt();
+    if (DEBUG) {
+      System.out.println(" delta=" + delta);
+    }
+    docPointer[level] += skipStream.readVInt();
+    if (DEBUG) {
+      System.out.println(" docFP=" + docPointer[level]);
+    }
+
+    if (posPointer != null) {
+      posPointer[level] += skipStream.readVInt();
+      if (DEBUG) {
+        System.out.println(" posFP=" + posPointer[level]);
+      }
+      posBufferUpto[level] = skipStream.readVInt();
+      if (DEBUG) {
+        System.out.println(" posBufferUpto=" + posBufferUpto[level]);
+      }
+
+      if (payloadByteUpto != null) {
+        payloadByteUpto[level] = skipStream.readVInt();
+      }
+
+      if (endOffset != null) {
+        endOffset[level] += skipStream.readVInt();
+      }
+
+      if (payPointer != null) {
+        payPointer[level] += skipStream.readVInt();
+      }
+    }
+    return delta;
+  }
+}
@@ -0,0 +1,147 @@
+package org.apache.lucene.codecs.blockpacked;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+
+// nocommit do we need more frequent skips at level > 0?
+// 128*128 is immense? may need to decouple
+// baseSkipInterval & theRestSkipInterval?
+
+final class BlockPackedSkipWriter extends MultiLevelSkipListWriter {
+  private boolean DEBUG = BlockPackedPostingsReader.DEBUG;
+
+  private int[] lastSkipDoc;
+  private long[] lastSkipDocPointer;
+  private long[] lastSkipPosPointer;
+  private long[] lastSkipPayPointer;
+  private int[] lastEndOffset;
+  private int[] lastPayloadByteUpto;
+
+  private final IndexOutput docOut;
+  private final IndexOutput posOut;
+  private final IndexOutput payOut;
+
+  private int curDoc;
+  private long curDocPointer;
+  private long curPosPointer;
+  private long curPayPointer;
+  private int curPosBufferUpto;
+  private int curEndOffset;
+  private int curPayloadByteUpto;
+  private boolean fieldHasPositions;
+  private boolean fieldHasOffsets;
+  private boolean fieldHasPayloads;
+
+  public BlockPackedSkipWriter(int skipInterval, int maxSkipLevels, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) {
+    super(skipInterval, maxSkipLevels, docCount);
+    this.docOut = docOut;
+    this.posOut = posOut;
+    this.payOut = payOut;
+
+    lastSkipDoc = new int[maxSkipLevels];
+    lastSkipDocPointer = new long[maxSkipLevels];
+    if (posOut != null) {
+      lastSkipPosPointer = new long[maxSkipLevels];
+      if (payOut != null) {
+        lastSkipPayPointer = new long[maxSkipLevels];
+      }
+      lastEndOffset = new int[maxSkipLevels];
+      lastPayloadByteUpto = new int[maxSkipLevels];
+    }
+  }
+
+  public void setField(boolean fieldHasPositions, boolean fieldHasOffsets, boolean fieldHasPayloads) {
+    this.fieldHasPositions = fieldHasPositions;
+    this.fieldHasOffsets = fieldHasOffsets;
+    this.fieldHasPayloads = fieldHasPayloads;
+  }
+
+  @Override
+  public void resetSkip() {
+    super.resetSkip();
+    Arrays.fill(lastSkipDoc, 0);
+    Arrays.fill(lastSkipDocPointer, docOut.getFilePointer());
+    if (fieldHasPositions) {
+      Arrays.fill(lastSkipPosPointer, posOut.getFilePointer());
+      if (fieldHasOffsets) {
+        Arrays.fill(lastEndOffset, 0);
+      }
+      if (fieldHasPayloads) {
+        Arrays.fill(lastPayloadByteUpto, 0);
+      }
+      if (fieldHasOffsets || fieldHasPayloads) {
+        Arrays.fill(lastSkipPayPointer, payOut.getFilePointer());
+      }
+    }
+  }
+
+  /**
+   * Sets the values for the current skip data.
+   */
+  public void bufferSkip(int doc, int numDocs, long posFP, long payFP, int posBufferUpto, int endOffset, int payloadByteUpto) throws IOException {
+    this.curDoc = doc;
+    this.curDocPointer = docOut.getFilePointer();
+    this.curPosPointer = posFP;
+    this.curPayPointer = payFP;
+    this.curPosBufferUpto = posBufferUpto;
+    this.curPayloadByteUpto = payloadByteUpto;
+    this.curEndOffset = endOffset;
+    bufferSkip(numDocs);
+  }
+
+  @Override
+  protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
+    int delta = curDoc - lastSkipDoc[level];
+    if (DEBUG) {
+      System.out.println("writeSkipData level=" + level + " lastDoc=" + curDoc + " delta=" + delta + " curDocPointer=" + curDocPointer);
+    }
+    skipBuffer.writeVInt(delta);
+    lastSkipDoc[level] = curDoc;
+
+    skipBuffer.writeVInt((int) (curDocPointer - lastSkipDocPointer[level]));
+    lastSkipDocPointer[level] = curDocPointer;
+
+    if (fieldHasPositions) {
+      if (DEBUG) {
+        System.out.println(" curPosPointer=" + curPosPointer + " curPosBufferUpto=" + curPosBufferUpto);
+      }
+      skipBuffer.writeVInt((int) (curPosPointer - lastSkipPosPointer[level]));
+      lastSkipPosPointer[level] = curPosPointer;
+      skipBuffer.writeVInt(curPosBufferUpto);
+
+      if (fieldHasPayloads) {
+        skipBuffer.writeVInt(curPayloadByteUpto);
+      }
+
+      if (fieldHasOffsets) {
+        skipBuffer.writeVInt(curEndOffset - lastEndOffset[level]);
+        lastEndOffset[level] = curEndOffset;
+      }
+
+      if (fieldHasOffsets || fieldHasPayloads) {
+        skipBuffer.writeVInt((int) (curPayPointer - lastSkipPayPointer[level]));
+        lastSkipPayPointer[level] = curPayPointer;
+      }
+    }
+  }
+}
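For reference (summary added here, not in the commit), writeSkipData above lays one skip entry out as vInts, with the bracketed fields present only when the field indexes them:

    // docDelta, docFPDelta,
    // [ posFPDelta, posBufferUpto,
    //   [ payloadByteUpto ]    -- field has payloads
    //   [ endOffsetDelta ]     -- field has offsets
    //   [ payFPDelta ] ]       -- field has payloads or offsets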
@@ -0,0 +1,130 @@
+package org.apache.lucene.codecs.blockpacked;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.LongBuffer;
+import java.nio.IntBuffer;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedInts.Reader;
+import org.apache.lucene.util.packed.PackedInts.Writer;
+import org.apache.lucene.util.packed.PackedInts.Mutable;
+import org.apache.lucene.util.packed.PackedInts.Encoder;
+import org.apache.lucene.util.packed.PackedInts.Decoder;
+
+/**
+ * Encode all values in normal area with fixed bit width,
+ * which is determined by the max value in this block.
+ */
+public class ForUtil {
+  protected static final int[] MASK = { 0x00000000,
+    0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
+    0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
+    0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff,
+    0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
+    0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
+    0x7fffffff, 0xffffffff};
+
+  /** Compress given int[] into output stream, with For format
+   */
+  public static int compress(final LongBuffer data, LongBuffer packed) throws IOException {
+    int numBits=getNumBits(data.array());
+
+    if (numBits == 0) { // when block is equal, save the value once
+      packed.put(0, data.get(0)<<32); // java uses big endian for LongBuffer impl
+      return (getHeader(1,numBits));
+    }
+
+    PackedInts.Format format = PackedInts.fastestFormatAndBits(128, numBits, PackedInts.FASTEST).format;
+    PackedInts.Encoder encoder = PackedInts.getEncoder(format, PackedInts.VERSION_CURRENT, numBits);
+    int perIter = encoder.values();
+    int iters = 128/perIter;
+    int nblocks = encoder.blocks()*iters;
+    assert 128 % perIter == 0;
+
+    packed.rewind();
+    data.rewind();
+
+    encoder.encode(data, packed, iters);
+
+    int encodedSize = nblocks*2;
+    return getHeader(encodedSize,numBits);
+  }
+
+  /** Decompress given ouput stream into int array.
+   */
+  public static void decompress(LongBuffer data, LongBuffer packed, int header) throws IOException {
+    // nocommit assert header isn't "malformed", ie besides
+    // numBytes / bit-width there is nothing else!
+
+    packed.rewind();
+    data.rewind();
+    int numBits = ((header >> 8) & MASK[6]);
+
+    if (numBits == 0) {
+      Arrays.fill(data.array(), (int)(packed.get(0)>>>32));
+      return;
+    }
+
+    PackedInts.Format format = PackedInts.fastestFormatAndBits(128, numBits, PackedInts.FASTEST).format;
+    PackedInts.Decoder decoder = PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, numBits);
+    int perIter = decoder.values();
+    int iters = 128/perIter;
+    int nblocks = decoder.blocks()*iters;
+    assert 128 % perIter == 0;
+
+    decoder.decode(packed, data, iters);
+  }
+
+  static int getNumBits(final long[] data) {
+    if (isAllEqual(data)) {
+      return 0;
+    }
+    int size=data.length;
+    int optBits=1;
+    for (int i=0; i<size; ++i) {
+      while ((data[i] & ~MASK[optBits]) != 0) {
+        optBits++;
+      }
+    }
+    return optBits;
+  }
+
+  protected static boolean isAllEqual(final long[] data) {
+    int len = data.length;
+    long v = data[0];
+    for (int i=1; i<len; i++) {
+      if (data[i] != v) {
+        return false;
+      }
+    }
+    return true;
+  }
+  static int getHeader(int encodedSize, int numBits) {
+    return (encodedSize)
+         | ((numBits) << 8);
+  }
+  public static int getEncodedSize(int header) {
+    return ((header & MASK[8]))*4;
+  }
+  public static int getNumBits(int header) {
+    return ((header >> 8) & MASK[6]);
+  }
+}
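Worked example (illustration only, values are assumptions) of the block header produced by getHeader above: the low 8 bits carry the encoded size in 4-byte units, bits 8-13 the bit width.

    int header = ForUtil.getHeader(32, 13);      // 32 | (13 << 8) = 3360
    int bytes  = ForUtil.getEncodedSize(header); // (3360 & MASK[8]) * 4 = 128 bytes
    int bits   = ForUtil.getNumBits(header);     // (3360 >> 8) & MASK[6] = 13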
@ -1,128 +0,0 @@
|
||||||
package org.apache.lucene.codecs.pfor;
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.IntBuffer;
|
|
||||||
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.codecs.sep.IntStreamFactory;
|
|
||||||
import org.apache.lucene.codecs.sep.IntIndexInput;
|
|
||||||
import org.apache.lucene.codecs.sep.IntIndexOutput;
|
|
||||||
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput;
|
|
||||||
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Used to plug to PostingsReader/WriterBase.
|
|
||||||
* Encoder and decoder in lower layers are called by
|
|
||||||
* flushBlock() and readBlock()
|
|
||||||
*/
|
|
||||||
|
|
||||||
public final class ForFactory extends IntStreamFactory {
|
|
||||||
|
|
||||||
public ForFactory() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
|
|
||||||
boolean success = false;
|
|
||||||
IndexOutput out = dir.createOutput(fileName, context);
|
|
||||||
try {
|
|
||||||
IntIndexOutput ret = new ForIndexOutput(out);
|
|
||||||
success = true;
|
|
||||||
return ret;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
// For some cases (e.g. disk full), the IntIndexOutput may not be
|
|
||||||
// properly created. So we should close those opened files.
|
|
||||||
IOUtils.closeWhileHandlingException(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
|
|
||||||
return new ForIndexInput(dir.openInput(fileName, context));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Here we'll hold both input buffer and output buffer for
|
|
||||||
* encoder/decoder.
|
|
||||||
*/
|
|
||||||
private class ForIndexInput extends FixedIntBlockIndexInput {
|
|
||||||
|
|
||||||
ForIndexInput(final IndexInput in) throws IOException {
|
|
||||||
super(in);
|
|
||||||
}
|
|
||||||
|
|
||||||
class ForBlockReader implements FixedIntBlockIndexInput.BlockReader {
|
|
||||||
private final byte[] encoded;
|
|
||||||
private final int[] buffer;
|
|
||||||
private final IndexInput in;
|
|
||||||
private final IntBuffer encodedBuffer;
|
|
||||||
|
|
||||||
ForBlockReader(final IndexInput in, final int[] buffer) {
|
|
||||||
// upperbound for encoded value should include(here header is not buffered):
|
|
||||||
// blockSize of normal value when numFrameBits=32(4x bytes);
|
|
||||||
this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4];
|
|
||||||
this.in = in;
|
|
||||||
this.buffer = buffer;
|
|
||||||
this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: implement public void skipBlock() {} ?
|
|
||||||
@Override
|
|
||||||
public void readBlock() throws IOException {
|
|
||||||
final int header = in.readInt();
|
|
||||||
final int numBytes = ForUtil.getEncodedSize(header);
|
|
||||||
assert numBytes <= ForPostingsFormat.DEFAULT_BLOCK_SIZE*4;
|
|
||||||
in.readBytes(encoded,0,numBytes);
|
|
||||||
ForUtil.decompress(encodedBuffer,buffer,header);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
|
|
||||||
return new ForBlockReader(in,buffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private class ForIndexOutput extends FixedIntBlockIndexOutput {
|
|
||||||
private final byte[] encoded;
|
|
||||||
private final IntBuffer encodedBuffer;
|
|
||||||
|
|
||||||
ForIndexOutput(IndexOutput out) throws IOException {
|
|
||||||
super(out,ForPostingsFormat.DEFAULT_BLOCK_SIZE);
|
|
||||||
this.encoded = new byte[ForPostingsFormat.DEFAULT_BLOCK_SIZE*4];
|
|
||||||
this.encodedBuffer=ByteBuffer.wrap(encoded).asIntBuffer();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void flushBlock() throws IOException {
|
|
||||||
final int header = ForUtil.compress(buffer,encodedBuffer);
|
|
||||||
final int numBytes = ForUtil.getEncodedSize(header);
|
|
||||||
// nocommit writeVInt instead?
|
|
||||||
out.writeInt(header);
|
|
||||||
out.writeBytes(encoded, numBytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
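The deleted reader/writer pair above only deals in whole blocks: flushBlock() writes a 4-byte header followed by the bit-packed payload, and readBlock() reads the header back, derives the byte count, and decodes. Below is a minimal, self-contained sketch of that framing; packBits/unpackBits and the toy header layout are illustrative stand-ins, not the actual ForUtil code (the real header packs more metadata, as PForUtil further below documents).

import java.nio.ByteBuffer;

public class ForFramingSketch {
  // Pack each value into numBits bits, LSB-first, via a long accumulator.
  static byte[] packBits(int[] values, int numBits) {
    byte[] out = new byte[(values.length * numBits + 7) / 8];
    long acc = 0; int accBits = 0, pos = 0;
    for (int v : values) {
      acc |= (v & ((1L << numBits) - 1)) << accBits;
      accBits += numBits;
      while (accBits >= 8) { out[pos++] = (byte) acc; acc >>>= 8; accBits -= 8; }
    }
    if (accBits > 0) out[pos] = (byte) acc;
    return out;
  }

  static void unpackBits(byte[] in, int[] values, int numBits) {
    long acc = 0; int accBits = 0, pos = 0;
    for (int i = 0; i < values.length; i++) {
      while (accBits < numBits) { acc |= (in[pos++] & 0xFFL) << accBits; accBits += 8; }
      values[i] = (int) (acc & ((1L << numBits) - 1));
      acc >>>= numBits; accBits -= numBits;
    }
  }

  public static void main(String[] args) {
    int numBits = 5;
    int[] block = new int[128];
    for (int i = 0; i < block.length; i++) block[i] = i % 32; // all fit in 5 bits

    // flushBlock() analogue: header int, then the packed payload.
    byte[] payload = packBits(block, numBits);
    ByteBuffer buf = ByteBuffer.allocate(4 + payload.length);
    buf.putInt((payload.length << 8) | numBits); // toy header: size + bit width
    buf.put(payload);

    // readBlock() analogue: header first, then exactly numBytes of payload.
    buf.flip();
    int header = buf.getInt();
    byte[] enc = new byte[header >>> 8];
    buf.get(enc);
    int[] decoded = new int[128];
    unpackBits(enc, decoded, header & 0xFF);
    System.out.println(java.util.Arrays.equals(block, decoded)); // true
  }
}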
@@ -1,129 +0,0 @@
package org.apache.lucene.codecs.pfor;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;

import org.apache.lucene.util.IOUtils;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.codecs.sep.IntStreamFactory;
import org.apache.lucene.codecs.sep.IntIndexInput;
import org.apache.lucene.codecs.sep.IntIndexOutput;
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexInput;
import org.apache.lucene.codecs.intblock.FixedIntBlockIndexOutput;

/**
 * Used to plug into PostingsReader/WriterBase.
 * The encoder and decoder in the lower layers are called by
 * flushBlock() and readBlock().
 */

public final class PForFactory extends IntStreamFactory {

  public PForFactory() {
  }

  @Override
  public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
    boolean success = false;
    IndexOutput out = dir.createOutput(fileName, context);
    try {
      IntIndexOutput ret = new PForIndexOutput(out);
      success = true;
      return ret;
    } finally {
      if (!success) {
        // In some cases (e.g. disk full) the IntIndexOutput may not be
        // created properly, so we must close the file we opened.
        IOUtils.closeWhileHandlingException(out);
      }
    }
  }

  @Override
  public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
    return new PForIndexInput(dir.openInput(fileName, context));
  }

  /**
   * Holds both the input buffer and the output buffer for
   * the encoder/decoder.
   */
  private class PForIndexInput extends FixedIntBlockIndexInput {

    PForIndexInput(final IndexInput in) throws IOException {
      super(in);
    }

    class PForBlockReader implements FixedIntBlockIndexInput.BlockReader {
      private final byte[] encoded;
      private final int[] buffer;
      private final IndexInput in;
      private final IntBuffer encodedBuffer;

      PForBlockReader(final IndexInput in, final int[] buffer) {
        // upper bound for the encoded size (the header is not buffered here):
        // 1. blockSize normal values (4 bytes each);
        // 2. blockSize exception values (4 bytes each)
        this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
        this.in = in;
        this.buffer = buffer;
        this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
      }

      // TODO: implement public void skipBlock() {} ?
      @Override
      public void readBlock() throws IOException {
        final int header = in.readInt();
        final int numBytes = PForUtil.getEncodedSize(header);
        assert numBytes <= PForPostingsFormat.DEFAULT_BLOCK_SIZE*8;
        in.readBytes(encoded, 0, numBytes);
        PForUtil.decompress(encodedBuffer, buffer, header);
      }
    }

    @Override
    protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
      return new PForBlockReader(in, buffer);
    }
  }

  private class PForIndexOutput extends FixedIntBlockIndexOutput {
    private final byte[] encoded;
    private final IntBuffer encodedBuffer;

    PForIndexOutput(IndexOutput out) throws IOException {
      super(out, PForPostingsFormat.DEFAULT_BLOCK_SIZE);
      this.encoded = new byte[PForPostingsFormat.DEFAULT_BLOCK_SIZE*8];
      this.encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
    }

    @Override
    protected void flushBlock() throws IOException {
      final int header = PForUtil.compress(buffer, encodedBuffer);
      final int numBytes = PForUtil.getEncodedSize(header);
      // nocommit writeVInt instead?
      out.writeInt(header);
      out.writeBytes(encoded, numBytes);
    }
  }
}
@@ -1,115 +0,0 @@
package org.apache.lucene.codecs.pfor;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.codecs.BlockTreeTermsReader;
import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.FixedGapTermsIndexReader;
import org.apache.lucene.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermsIndexReaderBase;
import org.apache.lucene.codecs.TermsIndexWriterBase;
import org.apache.lucene.codecs.sep.SepPostingsReader;
import org.apache.lucene.codecs.sep.SepPostingsWriter;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

/**
 * Passes PForFactory to a PostingsWriter/ReaderBase so that the
 * customized postings format is plugged in.
 */
public final class PForPostingsFormat extends PostingsFormat {
  private final int minBlockSize;
  private final int maxBlockSize;
  public final static int DEFAULT_BLOCK_SIZE = 128;

  public PForPostingsFormat() {
    super("PFor");
    this.minBlockSize = BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE;
    this.maxBlockSize = BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE;
  }

  public PForPostingsFormat(int minBlockSize, int maxBlockSize) {
    super("PFor");
    this.minBlockSize = minBlockSize;
    assert minBlockSize > 1;
    this.maxBlockSize = maxBlockSize;
    assert minBlockSize <= maxBlockSize;
  }

  @Override
  public String toString() {
    return getName() + "(blocksize=" + DEFAULT_BLOCK_SIZE + ")";
  }

  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    // TODO: implement a new PostingsWriterBase to improve skip-settings
    PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new PForFactory());
    boolean success = false;
    try {
      FieldsConsumer ret = new BlockTreeTermsWriter(state,
                                                    postingsWriter,
                                                    minBlockSize,
                                                    maxBlockSize);
      success = true;
      return ret;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(postingsWriter);
      }
    }
  }

  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    PostingsReaderBase postingsReader = new SepPostingsReader(state.dir,
                                                              state.fieldInfos,
                                                              state.segmentInfo,
                                                              state.context,
                                                              new PForFactory(),
                                                              state.segmentSuffix);
    boolean success = false;
    try {
      FieldsProducer ret = new BlockTreeTermsReader(state.dir,
                                                    state.fieldInfos,
                                                    state.segmentInfo.name,
                                                    postingsReader,
                                                    state.context,
                                                    state.segmentSuffix,
                                                    state.termsIndexDivisor);
      success = true;
      return ret;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(postingsReader);
      }
    }
  }
}
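For context on how a format like the one above would have been selected at indexing time: per-field postings formats are chosen by the codec. A hedged sketch against the 4.0-era API, assuming Lucene40Codec exposes a getPostingsFormatForField override as on trunk at the time (exact signatures may differ on this branch):

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.index.IndexWriterConfig;

public class PForCodecWiring {
  // Mutates the config so its codec routes every field to the PFor format.
  static IndexWriterConfig withPFor(IndexWriterConfig iwc) {
    iwc.setCodec(new Lucene40Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return new PForPostingsFormat();
      }
    });
    return iwc;
  }
}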
@@ -1,343 +0,0 @@
package org.apache.lucene.codecs.pfor;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.nio.IntBuffer;
import java.nio.ByteBuffer;
import java.util.Arrays;

/**
 * Encodes all small values and exception pointers in the normal area,
 * and large values in the exception area.
 * The size per exception is variable: 1, 2, or 4 bytes.
 */
public final class PForUtil extends ForUtil {

  protected static final int[] PER_EXCEPTION_SIZE = {1, 2, 4};

  /** Compress the given int[] into the integer buffer, in PFor format.
   *
   * @param data uncompressed data
   * @param intBuffer integer buffer to hold compressed data
   * @return block header
   */
  public static int compress(final int[] data, IntBuffer intBuffer) {
    // estimate the minimum compressed size to determine numFrameBits
    int numBits = getNumBits(data);
    if (numBits == 0) {
      return compressDuplicateBlock(data, intBuffer);
    }

    int size = data.length;
    int[] excValues = new int[size];
    int excNum = 0, excLastPos = -1, excFirstPos = -1, excLastNonForcePos = -1;

    // number of exceptions up to the last non-forced exception
    int excNumBase = 0;

    // bytes per exception
    int excBytes = 1;

    // bytes before the exception area, i.e. header and normal area
    int excByteOffset = 0;

    // the max value possible for the current exception pointer;
    // the value of the first pointer is limited by the header to 254
    // (the first exception position ranges from -1 to 254)
    long maxChainFirst = 254;
    long maxChain = maxChainFirst + 1;

    boolean conValue, conForce, conEnd;
    int i = 0;

    // estimate exceptions
    for (i = 0; i < size; ++i) {
      conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
      conForce = (i >= maxChain + excLastPos);           // forced exception
      if (conValue || conForce) {
        excValues[excNum++] = data[i];
        if (excLastPos == -1) {
          maxChain = 1L << numBits;
          excFirstPos = i;
        }
        if (conValue) {
          excLastNonForcePos = i;
          excNumBase = excNum;
        }
        excLastPos = i;
      }
    }

    // encode the normal area, recording exception positions
    excNum = 0;
    if (excFirstPos < 0) { // no exception
      for (i = 0; i < size; ++i) {
        encodeNormalValue(intBuffer, i, data[i], numBits);
      }
      excLastPos = -1;
    } else {
      for (i = 0; i < excFirstPos; ++i) {
        encodeNormalValue(intBuffer, i, data[i], numBits);
      }
      maxChain = 1L << numBits;
      excLastPos = excFirstPos;
      excNum = i < size ? 1 : 0;
      for (i = excFirstPos + 1; i < size; ++i) {
        conValue = ((data[i] & MASK[numBits]) != data[i]); // value exception
        conForce = (i >= maxChain + excLastPos);           // forced exception
        conEnd = (excNum == excNumBase);                   // following forced ones are ignored
        if ((!conValue && !conForce) || conEnd) {
          encodeNormalValue(intBuffer, i, data[i], numBits);
        } else {
          encodeNormalValue(intBuffer, excLastPos, i - excLastPos - 1, numBits);
          excNum++;
          excLastPos = i;
        }
      }
    }

    // encode the exception area
    for (i = 0; i < excNum; ++i) {
      if (excBytes < 2 && (excValues[i] & ~MASK[8]) != 0) {
        excBytes = 2;
      }
      if (excBytes < 4 && (excValues[i] & ~MASK[16]) != 0) {
        excBytes = 4;
      }
    }
    excByteOffset = (size * numBits + 7) / 8;
    encodeExcValues(intBuffer, excValues, excNum, excBytes, excByteOffset);

    // encode the header
    int encodedSize = (excByteOffset + excBytes * excNum + 3) / 4;

    return getHeader(encodedSize, numBits, excNum, excFirstPos, excBytes);
  }

  /** Decompress the given integer buffer into an int array.
   *
   * @param intBuffer integer buffer holding the compressed data
   * @param data int array to hold the uncompressed data
   */
  public static void decompress(IntBuffer intBuffer, int[] data, int header) {
    // since this buffer is reused at the upper level, rewind first
    intBuffer.rewind();

    int excNum = ((header >> 8) & MASK[8]) + 1;
    int excFirstPos = ((header >> 16) & MASK[8]) - 1;
    int excBytes = PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
    int numBits = ((header >> 24) & MASK[6]);

    decompressCore(intBuffer, data, numBits);

    patchException(intBuffer, data, excNum, excFirstPos, excBytes);
  }

  /**
   * Encode exception values into the exception area.
   * The width of each exception is fixed at 1, 2, or 4 bytes.
   */
  static void encodeExcValues(IntBuffer intBuffer, int[] values, int num, int perbytes, int byteOffset) {
    if (num == 0)
      return;
    if (perbytes == 1) {
      int curBytePos = byteOffset;
      for (int i = 0; i < num; ++i) {
        int curIntPos = curBytePos / 4;
        setBufferIntBits(intBuffer, curIntPos, (curBytePos & 3) * 8, 8, values[i]);
        curBytePos++;
      }
    } else if (perbytes == 2) {
      int shortOffset = (byteOffset + 1) / 2;
      int curIntPos = shortOffset / 2;
      int i = 0;
      if ((shortOffset & 1) == 1) { // cut the head so the rest fits whole ints
        setBufferIntBits(intBuffer, curIntPos++, 16, 16, values[i++]);
      }
      for (; i < num - 1; i += 2) {
        intBuffer.put(curIntPos++, (values[i + 1] << 16) | values[i]);
      }
      if (i < num) {
        intBuffer.put(curIntPos, values[i]); // cut the tail, also clearing the high 16 bits
      }
    } else if (perbytes == 4) {
      int curIntPos = (byteOffset + 3) / 4;
      for (int i = 0; i < num; ++i) {
        intBuffer.put(curIntPos++, values[i]);
      }
    }
  }

  /**
   * Save only the header plus a single int when all values in the block are equal.
   */
  static int compressDuplicateBlock(final int[] data, IntBuffer intBuffer) {
    intBuffer.put(0, data[0]);
    return getHeader(1, 0, 0, -1, 0);
  }

  /**
   * Decode exception values based on the exception pointers in the normal area
   * and the values in the exception area.
   * In the current implementation numInts is hardwired to 128, so the tail of
   * the normal area is naturally aligned to 32 bits and we don't need to
   * rewind intBuffer here.
   * However, the normal area may share an int with the exception area when
   * numFrameBits * numInts % 32 != 0; in that case several leading exceptions
   * must be patched before calling this method.
   */
  public static void patchException(IntBuffer intBuffer, int[] data, int excNum, int excFirstPos, int excBytes) {
    if (excFirstPos == -1) {
      return;
    }
    int curPos = excFirstPos;
    int i, j;

    if (excBytes == 1) { // each exception consumes 1 byte
      for (i = 0; i + 3 < excNum; i += 4) {
        final int curInt = intBuffer.get();
        curPos = patch(data, curPos, (curInt) & MASK[8]);
        curPos = patch(data, curPos, (curInt >>> 8) & MASK[8]);
        curPos = patch(data, curPos, (curInt >>> 16) & MASK[8]);
        curPos = patch(data, curPos, (curInt >>> 24) & MASK[8]);
      }
      if (i < excNum) {
        final int curInt = intBuffer.get();
        for (j = 0; j < 32 && i < excNum; j += 8, i++) {
          curPos = patch(data, curPos, (curInt >>> j) & MASK[8]);
        }
      }
    } else if (excBytes == 2) { // each exception consumes 2 bytes
      for (i = 0; i + 1 < excNum; i += 2) {
        final int curInt = intBuffer.get();
        curPos = patch(data, curPos, (curInt) & MASK[16]);
        curPos = patch(data, curPos, (curInt >>> 16) & MASK[16]);
      }
      if (i < excNum) {
        final int curInt = intBuffer.get();
        curPos = patch(data, curPos, (curInt) & MASK[16]);
      }
    } else if (excBytes == 4) { // each exception consumes 4 bytes
      for (i = 0; i < excNum; i++) {
        curPos = patch(data, curPos, intBuffer.get());
      }
    }
  }

  static int patch(int[] data, int pos, int value) {
    int nextPos = data[pos] + pos + 1;
    data[pos] = value;
    assert nextPos > pos;
    return nextPos;
  }

  /**
   * Estimate the best number of frame bits according to the minimum
   * compressed size. This runs 32 times.
   */
  static int getNumBits(final int[] data) {
    if (isAllEqual(data)) {
      return 0;
    }
    int optBits = 1;
    int optSize = estimateCompressedSize(data, optBits);
    for (int i = 2; i <= 32; ++i) {
      int curSize = estimateCompressedSize(data, i);
      if (curSize < optSize) {
        optSize = curSize;
        optBits = i;
      }
    }
    return optBits;
  }

  /**
   * Iterate over the whole block to find the maximum exception width,
   * and estimate the compressed size ignoring forced exceptions.
   * TODO: foresee forced exceptions for a better estimate
   */
  static int estimateCompressedSize(final int[] data, int numBits) {
    int size = data.length;
    int totalBytes = (numBits * size + 7) / 8; // always round up to whole bytes
    int excNum = 0;
    int curExcBytes = 1;
    for (int i = 0; i < size; ++i) {
      if ((data[i] & ~MASK[numBits]) != 0) { // exception
        excNum++;
        if (curExcBytes < 2 && (data[i] & ~MASK[8]) != 0) { // exceeds a 1-byte exception
          curExcBytes = 2;
        }
        if (curExcBytes < 4 && (data[i] & ~MASK[16]) != 0) { // exceeds a 2-byte exception
          curExcBytes = 4;
        }
      }
    }
    if (curExcBytes == 2) {
      totalBytes = ((totalBytes + 1) / 2) * 2; // round up to 2-byte alignment before filling exceptions
    } else if (curExcBytes == 4) {
      totalBytes = ((totalBytes + 3) / 4) * 4; // round up to 4-byte alignment
    }
    totalBytes += excNum * curExcBytes;

    return totalBytes / 4 * 4; // truncate to whole ints
  }

  /**
   * Generate the 4-byte header, which contains (from lsb to msb):
   *
   * 8 bits for the encoded block size in ints (excluding the header; this limits DEFAULT_BLOCK_SIZE <= 2^(8-1))
   *
   * 8 bits for the exception count - 1 (undefined when there are no exceptions)
   *
   * 8 bits for the index of the first exception + 1 (0 when there are no exceptions)
   *
   * 6 bits for the number of frame bits (when 0, all values in the block are the same)
   * 2 bits for the exception code: 00: byte, 01: short, 10: int
   */
  static int getHeader(int encodedSize, int numBits, int excNum, int excFirstPos, int excBytes) {
    return (encodedSize)
         | (((excNum - 1) & MASK[8]) << 8)
         | ((excFirstPos + 1) << 16)
         | ((numBits) << 24)
         | ((excBytes / 2) << 30);
  }

  /**
   * Expert: get metadata from the header.
   */
  public static int getEncodedSize(int header) {
    return ((header & MASK[8])) * 4;
  }
  public static int getExcNum(int header) {
    return ((header >> 8) & MASK[8]) + 1;
  }
  public static int getFirstPos(int header) {
    return ((header >> 16) & MASK[8]) - 1;
  }
  public static int getExcBytes(int header) {
    return PER_EXCEPTION_SIZE[(header >> 30) & MASK[2]];
  }
  public static int getNumBits(int header) {
    return ((header >> 24) & MASK[6]);
  }
}
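A worked example of the 4-byte header layout documented in getHeader() above may help. The numbers are made up for illustration: suppose a block compressed to 35 ints with numFrameBits = 7 and three 2-byte exceptions, the first at index 10. The decoding expressions mirror the "Expert" getters in the deleted PForUtil.

public class PForHeaderExample {
  public static void main(String[] args) {
    int header = 35                  // bits 0-7  : encoded size in ints
               | ((3 - 1) << 8)      // bits 8-15 : exception count - 1
               | ((10 + 1) << 16)    // bits 16-23: first exception index + 1
               | (7 << 24)           // bits 24-29: num frame bits
               | ((2 / 2) << 30);    // bits 30-31: 00=byte, 01=short, 10=int

    System.out.println((header & 0xFF) * 4);         // 140 bytes, cf. getEncodedSize
    System.out.println(((header >> 8) & 0xFF) + 1);  // 3,   cf. getExcNum
    System.out.println(((header >> 16) & 0xFF) - 1); // 10,  cf. getFirstPos
    System.out.println((header >> 24) & 0x3F);       // 7,   cf. getNumBits
    System.out.println(1 << ((header >>> 30) & 3));  // 2 bytes, cf. getExcBytes
  }
}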
@@ -17,8 +17,6 @@ org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat
 org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat
 org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
 org.apache.lucene.codecs.memory.MemoryPostingsFormat
-org.apache.lucene.codecs.pfor.ForPostingsFormat
-org.apache.lucene.codecs.pfor.PForPostingsFormat
 org.apache.lucene.codecs.bulkvint.BulkVIntPostingsFormat
 org.apache.lucene.codecs.block.BlockPostingsFormat
 org.apache.lucene.codecs.memory.DirectPostingsFormat

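The services file above is what lets SPI resolve a postings format by the name passed to its constructor; with the two pfor entries removed, name-based lookup of "For" and "PFor" now fails. A hedged sketch of the lookup (assuming BlockPostingsFormat registers under the name "Block" on this branch):

import org.apache.lucene.codecs.PostingsFormat;

public class SpiLookupSketch {
  public static void main(String[] args) {
    // Resolves via META-INF/services/org.apache.lucene.codecs.PostingsFormat.
    // After this commit, PostingsFormat.forName("PFor") would throw an
    // IllegalArgumentException since the entry is gone.
    PostingsFormat block = PostingsFormat.forName("Block"); // still registered
    System.out.println(block);
  }
}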
@@ -1,293 +0,0 @@
package org.apache.lucene.codecs.pfor;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.Locale;
import java.util.Random;

import org.apache.lucene.codecs.pfor.ForPostingsFormat;
import org.apache.lucene.codecs.pfor.PForUtil;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Tests the core utilities for PFor compression and decompression.
 * We don't provide a separate test case for the For encoder/decoder, since
 * PFor is an extended version of For and most methods are reused here.
 */
public class TestPForUtil extends LuceneTestCase {
  static final int[] MASK = { 0x00000000,
    0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f,
    0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
    0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff,
    0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff,
    0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff,
    0x7fffffff, 0xffffffff};
  Random gen;
  public void initRandom() {
    this.gen = random();
  }

  /**
   * Should not encode extra information other than a single int.
   */
  public void testAllEqual() throws Exception {
    initRandom();
    int sz = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
    int[] data = new int[sz];
    byte[] res = new byte[sz*8];
    int[] copy = new int[sz];
    IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
    int ensz;
    int header;

    Arrays.fill(data, gen.nextInt());
    header = ForUtil.compress(data, resBuffer); // test For
    ensz = ForUtil.getEncodedSize(header);
    assert ensz == 4;

    ForUtil.decompress(resBuffer, copy, header);
    assert cmp(data, sz, copy, sz) == true;

    Arrays.fill(data, gen.nextInt());
    header = PForUtil.compress(data, resBuffer); // test PFor
    ensz = PForUtil.getEncodedSize(header);
    assert ensz == 4;

    PForUtil.decompress(resBuffer, copy, header);
    assert cmp(data, sz, copy, sz) == true;
  }

  /**
   * Test correctness of forced exceptions:
   * the forced ones should exactly fit the max chain length.
   */
  public void testForcedExceptionDistance() throws Exception {
    initRandom();
    int sz = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
    int[] data = new int[sz];
    byte[] res = new byte[sz*8];
    int[] copy = new int[sz];
    IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();
    int numBits = gen.nextInt(5) + 1;

    int i, j;
    int pace, ensz, header;
    int expect, got;

    // fill exception values at a fixed pace; there should
    // be no forced exceptions.
    createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]);
    pace = 1 << numBits;
    for (i = 0, j = 0; i < sz; i += pace) {
      int exc = gen.nextInt();
      data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
      j++;
    }
    header = PForUtil.compress(data, resBuffer);
    ensz = PForUtil.getEncodedSize(header);
    expect = j;
    got = PForUtil.getExcNum(header);
    assert expect == got : expect + " expected but got " + got;

    // there should be exactly one forced exception before each
    // exception when i>0
    createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]);
    pace = (1 << numBits) + 1;
    for (i = 0, j = 0; i < sz; i += pace) {
      int exc = gen.nextInt();
      data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
      j++;
    }
    header = PForUtil.compress(data, resBuffer);
    ensz = PForUtil.getEncodedSize(header);
    expect = 2*(j-1) + 1;
    got = PForUtil.getExcNum(header);
    assert expect == got : expect + " expected but got " + got;

    // two forced exceptions
    createDistribution(data, sz, 1, MASK[numBits], MASK[numBits]);
    pace = (1 << numBits)*2 + 1;
    for (i = 0, j = 0; i < sz; i += pace) {
      int exc = gen.nextInt();
      data[i] = (exc & 0xffff0000) == 0 ? exc | 0xffff0000 : exc;
      j++;
    }
    header = PForUtil.compress(data, resBuffer);
    ensz = PForUtil.getEncodedSize(header);
    expect = 3*(j-1) + 1;
    got = PForUtil.getExcNum(header);
    assert expect == got : expect + " expected but got " + got;
  }

  /**
   * Test correctness of ignored forced exceptions.
   * Trailing forced exceptions should always be reverted
   * since they're not necessary.
   */
  public void testTrailingForcedException() throws Exception {
    initRandom();
    int sz = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
    assert sz % 32 == 0;
    Integer[] buff = new Integer[sz];
    int[] data = new int[sz];
    int[] copy = new int[sz];
    byte[] res = new byte[sz*8];
    IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();

    int excIndex = gen.nextInt(sz/2);
    int excValue = gen.nextInt();
    if ((excValue & 0xffff0000) == 0) {
      excValue |= 0xffff0000; // always prepare a 4-byte exception
    }

    // keep numFrameBits small so forced exceptions are easy to hit
    for (int i = 0; i < sz; ++i) {
      buff[i] = gen.nextInt() & 1;
    }
    // create only one value exception
    buff[excIndex] = excValue;

    for (int i = 0; i < sz; ++i)
      data[i] = buff[i];

    int header = PForUtil.compress(data, resBuffer);
    int ensz = PForUtil.getEncodedSize(header);

    assert (ensz <= sz*8) : ensz + " > " + sz*8; // must not exceed the loose upper bound
    assert (ensz >= 4); // at least we have an exception, right?

    PForUtil.decompress(resBuffer, copy, header);

    // println(getHex(data,sz)+"\n");
    // println(getHex(res,ensz)+"\n");
    // println(getHex(copy,sz)+"\n");

    // fetch the last int, i.e. the last exception.
    int lastExc = (res[ensz-4] << 24) |
                  ((0xff & res[ensz-3]) << 16) |
                  ((0xff & res[ensz-2]) << 8) |
                  (0xff & res[ensz-1]);

    // trailing forced exceptions are suppressed,
    // so the last exception should be what we assigned.
    assert lastExc == excValue;
    assert cmp(data, sz, copy, sz) == true;
  }

  /**
   * Test correctness of compressing and decompressing.
   * Here we randomly assign a rate of exceptions (i.e. 1 - alpha),
   * and test different scales of normal/exception values.
   */
  public void testAllDistribution() throws Exception {
    initRandom();
    int sz = ForPostingsFormat.DEFAULT_BLOCK_SIZE;
    int[] data = new int[sz];
    for (int i = 0; i <= 32; ++i) { // try to test every kind of distribution
      double alpha = gen.nextDouble(); // rate of normal values
      for (int j = i; j <= 32; ++j) {
        createDistribution(data, sz, alpha, MASK[i], MASK[j]);
        tryCompressAndDecompress(data, sz);
      }
    }
  }

  public void createDistribution(int[] data, int sz, double alpha, int masknorm, int maskexc) {
    Integer[] buff = new Integer[sz];
    int i = 0;
    for (; i < sz*alpha; ++i)
      buff[i] = gen.nextInt() & masknorm;
    for (; i < sz; ++i)
      buff[i] = gen.nextInt() & maskexc;
    Collections.shuffle(Arrays.asList(buff), gen);
    for (i = 0; i < sz; ++i)
      data[i] = buff[i];
  }

  public void tryCompressAndDecompress(final int[] data, int sz) throws Exception {
    byte[] res = new byte[sz*8]; // loose upper bound
    IntBuffer resBuffer = ByteBuffer.wrap(res).asIntBuffer();

    int header = PForUtil.compress(data, resBuffer);
    int ensz = PForUtil.getEncodedSize(header);

    assert (ensz <= sz*8); // must not exceed the loose upper bound

    int[] copy = new int[sz];
    PForUtil.decompress(resBuffer, copy, header);

    // println(getHex(data,sz)+"\n");
    // println(getHex(res,ensz)+"\n");
    // println(getHex(copy,sz)+"\n");

    assert cmp(data, sz, copy, sz) == true;
  }

  public boolean cmp(int[] a, int sza, int[] b, int szb) {
    if (sza != szb)
      return false;
    for (int i = 0; i < sza; ++i) {
      if (a[i] != b[i]) {
        System.err.println(String.format(Locale.ENGLISH, "! %08x != %08x in %d", a[i], b[i], i));
        return false;
      }
    }
    return true;
  }

  public static String getHex(byte[] raw, int sz) {
    final String HEXES = "0123456789ABCDEF";
    if (raw == null) {
      return null;
    }
    final StringBuilder hex = new StringBuilder(2 * raw.length);
    for (int i = 0; i < sz; i++) {
      if (i > 0 && i % 16 == 0)
        hex.append("\n");
      byte b = raw[i];
      hex.append(HEXES.charAt((b & 0xF0) >> 4))
         .append(HEXES.charAt((b & 0x0F)))
         .append(" ");
    }
    return hex.toString();
  }

  public static String getHex(int[] raw, int sz) {
    if (raw == null) {
      return null;
    }
    final StringBuilder hex = new StringBuilder(4 * raw.length);
    for (int i = 0; i < sz; i++) {
      if (i > 0 && i % 8 == 0)
        hex.append("\n");
      hex.append(String.format(Locale.ENGLISH, "%08x ", raw[i]));
    }
    return hex.toString();
  }

  static void eprintln(String format, Object... args) {
    System.err.println(String.format(Locale.ENGLISH, format, args));
  }
  static void println(String format, Object... args) {
    System.out.println(String.format(Locale.ENGLISH, format, args));
  }
  static void print(String format, Object... args) {
    System.out.print(String.format(Locale.ENGLISH, format, args));
  }
}
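The forced-exception tests above lean on the pointer-chain encoding: at each exception slot the normal area stores the distance to the next exception minus one, and patchException() walks that chain, replacing each pointer with the real value. A hand-worked sketch of the walk, mirroring the deleted patch() helper (reimplemented here so the snippet stands alone):

public class PatchChainSketch {
  // Mirror of the deleted PForUtil.patch(): data[pos] holds the gap to the
  // next exception minus one; overwrite it with the real value and hop on.
  static int patch(int[] data, int pos, int value) {
    int nextPos = data[pos] + pos + 1;
    data[pos] = value;
    return nextPos;
  }

  public static void main(String[] args) {
    // Normal area after the core decode: exceptions sit at 2, 5, and 9,
    // whose slots store gaps 2 and 3; the last slot's gap is unused.
    int[] data = {7, 7, /*gap->5*/ 2, 7, 7, /*gap->9*/ 3, 7, 7, 7, /*last*/ 0};
    int[] excValues = {1000, 2000, 3000}; // from the exception area
    int pos = 2; // the first exception position comes from the header
    for (int v : excValues) pos = patch(data, pos, v);
    // data is now {7, 7, 1000, 7, 7, 2000, 7, 7, 7, 3000}
    System.out.println(java.util.Arrays.toString(data));
  }
}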
@@ -61,7 +61,6 @@ import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.codecs.pfor.*;

 /**
  * Randomly combines terms index impl w/ postings impls.

@@ -103,8 +102,6 @@ public class MockRandomPostingsFormat extends PostingsFormat {
       final int baseBlockSize = _TestUtil.nextInt(random, 1, 127);
       delegates.add(new MockVariableIntBlockPostingsFormat.MockIntFactory(baseBlockSize));
       // TODO: others
-      delegates.add(new ForFactory());
-      delegates.add(new PForFactory());
     }

   private static String getExtension(String fileName) {

@@ -282,9 +282,7 @@ public abstract class LuceneTestCase extends Assert {
     "MockFixedIntBlock",
     "MockVariableIntBlock",
     "MockSep",
-    "MockRandom",
-    "For",
-    "PFor"
+    "MockRandom"
   ));

   // -----------------------------------------------------------------