Carry back some code improvements from BlockPacked to Block

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1370328 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-08-07 15:57:58 +00:00
parent 378cbd4523
commit 36e9b06bd6
5 changed files with 100 additions and 106 deletions

View File

@ -43,6 +43,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
/** /**
* Concrete class that reads docId(maybe frq,pos,offset,payloads) list * Concrete class that reads docId(maybe frq,pos,offset,payloads) list
@ -62,9 +63,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// nocommit // nocommit
final String segment; final String segment;
// NOTE: not private to avoid access$NNN methods:
final static int blockSize = BlockPostingsFormat.BLOCK_SIZE;
public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException { public BlockPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
boolean success = false; boolean success = false;
segment = segmentInfo.name; segment = segmentInfo.name;
@ -116,8 +114,8 @@ public final class BlockPostingsReader extends PostingsReaderBase {
BlockPostingsWriter.VERSION_START, BlockPostingsWriter.VERSION_START,
BlockPostingsWriter.VERSION_START); BlockPostingsWriter.VERSION_START);
final int indexBlockSize = termsIn.readVInt(); final int indexBlockSize = termsIn.readVInt();
if (indexBlockSize != blockSize) { if (indexBlockSize != BLOCK_SIZE) {
throw new IllegalStateException("index-time blockSize (" + indexBlockSize + ") != read-time blockSize (" + blockSize + ")"); throw new IllegalStateException("index-time BLOCK_SIZE (" + indexBlockSize + ") != read-time BLOCK_SIZE (" + BLOCK_SIZE + ")");
} }
} }
@ -235,12 +233,12 @@ public final class BlockPostingsReader extends PostingsReaderBase {
termState.docStartFP = in.readVLong(); termState.docStartFP = in.readVLong();
if (fieldHasPositions) { if (fieldHasPositions) {
termState.posStartFP = in.readVLong(); termState.posStartFP = in.readVLong();
if (termState.totalTermFreq > blockSize) { if (termState.totalTermFreq > BLOCK_SIZE) {
termState.lastPosBlockOffset = in.readVInt(); termState.lastPosBlockOffset = in.readVInt();
} else { } else {
termState.lastPosBlockOffset = -1; termState.lastPosBlockOffset = -1;
} }
if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= blockSize) { if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= BLOCK_SIZE) {
termState.payStartFP = in.readVLong(); termState.payStartFP = in.readVLong();
} else { } else {
termState.payStartFP = -1; termState.payStartFP = -1;
@ -250,12 +248,12 @@ public final class BlockPostingsReader extends PostingsReaderBase {
termState.docStartFP += in.readVLong(); termState.docStartFP += in.readVLong();
if (fieldHasPositions) { if (fieldHasPositions) {
termState.posStartFP += in.readVLong(); termState.posStartFP += in.readVLong();
if (termState.totalTermFreq > blockSize) { if (termState.totalTermFreq > BLOCK_SIZE) {
termState.lastPosBlockOffset = in.readVInt(); termState.lastPosBlockOffset = in.readVInt();
} else { } else {
termState.lastPosBlockOffset = -1; termState.lastPosBlockOffset = -1;
} }
if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= blockSize) { if ((fieldHasPayloads || fieldHasOffsets) && termState.totalTermFreq >= BLOCK_SIZE) {
long delta = in.readVLong(); long delta = in.readVLong();
if (termState.payStartFP == -1) { if (termState.payStartFP == -1) {
termState.payStartFP = delta; termState.payStartFP = delta;
@ -266,7 +264,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
} }
if (termState.docFreq > blockSize) { if (termState.docFreq > BLOCK_SIZE) {
termState.skipOffset = in.readVInt(); termState.skipOffset = in.readVInt();
} else { } else {
termState.skipOffset = -1; termState.skipOffset = -1;
@ -327,8 +325,8 @@ public final class BlockPostingsReader extends PostingsReaderBase {
private final byte[] encoded; private final byte[] encoded;
private final IntBuffer encodedBuffer; private final IntBuffer encodedBuffer;
private final int[] docDeltaBuffer = new int[blockSize]; private final int[] docDeltaBuffer = new int[BLOCK_SIZE];
private final int[] freqBuffer = new int[blockSize]; private final int[] freqBuffer = new int[BLOCK_SIZE];
private int docBufferUpto; private int docBufferUpto;
@ -366,7 +364,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads(); indexHasPayloads = fieldInfo.hasPayloads();
encoded = new byte[blockSize*4]; encoded = new byte[BLOCK_SIZE*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
} }
@ -393,7 +391,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
accum = 0; accum = 0;
docUpto = 0; docUpto = 0;
docBufferUpto = blockSize; docBufferUpto = BLOCK_SIZE;
skipped = false; skipped = false;
return this; return this;
} }
@ -413,7 +411,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
final int left = docFreq - docUpto; final int left = docFreq - docUpto;
assert left > 0; assert left > 0;
if (left >= blockSize) { if (left >= BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
} }
@ -449,7 +447,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return doc = NO_MORE_DOCS; return doc = NO_MORE_DOCS;
} }
//System.out.println("["+docFreq+"]"+" nextDoc"); //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == blockSize) { if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
} }
if (DEBUG) { if (DEBUG) {
@ -480,7 +478,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// nocommit use skipper!!! it has next last doc id!! // nocommit use skipper!!! it has next last doc id!!
if (docFreq > blockSize && target - accum > blockSize) { if (docFreq > BLOCK_SIZE && target - accum > BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println("load skipper"); System.out.println("load skipper");
@ -490,7 +488,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// Lazy init: first time this enum has ever been used for skipping // Lazy init: first time this enum has ever been used for skipping
skipper = new BlockSkipReader((IndexInput) docIn.clone(), skipper = new BlockSkipReader((IndexInput) docIn.clone(),
BlockPostingsWriter.maxSkipLevels, BlockPostingsWriter.maxSkipLevels,
blockSize, BLOCK_SIZE,
indexHasPos, indexHasPos,
indexHasOffsets, indexHasOffsets,
indexHasPayloads); indexHasPayloads);
@ -511,11 +509,11 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (DEBUG) { if (DEBUG) {
System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer()); System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer());
} }
assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto; assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto;
docUpto = newDocUpto+1; docUpto = newDocUpto+1;
// Force to read next block // Force to read next block
docBufferUpto = blockSize; docBufferUpto = BLOCK_SIZE;
accum = skipper.getDoc(); // actually, this is just lastSkipEntry accum = skipper.getDoc(); // actually, this is just lastSkipEntry
docIn.seek(skipper.getDocPointer()); // now point to the block we want to search docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
} }
@ -536,7 +534,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// containing the doc? yet assert false trips ... i // containing the doc? yet assert false trips ... i
// think because if you advance w/o having done a // think because if you advance w/o having done a
// nextDoc yet()... can we assert/remove this? // nextDoc yet()... can we assert/remove this?
if (docBufferUpto == blockSize) { if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
} }
accum += docDeltaBuffer[docBufferUpto]; accum += docDeltaBuffer[docBufferUpto];
@ -571,9 +569,9 @@ public final class BlockPostingsReader extends PostingsReaderBase {
private final byte[] encoded; private final byte[] encoded;
private final IntBuffer encodedBuffer; private final IntBuffer encodedBuffer;
private final int[] docDeltaBuffer = new int[blockSize]; private final int[] docDeltaBuffer = new int[BLOCK_SIZE];
private final int[] freqBuffer = new int[blockSize]; private final int[] freqBuffer = new int[BLOCK_SIZE];
private final int[] posDeltaBuffer = new int[blockSize]; private final int[] posDeltaBuffer = new int[BLOCK_SIZE];
private int docBufferUpto; private int docBufferUpto;
private int posBufferUpto; private int posBufferUpto;
@ -630,7 +628,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
this.startDocIn = BlockPostingsReader.this.docIn; this.startDocIn = BlockPostingsReader.this.docIn;
this.docIn = (IndexInput) startDocIn.clone(); this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone(); this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone();
encoded = new byte[blockSize*4]; encoded = new byte[BLOCK_SIZE*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads(); indexHasPayloads = fieldInfo.hasPayloads();
@ -655,9 +653,9 @@ public final class BlockPostingsReader extends PostingsReaderBase {
skipOffset = termState.skipOffset; skipOffset = termState.skipOffset;
posPendingFP = posTermStartFP; posPendingFP = posTermStartFP;
posPendingCount = 0; posPendingCount = 0;
if (termState.totalTermFreq < blockSize) { if (termState.totalTermFreq < BLOCK_SIZE) {
lastPosBlockFP = posTermStartFP; lastPosBlockFP = posTermStartFP;
} else if (termState.totalTermFreq == blockSize) { } else if (termState.totalTermFreq == BLOCK_SIZE) {
lastPosBlockFP = -1; lastPosBlockFP = -1;
} else { } else {
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
@ -666,7 +664,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
doc = -1; doc = -1;
accum = 0; accum = 0;
docUpto = 0; docUpto = 0;
docBufferUpto = blockSize; docBufferUpto = BLOCK_SIZE;
skipped = false; skipped = false;
return this; return this;
} }
@ -685,7 +683,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
//System.out.println("["+docFreq+"]"+" refillDoc"); //System.out.println("["+docFreq+"]"+" refillDoc");
final int left = docFreq - docUpto; final int left = docFreq - docUpto;
assert left > 0; assert left > 0;
if (left >= blockSize) { if (left >= BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
} }
@ -752,7 +750,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return doc = NO_MORE_DOCS; return doc = NO_MORE_DOCS;
} }
//System.out.println("["+docFreq+"]"+" nextDoc"); //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == blockSize) { if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
} }
if (DEBUG) { if (DEBUG) {
@ -788,8 +786,8 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// nocommit 2 is heuristic guess!! // nocommit 2 is heuristic guess!!
// nocommit put cheating back! does it help? // nocommit put cheating back! does it help?
// nocommit use skipper!!! it has next last doc id!! // nocommit use skipper!!! it has next last doc id!!
//if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) { //if (docFreq > BLOCK_SIZE && target - (BLOCK_SIZE - docBufferUpto) - 2*BLOCK_SIZE > accum) {
if (docFreq > blockSize && target - accum > blockSize) { if (docFreq > BLOCK_SIZE && target - accum > BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" try skipper"); System.out.println(" try skipper");
} }
@ -800,7 +798,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
skipper = new BlockSkipReader((IndexInput) docIn.clone(), skipper = new BlockSkipReader((IndexInput) docIn.clone(),
BlockPostingsWriter.maxSkipLevels, BlockPostingsWriter.maxSkipLevels,
blockSize, BLOCK_SIZE,
true, true,
indexHasOffsets, indexHasOffsets,
indexHasPayloads); indexHasPayloads);
@ -825,11 +823,11 @@ public final class BlockPostingsReader extends PostingsReaderBase {
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto()); System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto());
} }
assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto; assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto;
docUpto = newDocUpto+1; docUpto = newDocUpto+1;
// Force to read next block // Force to read next block
docBufferUpto = blockSize; docBufferUpto = BLOCK_SIZE;
accum = skipper.getDoc(); accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer()); docIn.seek(skipper.getDocPointer());
posPendingFP = skipper.getPosPointer(); posPendingFP = skipper.getPosPointer();
@ -851,7 +849,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// containing the doc? yet assert false trips ... i // containing the doc? yet assert false trips ... i
// think because if you advance w/o having done a // think because if you advance w/o having done a
// nextDoc yet()... can we assert/remove this? // nextDoc yet()... can we assert/remove this?
if (docBufferUpto == blockSize) { if (docBufferUpto == BLOCK_SIZE) {
// nocommit hmm skip freq? but: we don't ever // nocommit hmm skip freq? but: we don't ever
// scan over more than one block? // scan over more than one block?
refillDocs(); refillDocs();
@ -892,7 +890,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
System.out.println(" FPR.skipPositions: toSkip=" + toSkip); System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
} }
final int leftInBlock = blockSize - posBufferUpto; final int leftInBlock = BLOCK_SIZE - posBufferUpto;
if (toSkip < leftInBlock) { if (toSkip < leftInBlock) {
posBufferUpto += toSkip; posBufferUpto += toSkip;
if (DEBUG) { if (DEBUG) {
@ -900,13 +898,13 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
} else { } else {
toSkip -= leftInBlock; toSkip -= leftInBlock;
while(toSkip >= blockSize) { while(toSkip >= BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" skip whole block @ fp=" + posIn.getFilePointer()); System.out.println(" skip whole block @ fp=" + posIn.getFilePointer());
} }
assert posIn.getFilePointer() != lastPosBlockFP; assert posIn.getFilePointer() != lastPosBlockFP;
skipBlock(posIn); skipBlock(posIn);
toSkip -= blockSize; toSkip -= BLOCK_SIZE;
} }
refillPositions(); refillPositions();
posBufferUpto = toSkip; posBufferUpto = toSkip;
@ -931,7 +929,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
posPendingFP = -1; posPendingFP = -1;
// Force buffer refill: // Force buffer refill:
posBufferUpto = blockSize; posBufferUpto = BLOCK_SIZE;
} }
if (posPendingCount > freq) { if (posPendingCount > freq) {
@ -939,7 +937,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
posPendingCount = freq; posPendingCount = freq;
} }
if (posBufferUpto == blockSize) { if (posBufferUpto == BLOCK_SIZE) {
refillPositions(); refillPositions();
posBufferUpto = 0; posBufferUpto = 0;
} }
@ -978,9 +976,9 @@ public final class BlockPostingsReader extends PostingsReaderBase {
private final byte[] encoded; private final byte[] encoded;
private final IntBuffer encodedBuffer; private final IntBuffer encodedBuffer;
private final int[] docDeltaBuffer = new int[blockSize]; private final int[] docDeltaBuffer = new int[BLOCK_SIZE];
private final int[] freqBuffer = new int[blockSize]; private final int[] freqBuffer = new int[BLOCK_SIZE];
private final int[] posDeltaBuffer = new int[blockSize]; private final int[] posDeltaBuffer = new int[BLOCK_SIZE];
private final int[] payloadLengthBuffer; private final int[] payloadLengthBuffer;
private final int[] offsetStartDeltaBuffer; private final int[] offsetStartDeltaBuffer;
@ -1056,12 +1054,12 @@ public final class BlockPostingsReader extends PostingsReaderBase {
this.docIn = (IndexInput) startDocIn.clone(); this.docIn = (IndexInput) startDocIn.clone();
this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone(); this.posIn = (IndexInput) BlockPostingsReader.this.posIn.clone();
this.payIn = (IndexInput) BlockPostingsReader.this.payIn.clone(); this.payIn = (IndexInput) BlockPostingsReader.this.payIn.clone();
encoded = new byte[blockSize*4]; encoded = new byte[BLOCK_SIZE*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (indexHasOffsets) { if (indexHasOffsets) {
offsetStartDeltaBuffer = new int[blockSize]; offsetStartDeltaBuffer = new int[BLOCK_SIZE];
offsetLengthBuffer = new int[blockSize]; offsetLengthBuffer = new int[BLOCK_SIZE];
} else { } else {
offsetStartDeltaBuffer = null; offsetStartDeltaBuffer = null;
offsetLengthBuffer = null; offsetLengthBuffer = null;
@ -1071,7 +1069,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
indexHasPayloads = fieldInfo.hasPayloads(); indexHasPayloads = fieldInfo.hasPayloads();
if (indexHasPayloads) { if (indexHasPayloads) {
payloadLengthBuffer = new int[blockSize]; payloadLengthBuffer = new int[BLOCK_SIZE];
payloadBytes = new byte[128]; payloadBytes = new byte[128];
payload = new BytesRef(); payload = new BytesRef();
} else { } else {
@ -1101,9 +1099,9 @@ public final class BlockPostingsReader extends PostingsReaderBase {
posPendingFP = posTermStartFP; posPendingFP = posTermStartFP;
payPendingFP = payTermStartFP; payPendingFP = payTermStartFP;
posPendingCount = 0; posPendingCount = 0;
if (termState.totalTermFreq < blockSize) { if (termState.totalTermFreq < BLOCK_SIZE) {
lastPosBlockFP = posTermStartFP; lastPosBlockFP = posTermStartFP;
} else if (termState.totalTermFreq == blockSize) { } else if (termState.totalTermFreq == BLOCK_SIZE) {
lastPosBlockFP = -1; lastPosBlockFP = -1;
} else { } else {
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset; lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
@ -1112,7 +1110,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
doc = -1; doc = -1;
accum = 0; accum = 0;
docUpto = 0; docUpto = 0;
docBufferUpto = blockSize; docBufferUpto = BLOCK_SIZE;
skipped = false; skipped = false;
return this; return this;
} }
@ -1132,7 +1130,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
final int left = docFreq - docUpto; final int left = docFreq - docUpto;
assert left > 0; assert left > 0;
if (left >= blockSize) { if (left >= BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" fill doc block from fp=" + docIn.getFilePointer()); System.out.println(" fill doc block from fp=" + docIn.getFilePointer());
} }
@ -1245,7 +1243,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return doc = NO_MORE_DOCS; return doc = NO_MORE_DOCS;
} }
//System.out.println("["+docFreq+"]"+" nextDoc"); //System.out.println("["+docFreq+"]"+" nextDoc");
if (docBufferUpto == blockSize) { if (docBufferUpto == BLOCK_SIZE) {
refillDocs(); refillDocs();
} }
if (DEBUG) { if (DEBUG) {
@ -1283,8 +1281,8 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// nocommit 2 is heuristic guess!! // nocommit 2 is heuristic guess!!
// nocommit put cheating back! does it help? // nocommit put cheating back! does it help?
// nocommit use skipper!!! it has next last doc id!! // nocommit use skipper!!! it has next last doc id!!
//if (docFreq > blockSize && target - (blockSize - docBufferUpto) - 2*blockSize > accum) { //if (docFreq > BLOCK_SIZE && target - (BLOCK_SIZE - docBufferUpto) - 2*BLOCK_SIZE > accum) {
if (docFreq > blockSize && target - accum > blockSize) { if (docFreq > BLOCK_SIZE && target - accum > BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" try skipper"); System.out.println(" try skipper");
@ -1297,7 +1295,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
skipper = new BlockSkipReader((IndexInput) docIn.clone(), skipper = new BlockSkipReader((IndexInput) docIn.clone(),
BlockPostingsWriter.maxSkipLevels, BlockPostingsWriter.maxSkipLevels,
blockSize, BLOCK_SIZE,
true, true,
indexHasOffsets, indexHasOffsets,
indexHasPayloads); indexHasPayloads);
@ -1321,11 +1319,11 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (DEBUG) { if (DEBUG) {
System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset); System.out.println(" skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset);
} }
assert newDocUpto % blockSize == (blockSize-1): "got " + newDocUpto; assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE-1): "got " + newDocUpto;
docUpto = newDocUpto+1; docUpto = newDocUpto+1;
// Force to read next block // Force to read next block
docBufferUpto = blockSize; docBufferUpto = BLOCK_SIZE;
accum = skipper.getDoc(); accum = skipper.getDoc();
docIn.seek(skipper.getDocPointer()); docIn.seek(skipper.getDocPointer());
posPendingFP = skipper.getPosPointer(); posPendingFP = skipper.getPosPointer();
@ -1366,7 +1364,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
System.out.println(" FPR.skipPositions: toSkip=" + toSkip); System.out.println(" FPR.skipPositions: toSkip=" + toSkip);
} }
final int leftInBlock = blockSize - posBufferUpto; final int leftInBlock = BLOCK_SIZE - posBufferUpto;
if (toSkip < leftInBlock) { if (toSkip < leftInBlock) {
int end = posBufferUpto + toSkip; int end = posBufferUpto + toSkip;
while(posBufferUpto < end) { while(posBufferUpto < end) {
@ -1383,7 +1381,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
} else { } else {
toSkip -= leftInBlock; toSkip -= leftInBlock;
while(toSkip >= blockSize) { while(toSkip >= BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" skip whole block @ fp=" + posIn.getFilePointer()); System.out.println(" skip whole block @ fp=" + posIn.getFilePointer());
} }
@ -1404,11 +1402,11 @@ public final class BlockPostingsReader extends PostingsReaderBase {
// up into lastStartOffset: // up into lastStartOffset:
readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaBuffer); readBlock(payIn, encoded, encodedBuffer, offsetStartDeltaBuffer);
readBlock(payIn, encoded, encodedBuffer, offsetLengthBuffer); readBlock(payIn, encoded, encodedBuffer, offsetLengthBuffer);
for(int i=0;i<blockSize;i++) { for(int i=0;i<BLOCK_SIZE;i++) {
lastStartOffset += offsetStartDeltaBuffer[i] + offsetLengthBuffer[i]; lastStartOffset += offsetStartDeltaBuffer[i] + offsetLengthBuffer[i];
} }
} }
toSkip -= blockSize; toSkip -= BLOCK_SIZE;
} }
refillPositions(); refillPositions();
payloadByteUpto = 0; payloadByteUpto = 0;
@ -1455,7 +1453,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
} }
// Force buffer refill: // Force buffer refill:
posBufferUpto = blockSize; posBufferUpto = BLOCK_SIZE;
} }
if (indexHasPayloads) { if (indexHasPayloads) {
@ -1473,7 +1471,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
posPendingCount = freq; posPendingCount = freq;
} }
if (posBufferUpto == blockSize) { if (posBufferUpto == BLOCK_SIZE) {
refillPositions(); refillPositions();
posBufferUpto = 0; posBufferUpto = 0;
} }

View File

@ -37,6 +37,8 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import static org.apache.lucene.codecs.blockpacked.BlockPackedPostingsFormat.BLOCK_SIZE;
/** /**
* Concrete class that writes docId(maybe frq,pos,offset,payloads) list * Concrete class that writes docId(maybe frq,pos,offset,payloads) list
* with postings format. * with postings format.
@ -67,8 +69,6 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
final IndexOutput posOut; final IndexOutput posOut;
final IndexOutput payOut; final IndexOutput payOut;
final static int blockSize = BlockPostingsFormat.BLOCK_SIZE;
private IndexOutput termsOut; private IndexOutput termsOut;
// How current field indexes postings: // How current field indexes postings:
@ -123,22 +123,22 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
try { try {
CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT); CodecUtil.writeHeader(docOut, DOC_CODEC, VERSION_CURRENT);
if (state.fieldInfos.hasProx()) { if (state.fieldInfos.hasProx()) {
posDeltaBuffer = new int[blockSize]; posDeltaBuffer = new int[BLOCK_SIZE];
posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.POS_EXTENSION), posOut = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockPostingsFormat.POS_EXTENSION),
state.context); state.context);
CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT); CodecUtil.writeHeader(posOut, POS_CODEC, VERSION_CURRENT);
if (state.fieldInfos.hasPayloads()) { if (state.fieldInfos.hasPayloads()) {
payloadBytes = new byte[128]; payloadBytes = new byte[128];
payloadLengthBuffer = new int[blockSize]; payloadLengthBuffer = new int[BLOCK_SIZE];
} else { } else {
payloadBytes = null; payloadBytes = null;
payloadLengthBuffer = null; payloadLengthBuffer = null;
} }
if (state.fieldInfos.hasOffsets()) { if (state.fieldInfos.hasOffsets()) {
offsetStartDeltaBuffer = new int[blockSize]; offsetStartDeltaBuffer = new int[BLOCK_SIZE];
offsetLengthBuffer = new int[blockSize]; offsetLengthBuffer = new int[BLOCK_SIZE];
} else { } else {
offsetStartDeltaBuffer = null; offsetStartDeltaBuffer = null;
offsetLengthBuffer = null; offsetLengthBuffer = null;
@ -165,17 +165,17 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
} }
} }
docDeltaBuffer = new int[blockSize]; docDeltaBuffer = new int[BLOCK_SIZE];
freqBuffer = new int[blockSize]; freqBuffer = new int[BLOCK_SIZE];
skipWriter = new BlockSkipWriter(maxSkipLevels, skipWriter = new BlockSkipWriter(maxSkipLevels,
blockSize, BLOCK_SIZE,
state.segmentInfo.getDocCount(), state.segmentInfo.getDocCount(),
docOut, docOut,
posOut, posOut,
payOut); payOut);
encoded = new byte[blockSize*4]; encoded = new byte[BLOCK_SIZE*4];
encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer(); encodedBuffer = ByteBuffer.wrap(encoded).asIntBuffer();
} }
@ -183,7 +183,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
public void start(IndexOutput termsOut) throws IOException { public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut; this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT); CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
termsOut.writeVInt(blockSize); termsOut.writeVInt(BLOCK_SIZE);
} }
@Override @Override
@ -240,7 +240,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
docBufferUpto++; docBufferUpto++;
docCount++; docCount++;
if (docBufferUpto == blockSize) { if (docBufferUpto == BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer()); System.out.println(" write docDelta block @ fp=" + docOut.getFilePointer());
} }
@ -291,7 +291,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
posBufferUpto++; posBufferUpto++;
lastPosition = position; lastPosition = position;
if (posBufferUpto == blockSize) { if (posBufferUpto == BLOCK_SIZE) {
if (DEBUG) { if (DEBUG) {
System.out.println(" write pos bulk block @ fp=" + posOut.getFilePointer()); System.out.println(" write pos bulk block @ fp=" + posOut.getFilePointer());
} }
@ -329,7 +329,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
// Since we don't know df for current term, we had to buffer // Since we don't know df for current term, we had to buffer
// those skip data for each block, and when a new doc comes, // those skip data for each block, and when a new doc comes,
// write them to skip file. // write them to skip file.
if (docBufferUpto == blockSize) { if (docBufferUpto == BLOCK_SIZE) {
lastBlockDocID = lastDocID; lastBlockDocID = lastDocID;
if (posOut != null) { if (posOut != null) {
if (payOut != null) { if (payOut != null) {
@ -408,7 +408,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
} }
assert stats.totalTermFreq != -1; assert stats.totalTermFreq != -1;
if (stats.totalTermFreq > blockSize) { if (stats.totalTermFreq > BLOCK_SIZE) {
lastPosBlockOffset = (int) (posOut.getFilePointer() - posTermStartFP); lastPosBlockOffset = (int) (posOut.getFilePointer() - posTermStartFP);
} else { } else {
lastPosBlockOffset = -1; lastPosBlockOffset = -1;
@ -418,7 +418,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
// nocommit should we send offsets/payloads to // nocommit should we send offsets/payloads to
// .pay...? seems wasteful (have to store extra // .pay...? seems wasteful (have to store extra
// vLong for low (< blockSize) DF terms = vast vast // vLong for low (< BLOCK_SIZE) DF terms = vast vast
// majority) // majority)
// vInt encode the remaining positions/payloads/offsets: // vInt encode the remaining positions/payloads/offsets:
@ -473,7 +473,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
} }
int skipOffset; int skipOffset;
if (docCount > blockSize) { if (docCount > BLOCK_SIZE) {
skipOffset = (int) (skipWriter.writeSkip(docOut)-docTermStartFP); skipOffset = (int) (skipWriter.writeSkip(docOut)-docTermStartFP);
if (DEBUG) { if (DEBUG) {
@ -487,7 +487,7 @@ public final class BlockPostingsWriter extends PostingsWriterBase {
} }
long payStartFP; long payStartFP;
if (stats.totalTermFreq >= blockSize) { if (stats.totalTermFreq >= BLOCK_SIZE) {
payStartFP = payTermStartFP; payStartFP = payTermStartFP;
} else { } else {
payStartFP = -1; payStartFP = -1;

View File

@ -69,8 +69,6 @@ final class BlockSkipWriter extends MultiLevelSkipListWriter {
private boolean fieldHasPayloads; private boolean fieldHasPayloads;
public BlockSkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { public BlockSkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) {
// nocommit figure out what skipMultiplier is best (4 is
// total guess):
super(blockSize, 8, maxSkipLevels, docCount); super(blockSize, 8, maxSkipLevels, docCount);
this.docOut = docOut; this.docOut = docOut;
this.posOut = posOut; this.posOut = posOut;

View File

@ -411,7 +411,7 @@ public final class BlockPackedPostingsWriter extends PostingsWriterBase {
// nocommit should we send offsets/payloads to // nocommit should we send offsets/payloads to
// .pay...? seems wasteful (have to store extra // .pay...? seems wasteful (have to store extra
// vLong for low (< blockSize) DF terms = vast vast // vLong for low (< BLOCK_SIZE) DF terms = vast vast
// majority) // majority)
// vInt encode the remaining positions/payloads/offsets: // vInt encode the remaining positions/payloads/offsets:

View File

@ -24,25 +24,25 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.codecs.MultiLevelSkipListWriter; import org.apache.lucene.codecs.MultiLevelSkipListWriter;
/** /**
* Write skip lists with multiple levels, and support skip within block ints. * Write skip lists with multiple levels, and support skip within block ints.
* *
* Assume that docFreq = 28, skipInterval = blockSize = 12 * Assume that docFreq = 28, skipInterval = blockSize = 12
* *
* | block#0 | | block#1 | |vInts| * | block#0 | | block#1 | |vInts|
* d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) * d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list)
* ^ ^ (level 0 skip point) * ^ ^ (level 0 skip point)
* *
* Note that skipWriter will ignore first document in block#0, since * Note that skipWriter will ignore first document in block#0, since
* it is useless as a skip point. Also, we'll never skip into the vInts * it is useless as a skip point. Also, we'll never skip into the vInts
* block, only record skip data at the start its start point(if it exist). * block, only record skip data at the start its start point(if it exist).
* *
* For each skip point, we will record: * For each skip point, we will record:
* 1. lastDocID, * 1. lastDocID,
* 2. its related file points(position, payload), * 2. its related file points(position, payload),
* 3. related numbers or uptos(position, payload). * 3. related numbers or uptos(position, payload).
* 4. start offset. * 4. start offset.
* *
*/ */
final class BlockPackedSkipWriter extends MultiLevelSkipListWriter { final class BlockPackedSkipWriter extends MultiLevelSkipListWriter {
private boolean DEBUG = BlockPackedPostingsReader.DEBUG; private boolean DEBUG = BlockPackedPostingsReader.DEBUG;
@ -69,8 +69,6 @@ final class BlockPackedSkipWriter extends MultiLevelSkipListWriter {
private boolean fieldHasPayloads; private boolean fieldHasPayloads;
public BlockPackedSkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) { public BlockPackedSkipWriter(int maxSkipLevels, int blockSize, int docCount, IndexOutput docOut, IndexOutput posOut, IndexOutput payOut) {
// nocommit figure out what skipMultiplier is best (4 is
// total guess):
super(blockSize, 8, maxSkipLevels, docCount); super(blockSize, 8, maxSkipLevels, docCount);
this.docOut = docOut; this.docOut = docOut;
this.posOut = posOut; this.posOut = posOut;