mirror of https://github.com/apache/lucene.git
LUCENE-8311: Phrase impacts (#760)
parent bf9a7e2626, commit cfac486afd

@@ -197,8 +197,6 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
public PostingsEnum postings(FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags) throws IOException {
|
||||
|
||||
boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
boolean indexHasPayloads = fieldInfo.hasPayloads();
|
||||
|
||||
if (indexHasPositions == false || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
|
||||
BlockDocsEnum docsEnum;
|
||||
|
@@ -211,18 +209,6 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
docsEnum = new BlockDocsEnum(fieldInfo);
|
||||
}
|
||||
return docsEnum.reset((IntBlockTermState) termState, flags);
|
||||
} else if ((indexHasOffsets == false || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false) &&
|
||||
(indexHasPayloads == false || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
|
||||
BlockPostingsEnum docsAndPositionsEnum;
|
||||
if (reuse instanceof BlockPostingsEnum) {
|
||||
docsAndPositionsEnum = (BlockPostingsEnum) reuse;
|
||||
if (!docsAndPositionsEnum.canReuse(docIn, fieldInfo)) {
|
||||
docsAndPositionsEnum = new BlockPostingsEnum(fieldInfo);
|
||||
}
|
||||
} else {
|
||||
docsAndPositionsEnum = new BlockPostingsEnum(fieldInfo);
|
||||
}
|
||||
return docsAndPositionsEnum.reset((IntBlockTermState) termState);
|
||||
} else {
|
||||
EverythingEnum everythingEnum;
|
||||
if (reuse instanceof EverythingEnum) {
|
||||
|
@@ -243,6 +229,18 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
// no skip data
|
||||
return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
|
||||
}
|
||||
|
||||
final boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
final boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
final boolean indexHasPayloads = fieldInfo.hasPayloads();
|
||||
|
||||
if (indexHasPositions &&
|
||||
PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) &&
|
||||
(indexHasOffsets == false || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false) &&
|
||||
(indexHasPayloads == false || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
|
||||
return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
|
||||
}
|
||||
|
||||
return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
|
||||
}
|
||||
|
||||
|
@@ -493,339 +491,6 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
final class BlockPostingsEnum extends PostingsEnum {
|
||||
|
||||
private final byte[] encoded;
|
||||
|
||||
private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||
private final int[] freqBuffer = new int[MAX_DATA_SIZE];
|
||||
private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||
|
||||
private int docBufferUpto;
|
||||
private int posBufferUpto;
|
||||
|
||||
private Lucene50SkipReader skipper;
|
||||
private boolean skipped;
|
||||
|
||||
final IndexInput startDocIn;
|
||||
|
||||
IndexInput docIn;
|
||||
final IndexInput posIn;
|
||||
|
||||
final boolean indexHasOffsets;
|
||||
final boolean indexHasPayloads;
|
||||
|
||||
private int docFreq; // number of docs in this posting list
|
||||
private long totalTermFreq; // number of positions in this posting list
|
||||
private int docUpto; // how many docs we've read
|
||||
private int doc; // doc we last read
|
||||
private int accum; // accumulator for doc deltas
|
||||
private int freq; // freq we last read
|
||||
private int position; // current position
|
||||
|
||||
// how many positions "behind" we are; nextPosition must
|
||||
// skip these to "catch up":
|
||||
private int posPendingCount;
|
||||
|
||||
// Lazy pos seek: if != -1 then we must seek to this FP
|
||||
// before reading positions:
|
||||
private long posPendingFP;
|
||||
|
||||
// Where this term's postings start in the .doc file:
|
||||
private long docTermStartFP;
|
||||
|
||||
// Where this term's postings start in the .pos file:
|
||||
private long posTermStartFP;
|
||||
|
||||
// Where this term's payloads/offsets start in the .pay
|
||||
// file:
|
||||
private long payTermStartFP;
|
||||
|
||||
// File pointer where the last (vInt encoded) pos delta
|
||||
// block is. We need this to know whether to bulk
|
||||
// decode vs vInt decode the block:
|
||||
private long lastPosBlockFP;
|
||||
|
||||
// Where this term's skip data starts (after
|
||||
// docTermStartFP) in the .doc file (or -1 if there is
|
||||
// no skip data for this term):
|
||||
private long skipOffset;
|
||||
|
||||
private int nextSkipDoc;
|
||||
|
||||
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
|
||||
|
||||
public BlockPostingsEnum(FieldInfo fieldInfo) throws IOException {
|
||||
this.startDocIn = Lucene50PostingsReader.this.docIn;
|
||||
this.docIn = null;
|
||||
this.posIn = Lucene50PostingsReader.this.posIn.clone();
|
||||
encoded = new byte[MAX_ENCODED_SIZE];
|
||||
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
indexHasPayloads = fieldInfo.hasPayloads();
|
||||
}
|
||||
|
||||
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
|
||||
return docIn == startDocIn &&
|
||||
indexHasOffsets == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) &&
|
||||
indexHasPayloads == fieldInfo.hasPayloads();
|
||||
}
|
||||
|
||||
public PostingsEnum reset(IntBlockTermState termState) throws IOException {
|
||||
docFreq = termState.docFreq;
|
||||
docTermStartFP = termState.docStartFP;
|
||||
posTermStartFP = termState.posStartFP;
|
||||
payTermStartFP = termState.payStartFP;
|
||||
skipOffset = termState.skipOffset;
|
||||
totalTermFreq = termState.totalTermFreq;
|
||||
singletonDocID = termState.singletonDocID;
|
||||
if (docFreq > 1) {
|
||||
if (docIn == null) {
|
||||
// lazy init
|
||||
docIn = startDocIn.clone();
|
||||
}
|
||||
docIn.seek(docTermStartFP);
|
||||
}
|
||||
posPendingFP = posTermStartFP;
|
||||
posPendingCount = 0;
|
||||
if (termState.totalTermFreq < BLOCK_SIZE) {
|
||||
lastPosBlockFP = posTermStartFP;
|
||||
} else if (termState.totalTermFreq == BLOCK_SIZE) {
|
||||
lastPosBlockFP = -1;
|
||||
} else {
|
||||
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
|
||||
}
|
||||
|
||||
doc = -1;
|
||||
accum = 0;
|
||||
docUpto = 0;
|
||||
if (docFreq > BLOCK_SIZE) {
|
||||
nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
|
||||
} else {
|
||||
nextSkipDoc = NO_MORE_DOCS; // not enough docs for skipping
|
||||
}
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
skipped = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
private void refillDocs() throws IOException {
|
||||
final int left = docFreq - docUpto;
|
||||
assert left > 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forUtil.readBlock(docIn, encoded, docDeltaBuffer);
|
||||
forUtil.readBlock(docIn, encoded, freqBuffer);
|
||||
} else if (docFreq == 1) {
|
||||
docDeltaBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = (int) totalTermFreq;
|
||||
} else {
|
||||
// Read vInts:
|
||||
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
|
||||
}
|
||||
docBufferUpto = 0;
|
||||
}
|
||||
|
||||
private void refillPositions() throws IOException {
|
||||
if (posIn.getFilePointer() == lastPosBlockFP) {
|
||||
final int count = (int) (totalTermFreq % BLOCK_SIZE);
|
||||
int payloadLength = 0;
|
||||
for(int i=0;i<count;i++) {
|
||||
int code = posIn.readVInt();
|
||||
if (indexHasPayloads) {
|
||||
if ((code & 1) != 0) {
|
||||
payloadLength = posIn.readVInt();
|
||||
}
|
||||
posDeltaBuffer[i] = code >>> 1;
|
||||
if (payloadLength != 0) {
|
||||
posIn.seek(posIn.getFilePointer() + payloadLength);
|
||||
}
|
||||
} else {
|
||||
posDeltaBuffer[i] = code;
|
||||
}
|
||||
if (indexHasOffsets) {
|
||||
if ((posIn.readVInt() & 1) != 0) {
|
||||
// offset length changed
|
||||
posIn.readVInt();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
forUtil.readBlock(posIn, encoded, posDeltaBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (docUpto == docFreq) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
accum += docDeltaBuffer[docBufferUpto];
|
||||
freq = freqBuffer[docBufferUpto];
|
||||
posPendingCount += freq;
|
||||
docBufferUpto++;
|
||||
docUpto++;
|
||||
|
||||
doc = accum;
|
||||
position = 0;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
// TODO: make frq block load lazy/skippable
|
||||
|
||||
if (target > nextSkipDoc) {
|
||||
if (skipper == null) {
|
||||
// Lazy init: first time this enum has ever been used for skipping
|
||||
skipper = new Lucene50SkipReader(version,
|
||||
docIn.clone(),
|
||||
MAX_SKIP_LEVELS,
|
||||
true,
|
||||
indexHasOffsets,
|
||||
indexHasPayloads);
|
||||
}
|
||||
|
||||
if (!skipped) {
|
||||
assert skipOffset != -1;
|
||||
// This is the first time this enum has skipped
|
||||
// since reset() was called; load the skip data:
|
||||
skipper.init(docTermStartFP+skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
|
||||
skipped = true;
|
||||
}
|
||||
|
||||
final int newDocUpto = skipper.skipTo(target) + 1;
|
||||
|
||||
if (newDocUpto > docUpto) {
|
||||
// Skipper moved
|
||||
|
||||
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
|
||||
docUpto = newDocUpto;
|
||||
|
||||
// Force to read next block
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
accum = skipper.getDoc();
|
||||
docIn.seek(skipper.getDocPointer());
|
||||
posPendingFP = skipper.getPosPointer();
|
||||
posPendingCount = skipper.getPosBufferUpto();
|
||||
}
|
||||
nextSkipDoc = skipper.getNextSkipDoc();
|
||||
}
|
||||
if (docUpto == docFreq) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
// Now scan... this is an inlined/pared down version
|
||||
// of nextDoc():
|
||||
while (true) {
|
||||
accum += docDeltaBuffer[docBufferUpto];
|
||||
freq = freqBuffer[docBufferUpto];
|
||||
posPendingCount += freq;
|
||||
docBufferUpto++;
|
||||
docUpto++;
|
||||
|
||||
if (accum >= target) {
|
||||
break;
|
||||
}
|
||||
if (docUpto == docFreq) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
|
||||
position = 0;
|
||||
return doc = accum;
|
||||
}
|
||||
|
||||
// TODO: in theory we could avoid loading frq block
|
||||
// when not needed, ie, use skip data to load how far to
|
||||
// seek the pos pointer ... instead of having to load frq
|
||||
// blocks only to sum up how many positions to skip
|
||||
private void skipPositions() throws IOException {
|
||||
// Skip positions now:
|
||||
int toSkip = posPendingCount - freq;
|
||||
|
||||
final int leftInBlock = BLOCK_SIZE - posBufferUpto;
|
||||
if (toSkip < leftInBlock) {
|
||||
posBufferUpto += toSkip;
|
||||
} else {
|
||||
toSkip -= leftInBlock;
|
||||
while(toSkip >= BLOCK_SIZE) {
|
||||
assert posIn.getFilePointer() != lastPosBlockFP;
|
||||
forUtil.skipBlock(posIn);
|
||||
toSkip -= BLOCK_SIZE;
|
||||
}
|
||||
refillPositions();
|
||||
posBufferUpto = toSkip;
|
||||
}
|
||||
|
||||
position = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextPosition() throws IOException {
|
||||
|
||||
assert posPendingCount > 0;
|
||||
|
||||
if (posPendingFP != -1) {
|
||||
posIn.seek(posPendingFP);
|
||||
posPendingFP = -1;
|
||||
|
||||
// Force buffer refill:
|
||||
posBufferUpto = BLOCK_SIZE;
|
||||
}
|
||||
|
||||
if (posPendingCount > freq) {
|
||||
skipPositions();
|
||||
posPendingCount = freq;
|
||||
}
|
||||
|
||||
if (posBufferUpto == BLOCK_SIZE) {
|
||||
refillPositions();
|
||||
posBufferUpto = 0;
|
||||
}
|
||||
position += posDeltaBuffer[posBufferUpto++];
|
||||
posPendingCount--;
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getPayload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return docFreq;
|
||||
}
|
||||
}
|
||||
|
||||
// Also handles payloads + offsets
|
||||
final class EverythingEnum extends PostingsEnum {
|
||||
|
||||
|
@@ -910,12 +575,18 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
|
||||
|
||||
public EverythingEnum(FieldInfo fieldInfo) throws IOException {
|
||||
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
indexHasPayloads = fieldInfo.hasPayloads();
|
||||
|
||||
this.startDocIn = Lucene50PostingsReader.this.docIn;
|
||||
this.docIn = null;
|
||||
this.posIn = Lucene50PostingsReader.this.posIn.clone();
|
||||
this.payIn = Lucene50PostingsReader.this.payIn.clone();
|
||||
if (indexHasOffsets || indexHasPayloads) {
|
||||
this.payIn = Lucene50PostingsReader.this.payIn.clone();
|
||||
} else {
|
||||
this.payIn = null;
|
||||
}
|
||||
encoded = new byte[MAX_ENCODED_SIZE];
|
||||
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
if (indexHasOffsets) {
|
||||
offsetStartDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||
offsetLengthBuffer = new int[MAX_DATA_SIZE];
|
||||
|
@@ -926,7 +597,6 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
endOffset = -1;
|
||||
}
|
||||
|
||||
indexHasPayloads = fieldInfo.hasPayloads();
|
||||
if (indexHasPayloads) {
|
||||
payloadLengthBuffer = new int[MAX_DATA_SIZE];
|
||||
payloadBytes = new byte[128];
|
||||
|
@@ -1236,7 +906,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
posIn.seek(posPendingFP);
|
||||
posPendingFP = -1;
|
||||
|
||||
if (payPendingFP != -1) {
|
||||
if (payPendingFP != -1 && payIn != null) {
|
||||
payIn.seek(payPendingFP);
|
||||
payPendingFP = -1;
|
||||
}
|
||||
|
@@ -1300,6 +970,298 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
final class BlockImpactsPostingsEnum extends ImpactsEnum {
|
||||
|
||||
private final byte[] encoded;
|
||||
|
||||
private final int[] docDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||
private final int[] freqBuffer = new int[MAX_DATA_SIZE];
|
||||
private final int[] posDeltaBuffer = new int[MAX_DATA_SIZE];
|
||||
|
||||
private int docBufferUpto;
|
||||
private int posBufferUpto;
|
||||
|
||||
private final Lucene50ScoreSkipReader skipper;
|
||||
|
||||
final IndexInput docIn;
|
||||
final IndexInput posIn;
|
||||
|
||||
final boolean indexHasOffsets;
|
||||
final boolean indexHasPayloads;
|
||||
|
||||
private int docFreq; // number of docs in this posting list
|
||||
private long totalTermFreq; // number of positions in this posting list
|
||||
private int docUpto; // how many docs we've read
|
||||
private int doc; // doc we last read
|
||||
private int accum; // accumulator for doc deltas
|
||||
private int freq; // freq we last read
|
||||
private int position; // current position
|
||||
|
||||
// how many positions "behind" we are; nextPosition must
|
||||
// skip these to "catch up":
|
||||
private int posPendingCount;
|
||||
|
||||
// Lazy pos seek: if != -1 then we must seek to this FP
|
||||
// before reading positions:
|
||||
private long posPendingFP;
|
||||
|
||||
// Where this term's postings start in the .doc file:
|
||||
private long docTermStartFP;
|
||||
|
||||
// Where this term's postings start in the .pos file:
|
||||
private long posTermStartFP;
|
||||
|
||||
// Where this term's payloads/offsets start in the .pay
|
||||
// file:
|
||||
private long payTermStartFP;
|
||||
|
||||
// File pointer where the last (vInt encoded) pos delta
|
||||
// block is. We need this to know whether to bulk
|
||||
// decode vs vInt decode the block:
|
||||
private long lastPosBlockFP;
|
||||
|
||||
private int nextSkipDoc = -1;
|
||||
|
||||
private long seekTo = -1;
|
||||
|
||||
public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState) throws IOException {
|
||||
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
indexHasPayloads = fieldInfo.hasPayloads();
|
||||
|
||||
this.docIn = Lucene50PostingsReader.this.docIn.clone();
|
||||
|
||||
encoded = new byte[MAX_ENCODED_SIZE];
|
||||
|
||||
this.posIn = Lucene50PostingsReader.this.posIn.clone();
|
||||
|
||||
docFreq = termState.docFreq;
|
||||
docTermStartFP = termState.docStartFP;
|
||||
posTermStartFP = termState.posStartFP;
|
||||
payTermStartFP = termState.payStartFP;
|
||||
totalTermFreq = termState.totalTermFreq;
|
||||
docIn.seek(docTermStartFP);
|
||||
posPendingFP = posTermStartFP;
|
||||
posPendingCount = 0;
|
||||
if (termState.totalTermFreq < BLOCK_SIZE) {
|
||||
lastPosBlockFP = posTermStartFP;
|
||||
} else if (termState.totalTermFreq == BLOCK_SIZE) {
|
||||
lastPosBlockFP = -1;
|
||||
} else {
|
||||
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
|
||||
}
|
||||
|
||||
doc = -1;
|
||||
accum = 0;
|
||||
docUpto = 0;
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
|
||||
skipper = new Lucene50ScoreSkipReader(version,
|
||||
docIn.clone(),
|
||||
MAX_SKIP_LEVELS,
|
||||
true,
|
||||
indexHasOffsets,
|
||||
indexHasPayloads);
|
||||
skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
private void refillDocs() throws IOException {
|
||||
final int left = docFreq - docUpto;
|
||||
assert left > 0;
|
||||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forUtil.readBlock(docIn, encoded, docDeltaBuffer);
|
||||
forUtil.readBlock(docIn, encoded, freqBuffer);
|
||||
} else {
|
||||
readVIntBlock(docIn, docDeltaBuffer, freqBuffer, left, true);
|
||||
}
|
||||
docBufferUpto = 0;
|
||||
}
|
||||
|
||||
private void refillPositions() throws IOException {
|
||||
if (posIn.getFilePointer() == lastPosBlockFP) {
|
||||
final int count = (int) (totalTermFreq % BLOCK_SIZE);
|
||||
int payloadLength = 0;
|
||||
for(int i=0;i<count;i++) {
|
||||
int code = posIn.readVInt();
|
||||
if (indexHasPayloads) {
|
||||
if ((code & 1) != 0) {
|
||||
payloadLength = posIn.readVInt();
|
||||
}
|
||||
posDeltaBuffer[i] = code >>> 1;
|
||||
if (payloadLength != 0) {
|
||||
posIn.seek(posIn.getFilePointer() + payloadLength);
|
||||
}
|
||||
} else {
|
||||
posDeltaBuffer[i] = code;
|
||||
}
|
||||
if (indexHasOffsets) {
|
||||
if ((posIn.readVInt() & 1) != 0) {
|
||||
// offset length changed
|
||||
posIn.readVInt();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
forUtil.readBlock(posIn, encoded, posDeltaBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void advanceShallow(int target) throws IOException {
|
||||
if (target > nextSkipDoc) {
|
||||
// always plus one to fix the result, since skip position in Lucene50SkipReader
|
||||
// is a little different from MultiLevelSkipListReader
|
||||
final int newDocUpto = skipper.skipTo(target) + 1;
|
||||
|
||||
if (newDocUpto > docUpto) {
|
||||
// Skipper moved
|
||||
assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
|
||||
docUpto = newDocUpto;
|
||||
|
||||
// Force to read next block
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
accum = skipper.getDoc();
|
||||
posPendingFP = skipper.getPosPointer();
|
||||
posPendingCount = skipper.getPosBufferUpto();
|
||||
seekTo = skipper.getDocPointer(); // delay the seek
|
||||
}
|
||||
// next time we call advance, this is used to
|
||||
// foresee whether skipper is necessary.
|
||||
nextSkipDoc = skipper.getNextSkipDoc();
|
||||
}
|
||||
assert nextSkipDoc >= target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Impacts getImpacts() throws IOException {
|
||||
advanceShallow(doc);
|
||||
return skipper.getImpacts();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target > nextSkipDoc) {
|
||||
advanceShallow(target);
|
||||
}
|
||||
if (docUpto == docFreq) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
if (seekTo >= 0) {
|
||||
docIn.seek(seekTo);
|
||||
seekTo = -1;
|
||||
}
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
// Now scan:
|
||||
while (true) {
|
||||
accum += docDeltaBuffer[docBufferUpto];
|
||||
freq = freqBuffer[docBufferUpto];
|
||||
posPendingCount += freq;
|
||||
docBufferUpto++;
|
||||
docUpto++;
|
||||
|
||||
if (accum >= target) {
|
||||
break;
|
||||
}
|
||||
if (docUpto == docFreq) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
position = 0;
|
||||
|
||||
return doc = accum;
|
||||
}
|
||||
|
||||
// TODO: in theory we could avoid loading frq block
|
||||
// when not needed, ie, use skip data to load how far to
|
||||
// seek the pos pointer ... instead of having to load frq
|
||||
// blocks only to sum up how many positions to skip
|
||||
private void skipPositions() throws IOException {
|
||||
// Skip positions now:
|
||||
int toSkip = posPendingCount - freq;
|
||||
|
||||
final int leftInBlock = BLOCK_SIZE - posBufferUpto;
|
||||
if (toSkip < leftInBlock) {
|
||||
posBufferUpto += toSkip;
|
||||
} else {
|
||||
toSkip -= leftInBlock;
|
||||
while(toSkip >= BLOCK_SIZE) {
|
||||
assert posIn.getFilePointer() != lastPosBlockFP;
|
||||
forUtil.skipBlock(posIn);
|
||||
toSkip -= BLOCK_SIZE;
|
||||
}
|
||||
refillPositions();
|
||||
posBufferUpto = toSkip;
|
||||
}
|
||||
|
||||
position = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextPosition() throws IOException {
|
||||
assert posPendingCount > 0;
|
||||
|
||||
if (posPendingFP != -1) {
|
||||
posIn.seek(posPendingFP);
|
||||
posPendingFP = -1;
|
||||
|
||||
// Force buffer refill:
|
||||
posBufferUpto = BLOCK_SIZE;
|
||||
}
|
||||
|
||||
if (posPendingCount > freq) {
|
||||
skipPositions();
|
||||
posPendingCount = freq;
|
||||
}
|
||||
|
||||
if (posBufferUpto == BLOCK_SIZE) {
|
||||
refillPositions();
|
||||
posBufferUpto = 0;
|
||||
}
|
||||
position += posDeltaBuffer[posBufferUpto++];
|
||||
|
||||
posPendingCount--;
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getPayload() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return docFreq;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final class BlockImpactsEverythingEnum extends ImpactsEnum {
|
||||
|
||||
|
|
|
@@ -19,9 +19,19 @@ package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.PriorityQueue;

final class ExactPhraseMatcher extends PhraseMatcher {

@@ -37,9 +47,21 @@ final class ExactPhraseMatcher extends PhraseMatcher {
}

private final PostingsAndPosition[] postings;
private final DocIdSetIterator approximation;
private final ImpactsDISI impactsApproximation;

ExactPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, float matchCost) {
super(approximation(postings), matchCost);
ExactPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, ScoreMode scoreMode, SimScorer scorer, float matchCost) {
super(matchCost);

final DocIdSetIterator approximation = ConjunctionDISI.intersectIterators(Arrays.stream(postings).map(p -> p.postings).collect(Collectors.toList()));
final ImpactsSource impactsSource = mergeImpacts(Arrays.stream(postings).map(p -> p.impacts).toArray(ImpactsEnum[]::new));

if (scoreMode == ScoreMode.TOP_SCORES) {
this.approximation = this.impactsApproximation = new ImpactsDISI(approximation, impactsSource, scorer);
} else {
this.approximation = approximation;
this.impactsApproximation = new ImpactsDISI(approximation, impactsSource, scorer);
}

List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
for(PhraseQuery.PostingsAndFreq posting : postings) {

@@ -48,12 +70,14 @@ final class ExactPhraseMatcher extends PhraseMatcher {
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
}

private static DocIdSetIterator approximation(PhraseQuery.PostingsAndFreq[] postings) {
List<DocIdSetIterator> iterators = new ArrayList<>();
for (PhraseQuery.PostingsAndFreq posting : postings) {
iterators.add(posting.postings);
}
return ConjunctionDISI.intersectIterators(iterators);
@Override
DocIdSetIterator approximation() {
return approximation;
}

@Override
ImpactsDISI impactsApproximation() {
return impactsApproximation;
}

@Override

@@ -149,4 +173,173 @@ final class ExactPhraseMatcher extends PhraseMatcher {
return postings[postings.length - 1].postings.endOffset();
}

/**
* Merge impacts for multiple terms of an exact phrase.
*/
static ImpactsSource mergeImpacts(ImpactsEnum[] impactsEnums) {
// Iteration of block boundaries uses the impacts enum with the lower cost.
// This is consistent with BlockMaxConjunctionScorer.
int tmpLeadIndex = -1;
for (int i = 0; i < impactsEnums.length; ++i) {
if (tmpLeadIndex == -1 || impactsEnums[i].cost() < impactsEnums[tmpLeadIndex].cost()) {
tmpLeadIndex = i;
}
}
final int leadIndex = tmpLeadIndex;

return new ImpactsSource() {

class SubIterator {
final Iterator<Impact> iterator;
Impact current;

SubIterator(List<Impact> impacts) {
this.iterator = impacts.iterator();
this.current = iterator.next();
}

boolean next() {
if (iterator.hasNext() == false) {
current = null;
return false;
} else {
current = iterator.next();
return true;
}
}
}

@Override
public Impacts getImpacts() throws IOException {
final Impacts[] impacts = new Impacts[impactsEnums.length];
for (int i = 0; i < impactsEnums.length; ++i) {
impacts[i] = impactsEnums[i].getImpacts();
}
final Impacts lead = impacts[leadIndex];
return new Impacts() {

@Override
public int numLevels() {
// Delegate to the lead
return lead.numLevels();
}

@Override
public int getDocIdUpTo(int level) {
// Delegate to the lead
return lead.getDocIdUpTo(level);
}

/**
* Return the minimum level whose impacts are valid up to {@code docIdUpTo},
* or {@code -1} if there is no such level.
*/
private int getLevel(Impacts impacts, int docIdUpTo) {
for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
if (impacts.getDocIdUpTo(level) >= docIdUpTo) {
return level;
}
}
return -1;
}

@Override
public List<Impact> getImpacts(int level) {
final int docIdUpTo = getDocIdUpTo(level);

PriorityQueue<SubIterator> pq = new PriorityQueue<SubIterator>(impacts.length) {
@Override
protected boolean lessThan(SubIterator a, SubIterator b) {
return a.current.freq < b.current.freq;
}
};

boolean hasImpacts = false;
List<Impact> onlyImpactList = null;
for (int i = 0; i < impacts.length; ++i) {
int impactsLevel = getLevel(impacts[i], docIdUpTo);
if (impactsLevel == -1) {
// This instance doesn't have useful impacts, ignore it: this is safe.
continue;
}

List<Impact> impactList = impacts[i].getImpacts(impactsLevel);
Impact firstImpact = impactList.get(0);
if (firstImpact.freq == Integer.MAX_VALUE && firstImpact.norm == 1L) {
// Dummy impacts, ignore it too.
continue;
}

SubIterator subIterator = new SubIterator(impactList);
pq.add(subIterator);
if (hasImpacts == false) {
hasImpacts = true;
onlyImpactList = impactList;
} else {
onlyImpactList = null; // there are multiple impacts
}
}

if (hasImpacts == false) {
return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
} else if (onlyImpactList != null) {
return onlyImpactList;
}

// Idea: merge impacts by freq. The tricky thing is that we need to
// consider freq values that are not in the impacts too. For
// instance if the list of impacts is [{freq=2,norm=10}, {freq=4,norm=12}],
// there might well be a document that has a freq of 2 and a length of 11,
// which was just not added to the list of impacts because {freq=2,norm=10}
// is more competitive.
// We walk impacts in parallel through a PQ ordered by freq. At any time,
// the competitive impact consists of the lowest freq among all entries of
// the PQ (the top) and the highest norm (tracked separately).
List<Impact> mergedImpacts = new ArrayList<>();
SubIterator top = pq.top();
int currentFreq = top.current.freq;
long currentNorm = 0;
for (SubIterator it : pq) {
if (Long.compareUnsigned(it.current.norm, currentNorm) > 0) {
currentNorm = it.current.norm;
}
}

outer: while (true) {
if (mergedImpacts.size() > 0 && mergedImpacts.get(mergedImpacts.size() - 1).norm == currentNorm) {
mergedImpacts.get(mergedImpacts.size() - 1).freq = currentFreq;
} else {
mergedImpacts.add(new Impact(currentFreq, currentNorm));
}

do {
if (top.next() == false) {
// At least one clause doesn't have any more documents below the current norm,
// so we can safely ignore further clauses. The only reason why they have more
// impacts is because they cover more documents that we are not interested in.
break outer;
}
if (Long.compareUnsigned(top.current.norm, currentNorm) > 0) {
currentNorm = top.current.norm;
}
top = pq.updateTop();
} while (top.current.freq == currentFreq);

currentFreq = top.current.freq;
}

return mergedImpacts;
}
};
}

@Override
public void advanceShallow(int target) throws IOException {
for (ImpactsEnum impactsEnum : impactsEnums) {
impactsEnum.advanceShallow(target);
}
}
};
}

}
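The merging logic above relies on two conventions that are easy to miss when reading the diff: an impact of {freq=Integer.MAX_VALUE, norm=1} means "no useful information" and is skipped, and norms are ordered as unsigned longs, so a norm whose long value is negative ranks above every positive norm (this is also what the new testMergeImpacts checks further below in TestPhraseQuery). A tiny standalone demonstration of the unsigned ordering; this class is illustrative only and not part of the patch:

import java.util.Arrays;

// Hypothetical demo, not from the patch: norms are compared with
// Long.compareUnsigned, so a "negative" norm sorts after any positive one.
public class UnsignedNormOrderDemo {
  public static void main(String[] args) {
    Long[] norms = { 945L, 1L, -3L, 10L };
    Arrays.sort(norms, Long::compareUnsigned);
    System.out.println(Arrays.toString(norms)); // [1, 10, 945, -3]
  }
}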
@@ -31,12 +31,14 @@ import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;

@@ -250,7 +252,7 @@ public class MultiPhraseQuery extends Query {
}

@Override
protected PhraseMatcher getPhraseMatcher(LeafReaderContext context, boolean exposeOffsets) throws IOException {
protected PhraseMatcher getPhraseMatcher(LeafReaderContext context, SimScorer scorer, boolean exposeOffsets) throws IOException {
assert termArrays.length != 0;
final LeafReader reader = context.reader();

@@ -295,16 +297,16 @@ public class MultiPhraseQuery extends Query {
postingsEnum = exposeOffsets ? new UnionFullPostingsEnum(postings) : new UnionPostingsEnum(postings);
}

postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, positions[pos], terms);
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, new SlowImpactsEnum(postingsEnum), positions[pos], terms);
}

// sort by increasing docFreq order
if (slop == 0) {
ArrayUtil.timSort(postingsFreqs);
return new ExactPhraseMatcher(postingsFreqs, totalMatchCost);
return new ExactPhraseMatcher(postingsFreqs, scoreMode, scorer, totalMatchCost);
}
else {
return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost, exposeOffsets);
return new SloppyPhraseMatcher(postingsFreqs, slop, scoreMode, scorer, totalMatchCost, exposeOffsets);
}

}
@@ -28,15 +28,22 @@ import java.io.IOException;
*/
abstract class PhraseMatcher {

protected final DocIdSetIterator approximation;
private final float matchCost;

PhraseMatcher(DocIdSetIterator approximation, float matchCost) {
assert TwoPhaseIterator.unwrap(approximation) == null;
this.approximation = approximation;
PhraseMatcher(float matchCost) {
this.matchCost = matchCost;
}

/**
* Approximation that only matches documents that have all terms.
*/
abstract DocIdSetIterator approximation();

/**
* Approximation that is aware of impacts.
*/
abstract ImpactsDISI impactsApproximation();

/**
* An upper bound on the number of possible matches on this document
*/
@@ -24,17 +24,20 @@ import java.util.List;

import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

@@ -296,12 +299,14 @@ public class PhraseQuery extends Query {

static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
final PostingsEnum postings;
final ImpactsEnum impacts;
final int position;
final Term[] terms;
final int nTerms; // for faster comparisons

public PostingsAndFreq(PostingsEnum postings, int position, Term... terms) {
public PostingsAndFreq(PostingsEnum postings, ImpactsEnum impacts, int position, Term... terms) {
this.postings = postings;
this.impacts = impacts;
this.position = position;
nTerms = terms==null ? 0 : terms.length;
if (nTerms>0) {

@@ -362,7 +367,7 @@ public class PhraseQuery extends Query {
/** A guess of
* the average number of simple operations for the initial seek and buffer refill
* per document for the positions of a term.
* See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
* See also {@link Lucene50PostingsReader.BlockImpactsPostingsEnum#nextPosition()}.
* <p>
* Aside: Instead of being constant this could depend among others on
* {@link Lucene50PostingsFormat#BLOCK_SIZE},

@@ -374,7 +379,7 @@ public class PhraseQuery extends Query {
*/
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;

/** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
/** Number of simple operations in {@link Lucene50PostingsReader.BlockImpactsPostingsEnum#nextPosition()}
* when no seek or buffer refill is done.
*/
private static final int TERM_OPS_PER_POS = 7;

@@ -430,7 +435,7 @@ public class PhraseQuery extends Query {
}

@Override
protected PhraseMatcher getPhraseMatcher(LeafReaderContext context, boolean exposeOffsets) throws IOException {
protected PhraseMatcher getPhraseMatcher(LeafReaderContext context, SimScorer scorer, boolean exposeOffsets) throws IOException {
assert terms.length > 0;
final LeafReader reader = context.reader();
PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.length];

@@ -456,18 +461,25 @@ public class PhraseQuery extends Query {
return null;
}
te.seekExact(t.bytes(), state);
PostingsEnum postingsEnum = te.postings(null, exposeOffsets ? PostingsEnum.ALL : PostingsEnum.POSITIONS);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
PostingsEnum postingsEnum;
ImpactsEnum impactsEnum;
if (scoreMode == ScoreMode.TOP_SCORES) {
postingsEnum = impactsEnum = te.impacts(exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS);
} else {
postingsEnum = te.postings(null, exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS);
impactsEnum = new SlowImpactsEnum(postingsEnum);
}
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, impactsEnum, positions[i], t);
totalMatchCost += termPositionsCost(te);
}

// sort by increasing docFreq order
if (slop == 0) {
ArrayUtil.timSort(postingsFreqs);
return new ExactPhraseMatcher(postingsFreqs, totalMatchCost);
return new ExactPhraseMatcher(postingsFreqs, scoreMode, scorer, totalMatchCost);
}
else {
return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost, exposeOffsets);
return new SloppyPhraseMatcher(postingsFreqs, slop, scoreMode, scorer, totalMatchCost, exposeOffsets);
}
}
};
@@ -21,6 +21,8 @@ import java.io.IOException;

class PhraseScorer extends Scorer {

final DocIdSetIterator approximation;
final ImpactsDISI impactsApproximation;
final PhraseMatcher matcher;
final ScoreMode scoreMode;
private final LeafSimScorer simScorer;

@@ -35,11 +37,13 @@ class PhraseScorer extends Scorer {
this.scoreMode = scoreMode;
this.simScorer = simScorer;
this.matchCost = matcher.getMatchCost();
this.approximation = matcher.approximation();
this.impactsApproximation = matcher.impactsApproximation();
}

@Override
public TwoPhaseIterator twoPhaseIterator() {
return new TwoPhaseIterator(matcher.approximation) {
return new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {
matcher.reset();

@@ -63,7 +67,7 @@ class PhraseScorer extends Scorer {

@Override
public int docID() {
return matcher.approximation.docID();
return approximation.docID();
}

@Override

@@ -85,12 +89,17 @@ class PhraseScorer extends Scorer {
@Override
public void setMinCompetitiveScore(float minScore) {
this.minCompetitiveScore = minScore;
impactsApproximation.setMinCompetitiveScore(minScore);
}

@Override
public int advanceShallow(int target) throws IOException {
return impactsApproximation.advanceShallow(target);
}

@Override
public float getMaxScore(int upTo) throws IOException {
// TODO: merge impacts of all clauses to get better score upper bounds
return simScorer.getSimScorer().score(Integer.MAX_VALUE, 1L);
return impactsApproximation.getMaxScore(upTo);
}

@Override

@@ -98,5 +107,4 @@ class PhraseScorer extends Scorer {
return "PhraseScorer(" + weight + ")";
}

}
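For context on how the wiring above is consumed: PhraseScorer now delegates advanceShallow and getMaxScore to the ImpactsDISI, so a top-k caller can skip a whole block of documents whose maximum possible score cannot beat the current threshold. A rough sketch of such a consumer loop against the Lucene 8 Scorer API; the loop and class name are illustrative assumptions, not part of the patch:

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;

// Illustrative only: skip impact blocks whose max score is not competitive.
final class BlockSkippingLoopSketch {
  static void collect(Scorer scorer, float minCompetitiveScore) throws IOException {
    DocIdSetIterator it = scorer.iterator();
    int doc = it.nextDoc();
    while (doc != DocIdSetIterator.NO_MORE_DOCS) {
      int upTo = scorer.advanceShallow(doc); // last doc of the current impacts block
      if (upTo < DocIdSetIterator.NO_MORE_DOCS && scorer.getMaxScore(upTo) < minCompetitiveScore) {
        doc = it.advance(upTo + 1); // the whole block cannot compete, jump past it
        continue;
      }
      float score = scorer.score(); // doc is a confirmed match at this point
      // ... collect (doc, score) and possibly raise minCompetitiveScore ...
      doc = it.nextDoc();
    }
  }
}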
@@ -49,11 +49,11 @@ abstract class PhraseWeight extends Weight {

protected abstract Similarity.SimScorer getStats(IndexSearcher searcher) throws IOException;

protected abstract PhraseMatcher getPhraseMatcher(LeafReaderContext context, boolean exposeOffsets) throws IOException;
protected abstract PhraseMatcher getPhraseMatcher(LeafReaderContext context, SimScorer scorer, boolean exposeOffsets) throws IOException;

@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
PhraseMatcher matcher = getPhraseMatcher(context, false);
PhraseMatcher matcher = getPhraseMatcher(context, stats, false);
if (matcher == null)
return null;
LeafSimScorer simScorer = new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());

@@ -62,8 +62,8 @@ abstract class PhraseWeight extends Weight {

@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
PhraseMatcher matcher = getPhraseMatcher(context, false);
if (matcher == null || matcher.approximation.advance(doc) != doc) {
PhraseMatcher matcher = getPhraseMatcher(context, stats, false);
if (matcher == null || matcher.approximation().advance(doc) != doc) {
return Explanation.noMatch("no matching terms");
}
matcher.reset();

@@ -86,8 +86,8 @@ abstract class PhraseWeight extends Weight {
@Override
public Matches matches(LeafReaderContext context, int doc) throws IOException {
return MatchesUtils.forField(field, () -> {
PhraseMatcher matcher = getPhraseMatcher(context, true);
if (matcher == null || matcher.approximation.advance(doc) != doc) {
PhraseMatcher matcher = getPhraseMatcher(context, stats, true);
if (matcher == null || matcher.approximation().advance(doc) != doc) {
return null;
}
matcher.reset();
@@ -20,13 +20,19 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsSource;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.FixedBitSet;

/**

@@ -56,6 +62,9 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
private final PhraseQueue pq; // for advancing min position
private final boolean captureLeadMatch;

private final DocIdSetIterator approximation;
private final ImpactsDISI impactsApproximation;

private int end; // current largest phrase position

private int leadPosition;

@@ -72,8 +81,8 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
private boolean positioned;
private int matchLength;

SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, float matchCost, boolean captureLeadMatch) {
super(approximation(postings), matchCost);
SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, ScoreMode scoreMode, SimScorer scorer, float matchCost, boolean captureLeadMatch) {
super(matchCost);
this.slop = slop;
this.numPostings = postings.length;
this.captureLeadMatch = captureLeadMatch;

@@ -82,14 +91,49 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
for (int i = 0; i < postings.length; ++i) {
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
}

approximation = ConjunctionDISI.intersectIterators(Arrays.stream(postings).map(p -> p.postings).collect(Collectors.toList()));
// What would be a good upper bound of the sloppy frequency? A sum of the
// sub frequencies would be correct, but it is usually so much higher than
// the actual sloppy frequency that it doesn't help skip irrelevant
// documents. As a consequence for now, sloppy phrase queries use dummy
// impacts:
final ImpactsSource impactsSource = new ImpactsSource() {
@Override
public Impacts getImpacts() throws IOException {
return new Impacts() {

@Override
public int numLevels() {
return 1;
}

@Override
public List<Impact> getImpacts(int level) {
return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
}

@Override
public int getDocIdUpTo(int level) {
return DocIdSetIterator.NO_MORE_DOCS;
}
};
}

@Override
public void advanceShallow(int target) throws IOException {}
};
impactsApproximation = new ImpactsDISI(approximation, impactsSource, scorer);
}

private static DocIdSetIterator approximation(PhraseQuery.PostingsAndFreq[] postings) {
List<DocIdSetIterator> iterators = new ArrayList<>();
for (PhraseQuery.PostingsAndFreq posting : postings) {
iterators.add(posting.postings);
}
return ConjunctionDISI.intersectIterators(iterators);
@Override
DocIdSetIterator approximation() {
return approximation;
}

@Override
ImpactsDISI impactsApproximation() {
return impactsApproximation;
}

@Override
@@ -23,6 +23,8 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
|
@@ -34,18 +36,24 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.Impact;
|
||||
import org.apache.lucene.index.Impacts;
|
||||
import org.apache.lucene.index.ImpactsEnum;
|
||||
import org.apache.lucene.index.ImpactsSource;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.AfterClass;
|
||||
|
@@ -761,4 +769,306 @@ public class TestPhraseQuery extends LuceneTestCase {
|
|||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMergeImpacts() throws IOException {
|
||||
DummyImpactsEnum impacts1 = new DummyImpactsEnum(1000);
|
||||
DummyImpactsEnum impacts2 = new DummyImpactsEnum(2000);
|
||||
ImpactsSource mergedImpacts = ExactPhraseMatcher.mergeImpacts(new ImpactsEnum[] { impacts1, impacts2 });
|
||||
|
||||
impacts1.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
});
|
||||
|
||||
// Merge with empty impacts
|
||||
impacts2.reset(
|
||||
new Impact[0][],
|
||||
new int[0]);
|
||||
assertEquals(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
},
|
||||
mergedImpacts.getImpacts());
|
||||
|
||||
// Merge with dummy impacts
|
||||
impacts2.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(Integer.MAX_VALUE, 1) }
|
||||
},
|
||||
new int[] {
|
||||
5000
|
||||
});
|
||||
assertEquals(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
},
|
||||
mergedImpacts.getImpacts());
|
||||
|
||||
// Merge with dummy impacts that we don't special case
|
||||
impacts2.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(Integer.MAX_VALUE, 2) }
|
||||
},
|
||||
new int[] {
|
||||
5000
|
||||
});
|
||||
assertEquals(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
},
|
||||
mergedImpacts.getImpacts());
|
||||
|
||||
// First level of impacts2 doesn't cover the first level of impacts1
|
||||
impacts2.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(2, 10), new Impact(6, 13) },
|
||||
new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
|
||||
},
|
||||
new int[] {
|
||||
90,
|
||||
1000
|
||||
});
|
||||
assertEquals(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(7, 13) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 11), new Impact(7, 13) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
},
|
||||
mergedImpacts.getImpacts());
|
||||
|
||||
// Second level of impacts2 doesn't cover the first level of impacts1
|
||||
impacts2.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(2, 10), new Impact(6, 11) },
|
||||
new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
|
||||
},
|
||||
new int[] {
|
||||
150,
|
||||
900
|
||||
});
|
||||
assertEquals(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(2, 10), new Impact(3, 11), new Impact(5, 12), new Impact(6, 13) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) } // same as impacts1
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
},
|
||||
mergedImpacts.getImpacts());
|
||||
|
||||
impacts2.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(4, 10), new Impact(9, 13) },
|
||||
new Impact[] { new Impact(1, 1), new Impact(4, 10), new Impact(5, 11), new Impact(8, 13), new Impact(12, 14), new Impact(13, 15) }
|
||||
},
|
||||
new int[] {
|
||||
113,
|
||||
950
|
||||
});
|
||||
assertEquals(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(3, 10), new Impact(4, 12), new Impact(8, 13) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, 11), new Impact(8, 13), new Impact(12, 14) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
},
|
||||
mergedImpacts.getImpacts());
|
||||
|
||||
// Make sure negative norms are treated as unsigned
|
||||
impacts1.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, -10), new Impact(8, -5) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(5, -15), new Impact(8, -5), new Impact(12, -3) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
});
|
||||
impacts2.reset(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(2, 10), new Impact(12, -4) },
|
||||
new Impact[] { new Impact(3, 9), new Impact(12, -4), new Impact(20, -1) }
|
||||
},
|
||||
new int[] {
|
||||
150,
|
||||
960
|
||||
});
|
||||
assertEquals(
|
||||
new Impact[][] {
|
||||
new Impact[] { new Impact(2, 10), new Impact(8, -4) },
|
||||
new Impact[] { new Impact(3, 10), new Impact(8, -4), new Impact(12, -3) }
|
||||
},
|
||||
new int[] {
|
||||
110,
|
||||
945
|
||||
},
|
||||
mergedImpacts.getImpacts());
|
||||
}
|
||||
|
||||
private static void assertEquals(Impact[][] impacts, int[] docIdUpTo, Impacts actual) {
|
||||
assertEquals(impacts.length, actual.numLevels());
|
||||
for (int i = 0; i < impacts.length; ++i) {
|
||||
assertEquals(docIdUpTo[i], actual.getDocIdUpTo(i));
|
||||
assertEquals(Arrays.asList(impacts[i]), actual.getImpacts(i));
|
||||
}
|
||||
}
|
||||
|
||||
private static class DummyImpactsEnum extends ImpactsEnum {
|
||||
|
||||
private final long cost;
|
||||
private Impact[][] impacts;
|
||||
private int[] docIdUpTo;
|
||||
|
||||
DummyImpactsEnum(long cost) {
|
||||
this.cost = cost;
|
||||
}
|
||||
|
||||
void reset(Impact[][] impacts, int[] docIdUpTo) {
|
||||
this.impacts = impacts;
|
||||
this.docIdUpTo = docIdUpTo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void advanceShallow(int target) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Impacts getImpacts() throws IOException {
|
||||
return new Impacts() {
|
||||
|
||||
@Override
|
||||
public int numLevels() {
|
||||
return impacts.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocIdUpTo(int level) {
|
||||
return docIdUpTo[level];
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Impact> getImpacts(int level) {
|
||||
return Arrays.asList(impacts[level]);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextPosition() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getPayload() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return cost;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void testRandomTopDocs() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
int numDocs = atLeast(128 * 8 * 8 * 3); // make sure some terms have skip data
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
int numTerms = random().nextInt(1 << random().nextInt(5));
|
||||
String text = IntStream.range(0, numTerms)
|
||||
.mapToObj(index -> random().nextBoolean() ? "a" : random().nextBoolean() ? "b" : "c")
|
||||
.collect(Collectors.joining(" "));
|
||||
doc.add(new TextField("foo", text, Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
IndexReader reader = DirectoryReader.open(w);
|
||||
w.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
for (String firstTerm : new String[] {"a", "b", "c"}) {
|
||||
for (String secondTerm : new String[] {"a", "b", "c"}) {
|
||||
Query query = new PhraseQuery("foo", new BytesRef(firstTerm), new BytesRef(secondTerm));
|
||||
|
||||
TopScoreDocCollector collector1 = TopScoreDocCollector.create(10, null, Integer.MAX_VALUE); // COMPLETE
|
||||
TopScoreDocCollector collector2 = TopScoreDocCollector.create(10, null, 10); // TOP_SCORES
|
||||
|
||||
searcher.search(query, collector1);
|
||||
searcher.search(query, collector2);
|
||||
CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||
|
||||
Query filteredQuery = new BooleanQuery.Builder()
|
||||
.add(query, Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "b")), Occur.FILTER)
|
||||
.build();
|
||||
|
||||
collector1 = TopScoreDocCollector.create(10, null, Integer.MAX_VALUE); // COMPLETE
|
||||
collector2 = TopScoreDocCollector.create(10, null, 10); // TOP_SCORES
|
||||
searcher.search(filteredQuery, collector1);
|
||||
searcher.search(filteredQuery, collector2);
|
||||
CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -241,7 +241,7 @@ class TermIntervalsSource extends IntervalsSource {
/** A guess of
* the average number of simple operations for the initial seek and buffer refill
* per document for the positions of a term.
* See also {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}.
* See also {@link Lucene50PostingsReader.EverythingEnum#nextPosition()}.
* <p>
* Aside: Instead of being constant this could depend among others on
* {@link Lucene50PostingsFormat#BLOCK_SIZE},

@@ -253,7 +253,7 @@ class TermIntervalsSource extends IntervalsSource {
*/
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;

/** Number of simple operations in {@link Lucene50PostingsReader.BlockPostingsEnum#nextPosition()}
/** Number of simple operations in {@link Lucene50PostingsReader.EverythingEnum#nextPosition()}
* when no seek or buffer refill is done.
*/
private static final int TERM_OPS_PER_POS = 7;