mirror of https://github.com/apache/lucene.git
Merge branch 'main' into speed_up_filtered_maxscore
commit 9b314aa155
@@ -19,7 +19,7 @@ Improvements

Optimizations
---------------------
(No changes)
* GITHUB#14011: Reduce allocation rate in HNSW concurrent merge. (Viliam Durina)

Bug Fixes
---------------------

@@ -41,6 +41,9 @@ API Changes
* GITHUB#13957: Removed LeafSimScorer class, to save its overhead. Scorers now
  compute scores directly from a SimScorer, postings and norms. (Adrien Grand)

* GITHUB#13998: Add IndexInput::isLoaded to determine if the contents of an
  input is resident in physical memory. (Chris Hegarty)

New Features
---------------------
(No changes)

@@ -113,6 +116,15 @@ Optimizations
* GITHUB#14014: Filtered disjunctions now get executed via `MaxScoreBulkScorer`.
  (Adrien Grand)

* GITHUB#14023: Make JVM inlining decisions more predictable in our main
  queries. (Adrien Grand)

* GITHUB#14032: Speed up PostingsEnum when positions are requested.
  (Adrien Grand)

* GITHUB#14031: Ensure Panama float vector distance impls inlinable.
  (Robert Muir, Chris Hegarty)

Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended

@@ -143,6 +155,8 @@ Bug Fixes
* GITHUB#14008: Counts provided by taxonomy facets in addition to another aggregation are now returned together with
  their corresponding ordinals. (Paul King)

* GITHUB#14027: Make SegmentInfos#readCommit(Directory, String, int) public (Luca Cavanna)

======================= Lucene 10.0.0 =======================

API Changes
@ -638,9 +638,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
final boolean indexHasPayloads;
|
||||
final boolean indexHasOffsetsOrPayloads;
|
||||
|
||||
private int freq; // freq we last read
|
||||
private long freqFP; // offset of the freq block
|
||||
|
||||
private int position; // current position
|
||||
|
||||
// value of docBufferUpto on the last doc ID when positions have been read
|
||||
private int posDocBufferUpto;
|
||||
|
||||
// how many positions "behind" we are; nextPosition must
|
||||
// skip these to "catch up":
|
||||
private int posPendingCount;
|
||||
|
@ -662,6 +666,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
|
||||
private boolean needsOffsets; // true if we actually need offsets
|
||||
private boolean needsPayloads; // true if we actually need payloads
|
||||
private boolean needsPayloadsOrOffsets;
|
||||
|
||||
public EverythingEnum(FieldInfo fieldInfo) throws IOException {
|
||||
super(fieldInfo);
|
||||
|
@ -745,8 +750,11 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
|
||||
}
|
||||
|
||||
this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
|
||||
this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
|
||||
this.needsOffsets =
|
||||
indexHasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
|
||||
this.needsPayloads =
|
||||
indexHasPayloads && PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
|
||||
this.needsPayloadsOrOffsets = this.needsPayloads || this.needsOffsets;
|
||||
|
||||
level1BlockPosUpto = 0;
|
||||
level1BlockPayUpto = 0;
|
||||
|
@ -758,8 +766,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
return freq;
|
||||
public int freq() throws IOException {
|
||||
if (freqFP != -1) {
|
||||
docIn.seek(freqFP);
|
||||
pforUtil.decode(docInUtil, freqBuffer);
|
||||
freqFP = -1;
|
||||
}
|
||||
return freqBuffer[docBufferUpto - 1];
|
||||
}
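
The freq() rewrite above pairs with the refillDocs() change that follows (GITHUB#14032): refillDocs() now only records the file pointer of the block's frequency data in freqFP and skips over it, and freq() decodes that block on first use. Below is a minimal standalone sketch of this lazy-decode pattern; the class and field names are made up for illustration and are not Lucene APIs.

/** Toy illustration of deferring block decoding until a caller actually needs it. */
final class LazyFreqBlock {
  private final int[] file;                    // stand-in for the on-disk postings data
  private final int[] freqBuffer = new int[4]; // decoded frequencies of the current block
  private long freqFP = -1;                    // offset of the pending freq block, -1 once decoded

  LazyFreqBlock(int[] file) {
    this.file = file;
  }

  /** Advance to a new block: remember where its freqs live, but do not decode them yet. */
  void refillDocs(int blockStart) {
    freqFP = blockStart;
    // ... decode doc deltas here and skip over the freq block ...
  }

  /** Decode the freq block lazily, only when frequencies are actually requested. */
  int freq(int indexInBlock) {
    if (freqFP != -1) {
      // "decoding" is a plain copy here; the real code seeks and runs PForUtil.decode
      System.arraycopy(file, (int) freqFP, freqBuffer, 0, freqBuffer.length);
      freqFP = -1;
    }
    return freqBuffer[indexInBlock];
  }

  public static void main(String[] args) {
    LazyFreqBlock block = new LazyFreqBlock(new int[] {3, 1, 4, 1});
    block.refillDocs(0);               // nothing decoded yet
    System.out.println(block.freq(2)); // 4, decoded on this first access
  }
}

Consumers that never ask for frequencies (pure boolean matching, for example) now skip the decode work entirely.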
|
||||
|
||||
private void refillDocs() throws IOException {
|
||||
|
@ -768,11 +781,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer);
|
||||
pforUtil.decode(docInUtil, freqBuffer);
|
||||
freqFP = docIn.getFilePointer();
|
||||
PForUtil.skip(docIn);
|
||||
docCountUpto += BLOCK_SIZE;
|
||||
} else if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = (int) totalTermFreq;
|
||||
freqFP = -1;
|
||||
docBuffer[1] = NO_MORE_DOCS;
|
||||
docCountUpto++;
|
||||
docBufferSize = 1;
|
||||
|
@ -781,11 +796,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
|
||||
prefixSum(docBuffer, left, prevDocID);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
freqFP = -1;
|
||||
docCountUpto += left;
|
||||
docBufferSize = left;
|
||||
}
|
||||
prevDocID = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
posDocBufferUpto = 0;
|
||||
assert docBuffer[docBufferSize] == NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
|
@ -846,6 +863,8 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
payloadByteUpto = level0BlockPayUpto;
|
||||
}
|
||||
posBufferUpto = BLOCK_SIZE;
|
||||
} else {
|
||||
posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE);
|
||||
}
|
||||
|
||||
if (docFreq - docCountUpto >= BLOCK_SIZE) {
|
||||
|
@ -875,34 +894,23 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
this.doc = docBuffer[docBufferUpto];
|
||||
this.freq = freqBuffer[docBufferUpto];
|
||||
docBufferUpto++;
|
||||
posPendingCount += freq;
|
||||
position = 0;
|
||||
lastStartOffset = 0;
|
||||
return doc;
|
||||
}
|
||||
|
||||
private void skipLevel0To(int target) throws IOException {
|
||||
long posFP;
|
||||
int posUpto;
|
||||
long payFP;
|
||||
int payUpto;
|
||||
|
||||
while (true) {
|
||||
prevDocID = level0LastDocID;
|
||||
|
||||
// If nextBlockPosFP is less than the current FP, it means that the block of positions for
|
||||
// the first docs of the next block are already decoded. In this case we just accumulate
|
||||
// frequencies into posPendingCount instead of seeking backwards and decoding the same pos
|
||||
// block again.
|
||||
if (level0PosEndFP >= posIn.getFilePointer()) {
|
||||
posIn.seek(level0PosEndFP);
|
||||
posPendingCount = level0BlockPosUpto;
|
||||
if (indexHasOffsetsOrPayloads) {
|
||||
assert level0PayEndFP >= payIn.getFilePointer();
|
||||
payIn.seek(level0PayEndFP);
|
||||
payloadByteUpto = level0BlockPayUpto;
|
||||
}
|
||||
posBufferUpto = BLOCK_SIZE;
|
||||
} else {
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE);
|
||||
}
|
||||
posFP = level0PosEndFP;
|
||||
posUpto = level0BlockPosUpto;
|
||||
payFP = level0PayEndFP;
|
||||
payUpto = level0BlockPayUpto;
|
||||
|
||||
if (docFreq - docCountUpto >= BLOCK_SIZE) {
|
||||
docIn.readVLong(); // skip0 num bytes
|
||||
|
@ -931,6 +939,23 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If nextBlockPosFP is less than the current FP, it means that the block of positions for
|
||||
// the first docs of the next block are already decoded. In this case we just accumulate
|
||||
// frequencies into posPendingCount instead of seeking backwards and decoding the same pos
|
||||
// block again.
|
||||
if (posFP >= posIn.getFilePointer()) {
|
||||
posIn.seek(posFP);
|
||||
posPendingCount = posUpto;
|
||||
if (indexHasOffsetsOrPayloads) {
|
||||
assert level0PayEndFP >= payIn.getFilePointer();
|
||||
payIn.seek(payFP);
|
||||
payloadByteUpto = payUpto;
|
||||
}
|
||||
posBufferUpto = BLOCK_SIZE;
|
||||
} else {
|
||||
posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -947,16 +972,12 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
|
||||
this.freq = freqBuffer[next];
|
||||
this.docBufferUpto = next + 1;
|
||||
position = 0;
|
||||
lastStartOffset = 0;
|
||||
|
||||
return this.doc = docBuffer[next];
|
||||
}
|
||||
|
||||
private void skipPositions() throws IOException {
|
||||
private void skipPositions(int freq) throws IOException {
|
||||
// Skip positions now:
|
||||
int toSkip = posPendingCount - freq;
|
||||
// if (DEBUG) {
|
||||
|
@ -1003,41 +1024,45 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
lastStartOffset = 0;
|
||||
}
|
||||
|
||||
private void refillLastPositionBlock() throws IOException {
|
||||
final int count = (int) (totalTermFreq % BLOCK_SIZE);
|
||||
int payloadLength = 0;
|
||||
int offsetLength = 0;
|
||||
payloadByteUpto = 0;
|
||||
for (int i = 0; i < count; i++) {
|
||||
int code = posIn.readVInt();
|
||||
if (indexHasPayloads) {
|
||||
if ((code & 1) != 0) {
|
||||
payloadLength = posIn.readVInt();
|
||||
}
|
||||
payloadLengthBuffer[i] = payloadLength;
|
||||
posDeltaBuffer[i] = code >>> 1;
|
||||
if (payloadLength != 0) {
|
||||
if (payloadByteUpto + payloadLength > payloadBytes.length) {
|
||||
payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
|
||||
}
|
||||
posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
|
||||
payloadByteUpto += payloadLength;
|
||||
}
|
||||
} else {
|
||||
posDeltaBuffer[i] = code;
|
||||
}
|
||||
|
||||
if (indexHasOffsets) {
|
||||
int deltaCode = posIn.readVInt();
|
||||
if ((deltaCode & 1) != 0) {
|
||||
offsetLength = posIn.readVInt();
|
||||
}
|
||||
offsetStartDeltaBuffer[i] = deltaCode >>> 1;
|
||||
offsetLengthBuffer[i] = offsetLength;
|
||||
}
|
||||
}
|
||||
payloadByteUpto = 0;
|
||||
}
|
||||
|
||||
private void refillPositions() throws IOException {
|
||||
if (posIn.getFilePointer() == lastPosBlockFP) {
|
||||
final int count = (int) (totalTermFreq % BLOCK_SIZE);
|
||||
int payloadLength = 0;
|
||||
int offsetLength = 0;
|
||||
payloadByteUpto = 0;
|
||||
for (int i = 0; i < count; i++) {
|
||||
int code = posIn.readVInt();
|
||||
if (indexHasPayloads) {
|
||||
if ((code & 1) != 0) {
|
||||
payloadLength = posIn.readVInt();
|
||||
}
|
||||
payloadLengthBuffer[i] = payloadLength;
|
||||
posDeltaBuffer[i] = code >>> 1;
|
||||
if (payloadLength != 0) {
|
||||
if (payloadByteUpto + payloadLength > payloadBytes.length) {
|
||||
payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
|
||||
}
|
||||
posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
|
||||
payloadByteUpto += payloadLength;
|
||||
}
|
||||
} else {
|
||||
posDeltaBuffer[i] = code;
|
||||
}
|
||||
|
||||
if (indexHasOffsets) {
|
||||
int deltaCode = posIn.readVInt();
|
||||
if ((deltaCode & 1) != 0) {
|
||||
offsetLength = posIn.readVInt();
|
||||
}
|
||||
offsetStartDeltaBuffer[i] = deltaCode >>> 1;
|
||||
offsetLengthBuffer[i] = offsetLength;
|
||||
}
|
||||
}
|
||||
payloadByteUpto = 0;
|
||||
refillLastPositionBlock();
|
||||
} else {
|
||||
pforUtil.decode(posInUtil, posDeltaBuffer);
|
||||
|
||||
|
@ -1054,8 +1079,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
// this works, because when writing a vint block we always force the first length to be
|
||||
// written
|
||||
PForUtil.skip(payIn); // skip over lengths
|
||||
int numBytes = payIn.readVInt(); // read length of payloadBytes
|
||||
payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
|
||||
payIn.skipBytes(payIn.readVInt()); // skip over payloadBytes
|
||||
}
|
||||
payloadByteUpto = 0;
|
||||
}
|
||||
|
@ -1074,13 +1098,40 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
private void accumulatePayloadAndOffsets() {
|
||||
if (needsPayloads) {
|
||||
payloadLength = payloadLengthBuffer[posBufferUpto];
|
||||
payload.bytes = payloadBytes;
|
||||
payload.offset = payloadByteUpto;
|
||||
payload.length = payloadLength;
|
||||
payloadByteUpto += payloadLength;
|
||||
}
|
||||
|
||||
if (needsOffsets) {
|
||||
startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
|
||||
endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
|
||||
lastStartOffset = startOffset;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextPosition() throws IOException {
|
||||
assert posPendingCount > 0;
|
||||
if (posDocBufferUpto != docBufferUpto) {
|
||||
int freq = freq(); // triggers lazy decoding of freqs
|
||||
|
||||
if (posPendingCount > freq) {
|
||||
skipPositions();
|
||||
posPendingCount = freq;
|
||||
// First position that is being read on this doc.
|
||||
posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, docBufferUpto);
|
||||
posDocBufferUpto = docBufferUpto;
|
||||
|
||||
assert posPendingCount > 0;
|
||||
|
||||
if (posPendingCount > freq) {
|
||||
skipPositions(freq);
|
||||
posPendingCount = freq;
|
||||
}
|
||||
|
||||
position = 0;
|
||||
lastStartOffset = 0;
|
||||
}
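
nextPosition() now does its catch-up work only on the first position read for a document: the frequencies of all docs skipped since the last position read are summed into posPendingCount, surplus positions are skipped, and posDocBufferUpto records where the next catch-up must start. A rough standalone model of that bookkeeping, with illustrative names rather than Lucene code:

import java.util.Arrays;

/** Toy model of skipping the positions of docs whose positions were never requested. */
final class PositionCatchUp {
  private final int[] freqs;      // freq of each doc in the block
  private final int[] positions;  // positions of all docs in the block, concatenated
  private int posUpto = 0;        // read cursor into positions
  private int posDocUpto = 0;     // first doc whose positions have not been accounted for

  PositionCatchUp(int[] freqs, int[] positions) {
    this.freqs = freqs;
    this.positions = positions;
  }

  /** Return the positions of doc {@code doc}, skipping those of the docs in between. */
  int[] positionsOf(int doc) {
    // catch up: skip positions that belong to docs we advanced past without reading them
    for (int d = posDocUpto; d < doc; d++) {
      posUpto += freqs[d];
    }
    posDocUpto = doc + 1;
    int start = posUpto;
    posUpto += freqs[doc];
    return Arrays.copyOfRange(positions, start, posUpto);
  }

  public static void main(String[] args) {
    PositionCatchUp block =
        new PositionCatchUp(new int[] {2, 1, 3}, new int[] {1, 5, 2, 7, 8, 9});
    System.out.println(Arrays.toString(block.positionsOf(2))); // [7, 8, 9]
  }
}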
|
||||
|
||||
if (posBufferUpto == BLOCK_SIZE) {
|
||||
|
@ -1089,18 +1140,8 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
position += posDeltaBuffer[posBufferUpto];
|
||||
|
||||
if (indexHasPayloads) {
|
||||
payloadLength = payloadLengthBuffer[posBufferUpto];
|
||||
payload.bytes = payloadBytes;
|
||||
payload.offset = payloadByteUpto;
|
||||
payload.length = payloadLength;
|
||||
payloadByteUpto += payloadLength;
|
||||
}
|
||||
|
||||
if (indexHasOffsets) {
|
||||
startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
|
||||
endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
|
||||
lastStartOffset = startOffset;
|
||||
if (needsPayloadsOrOffsets) {
|
||||
accumulatePayloadAndOffsets();
|
||||
}
|
||||
|
||||
posBufferUpto++;
|
||||
|
@ -1110,17 +1151,23 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int startOffset() {
|
||||
if (needsOffsets == false) {
|
||||
return -1;
|
||||
}
|
||||
return startOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() {
|
||||
if (needsOffsets == false) {
|
||||
return -1;
|
||||
}
|
||||
return endOffset;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getPayload() {
|
||||
if (payloadLength == 0) {
|
||||
if (needsPayloads == false || payloadLength == 0) {
|
||||
return null;
|
||||
} else {
|
||||
return payload;
|
||||
|
@ -1466,9 +1513,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
final boolean indexHasPayloads;
|
||||
final boolean indexHasOffsetsOrPayloads;
|
||||
|
||||
private int freq; // freq we last read
|
||||
private long freqFP; // offset of the freq block
|
||||
|
||||
private int position; // current position
|
||||
|
||||
// value of docBufferUpto on the last doc ID when positions have been read
|
||||
private int posDocBufferUpto;
|
||||
|
||||
// how many positions "behind" we are; nextPosition must
|
||||
// skip these to "catch up":
|
||||
private int posPendingCount;
|
||||
|
@ -1516,8 +1567,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
return freq;
|
||||
public int freq() throws IOException {
|
||||
if (freqFP != -1) {
|
||||
docIn.seek(freqFP);
|
||||
pforUtil.decode(docInUtil, freqBuffer);
|
||||
freqFP = -1;
|
||||
}
|
||||
return freqBuffer[docBufferUpto - 1];
|
||||
}
|
||||
|
||||
private void refillDocs() throws IOException {
|
||||
|
@ -1526,24 +1582,30 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
|
||||
if (left >= BLOCK_SIZE) {
|
||||
forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer);
|
||||
pforUtil.decode(docInUtil, freqBuffer);
|
||||
freqFP = docIn.getFilePointer();
|
||||
PForUtil.skip(docIn);
|
||||
docCountUpto += BLOCK_SIZE;
|
||||
} else if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = (int) totalTermFreq;
|
||||
freqFP = -1;
|
||||
docBuffer[1] = NO_MORE_DOCS;
|
||||
docCountUpto++;
|
||||
docBufferSize = 1;
|
||||
|
||||
} else {
|
||||
// Read vInts:
|
||||
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
|
||||
prefixSum(docBuffer, left, prevDocID);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
freqFP = -1;
|
||||
docCountUpto += left;
|
||||
docBufferSize = left;
|
||||
freqFP = -1;
|
||||
}
|
||||
prevDocID = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
posDocBufferUpto = 0;
|
||||
assert docBuffer[docBufferSize] == NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
|
@ -1585,20 +1647,14 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
private void skipLevel0To(int target) throws IOException {
|
||||
long posFP;
|
||||
int posUpto;
|
||||
|
||||
while (true) {
|
||||
prevDocID = level0LastDocID;
|
||||
|
||||
// If nextBlockPosFP is less than the current FP, it means that the block of positions for
|
||||
// the first docs of the next block are already decoded. In this case we just accumulate
|
||||
// frequencies into posPendingCount instead of seeking backwards and decoding the same pos
|
||||
// block again.
|
||||
if (level0PosEndFP >= posIn.getFilePointer()) {
|
||||
posIn.seek(level0PosEndFP);
|
||||
posPendingCount = level0BlockPosUpto;
|
||||
posBufferUpto = BLOCK_SIZE;
|
||||
} else {
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE);
|
||||
}
|
||||
posFP = level0PosEndFP;
|
||||
posUpto = level0BlockPosUpto;
|
||||
|
||||
if (docFreq - docCountUpto >= BLOCK_SIZE) {
|
||||
docIn.readVLong(); // skip0 num bytes
|
||||
|
@ -1631,6 +1687,18 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If nextBlockPosFP is less than the current FP, it means that the block of positions for
|
||||
// the first docs of the next block are already decoded. In this case we just accumulate
|
||||
// frequencies into posPendingCount instead of seeking backwards and decoding the same pos
|
||||
// block again.
|
||||
if (posFP >= posIn.getFilePointer()) {
|
||||
posIn.seek(posFP);
|
||||
posPendingCount = posUpto;
|
||||
posBufferUpto = BLOCK_SIZE;
|
||||
} else {
|
||||
posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1660,30 +1728,25 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
doc = docBuffer[docBufferUpto];
|
||||
freq = freqBuffer[docBufferUpto];
|
||||
posPendingCount += freq;
|
||||
docBufferUpto++;
|
||||
position = 0;
|
||||
return this.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
advanceShallow(target);
|
||||
if (needsRefilling) {
|
||||
if (target > level0LastDocID || needsRefilling) {
|
||||
advanceShallow(target);
|
||||
assert needsRefilling;
|
||||
refillDocs();
|
||||
needsRefilling = false;
|
||||
}
|
||||
|
||||
int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
|
||||
posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
|
||||
freq = freqBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
position = 0;
|
||||
return this.doc = docBuffer[next];
|
||||
}
|
||||
|
||||
private void skipPositions() throws IOException {
|
||||
private void skipPositions(int freq) throws IOException {
|
||||
// Skip positions now:
|
||||
int toSkip = posPendingCount - freq;
|
||||
// if (DEBUG) {
|
||||
|
@ -1703,8 +1766,6 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
refillPositions();
|
||||
posBufferUpto = toSkip;
|
||||
}
|
||||
|
||||
position = 0;
|
||||
}
|
||||
|
||||
private void refillPositions() throws IOException {
|
||||
|
@ -1739,11 +1800,21 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int nextPosition() throws IOException {
|
||||
assert posPendingCount > 0;
|
||||
if (posDocBufferUpto != docBufferUpto) {
|
||||
int freq = freq(); // triggers lazy decoding of freqs
|
||||
|
||||
if (posPendingCount > freq) {
|
||||
skipPositions();
|
||||
posPendingCount = freq;
|
||||
// First position that is being read on this doc.
|
||||
posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, docBufferUpto);
|
||||
posDocBufferUpto = docBufferUpto;
|
||||
|
||||
assert posPendingCount > 0;
|
||||
|
||||
if (posPendingCount > freq) {
|
||||
skipPositions(freq);
|
||||
posPendingCount = freq;
|
||||
}
|
||||
|
||||
position = 0;
|
||||
}
|
||||
|
||||
if (posBufferUpto == BLOCK_SIZE) {
|
||||
|
|
|
@@ -284,7 +284,14 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo>
    return readCommit(directory, segmentFileName, Version.MIN_SUPPORTED_MAJOR);
  }

  static final SegmentInfos readCommit(
  /**
   * Read a particular segmentFileName, as long as the commit's {@link
   * SegmentInfos#getIndexCreatedVersionMajor()} is strictly greater than the provided minimum
   * supported major version. If the commit's version is older, an {@link
   * IndexFormatTooOldException} will be thrown. Note that this may throw an IOException if a commit
   * is in process.
   */
  public static final SegmentInfos readCommit(
      Directory directory, String segmentFileName, int minSupportedMajorVersion)
      throws IOException {

@@ -307,7 +314,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo>
  }

  /** Read the commit from the provided {@link ChecksumIndexInput}. */
  static final SegmentInfos readCommit(
  public static final SegmentInfos readCommit(
      Directory directory, ChecksumIndexInput input, long generation, int minSupportedMajorVersion)
      throws IOException {
    Throwable priorE = null;
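
GITHUB#14027 makes these readCommit overloads public, so external code can open a specific commit point by its segments file name instead of only the latest commit. A hedged usage sketch; the index path and segments file name below are made up:

import java.io.IOException;
import java.nio.file.Path;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Version;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ReadCommitExample {
  public static void main(String[] args) throws IOException {
    try (Directory dir = FSDirectory.open(Path.of("/tmp/my-index"))) { // hypothetical path
      // Load one particular commit point rather than the latest one.
      SegmentInfos infos =
          SegmentInfos.readCommit(dir, "segments_3", Version.MIN_SUPPORTED_MAJOR); // hypothetical file
      for (SegmentCommitInfo sci : infos) {
        System.out.println(sci.info.name + " maxDoc=" + sci.info.maxDoc());
      }
    }
  }
}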
@ -38,7 +38,7 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
|
|||
private final Scorer[] scorers;
|
||||
private final DocIdSetIterator[] iterators;
|
||||
private final DocIdSetIterator lead1, lead2;
|
||||
private final Scorer scorer1, scorer2;
|
||||
private final Scorable scorer1, scorer2;
|
||||
private final DocAndScore scorable = new DocAndScore();
|
||||
private final double[] sumOfOtherClauses;
|
||||
private final int maxDoc;
|
||||
|
@ -51,10 +51,10 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer {
|
|||
Arrays.sort(this.scorers, Comparator.comparingLong(scorer -> scorer.iterator().cost()));
|
||||
this.iterators =
|
||||
Arrays.stream(this.scorers).map(Scorer::iterator).toArray(DocIdSetIterator[]::new);
|
||||
lead1 = iterators[0];
|
||||
lead2 = iterators[1];
|
||||
scorer1 = this.scorers[0];
|
||||
scorer2 = this.scorers[1];
|
||||
lead1 = ScorerUtil.likelyImpactsEnum(iterators[0]);
|
||||
lead2 = ScorerUtil.likelyImpactsEnum(iterators[1]);
|
||||
scorer1 = ScorerUtil.likelyTermScorer(this.scorers[0]);
|
||||
scorer2 = ScorerUtil.likelyTermScorer(this.scorers[1]);
|
||||
this.sumOfOtherClauses = new double[this.scorers.length];
|
||||
for (int i = 0; i < sumOfOtherClauses.length; i++) {
|
||||
sumOfOtherClauses[i] = Double.POSITIVE_INFINITY;
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.List;
|
|||
*/
|
||||
final class BlockMaxConjunctionScorer extends Scorer {
|
||||
final Scorer[] scorers;
|
||||
final Scorable[] scorables;
|
||||
final DocIdSetIterator[] approximations;
|
||||
final TwoPhaseIterator[] twoPhases;
|
||||
float minScore;
|
||||
|
@ -38,6 +39,8 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
|||
this.scorers = scorersList.toArray(new Scorer[scorersList.size()]);
|
||||
// Sort scorer by cost
|
||||
Arrays.sort(this.scorers, Comparator.comparingLong(s -> s.iterator().cost()));
|
||||
this.scorables =
|
||||
Arrays.stream(scorers).map(ScorerUtil::likelyTermScorer).toArray(Scorable[]::new);
|
||||
|
||||
this.approximations = new DocIdSetIterator[scorers.length];
|
||||
List<TwoPhaseIterator> twoPhaseList = new ArrayList<>();
|
||||
|
@ -50,6 +53,7 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
|||
} else {
|
||||
approximations[i] = scorer.iterator();
|
||||
}
|
||||
approximations[i] = ScorerUtil.likelyImpactsEnum(approximations[i]);
|
||||
scorer.advanceShallow(0);
|
||||
}
|
||||
this.twoPhases = twoPhaseList.toArray(new TwoPhaseIterator[twoPhaseList.size()]);
|
||||
|
@ -207,7 +211,7 @@ final class BlockMaxConjunctionScorer extends Scorer {
|
|||
@Override
|
||||
public float score() throws IOException {
|
||||
double score = 0;
|
||||
for (Scorer scorer : scorers) {
|
||||
for (Scorable scorer : scorables) {
|
||||
score += scorer.score();
|
||||
}
|
||||
return (float) score;
|
||||
|
|
|
@@ -604,23 +604,20 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
    // Important(this can only be processed after nested clauses have been flattened)
    {
      final Collection<Query> shoulds = clauseSets.get(Occur.SHOULD);
      if (shoulds.size() > 0) {
        if (shoulds.size() < minimumNumberShouldMatch) {
          return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
        }

        if (shoulds.size() == minimumNumberShouldMatch) {
          BooleanQuery.Builder builder = new BooleanQuery.Builder();
          for (BooleanClause clause : clauses) {
            if (clause.occur() == Occur.SHOULD) {
              builder.add(clause.query(), Occur.MUST);
            } else {
              builder.add(clause);
            }
      if (shoulds.size() < minimumNumberShouldMatch) {
        return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch");
      }
      if (shoulds.size() > 0 && shoulds.size() == minimumNumberShouldMatch) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (BooleanClause clause : clauses) {
          if (clause.occur() == Occur.SHOULD) {
            builder.add(clause.query(), Occur.MUST);
          } else {
            builder.add(clause);
          }

          return builder.build();
        }

        return builder.build();
      }
    }
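
The restructured block keeps the same two rewrites as before: a query with fewer SHOULD clauses than minimumNumberShouldMatch matches nothing, and a query whose SHOULD count equals minimumNumberShouldMatch can promote every SHOULD clause to MUST. A hedged illustration of the second equivalence; the field and term names are arbitrary:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class MinShouldMatchRewriteExample {
  public static void main(String[] args) {
    // Two SHOULD clauses and minimumNumberShouldMatch == 2: both clauses must match,
    // so the query is equivalent to the same clauses marked MUST.
    BooleanQuery query =
        new BooleanQuery.Builder()
            .setMinimumNumberShouldMatch(2)
            .add(new TermQuery(new Term("body", "fast")), Occur.SHOULD)
            .add(new TermQuery(new Term("body", "search")), Occur.SHOULD)
            .build();

    BooleanQuery equivalent =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term("body", "fast")), Occur.MUST)
            .add(new TermQuery(new Term("body", "search")), Occur.MUST)
            .build();

    System.out.println(query);      // the original form, with ~2 appended for minShouldMatch
    System.out.println(equivalent); // the rewritten all-MUST form
  }
}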
@ -155,7 +155,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
this.needsScores = needsScores;
|
||||
LongArrayList costs = new LongArrayList(scorers.size());
|
||||
for (Scorer scorer : scorers) {
|
||||
DisiWrapper w = new DisiWrapper(scorer);
|
||||
DisiWrapper w = new DisiWrapper(scorer, false);
|
||||
costs.add(w.cost);
|
||||
final DisiWrapper evicted = tail.insertWithOverflow(w);
|
||||
if (evicted != null) {
|
||||
|
@ -177,7 +177,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
Bucket[] buckets = BooleanScorer.this.buckets;
|
||||
|
||||
DocIdSetIterator it = w.iterator;
|
||||
Scorer scorer = w.scorer;
|
||||
Scorable scorer = w.scorable;
|
||||
int doc = w.doc;
|
||||
if (doc < min) {
|
||||
doc = it.advance(min);
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.util.Bits;
|
|||
*/
|
||||
final class ConjunctionBulkScorer extends BulkScorer {
|
||||
|
||||
private final Scorer[] scoringScorers;
|
||||
private final Scorable[] scoringScorers;
|
||||
private final DocIdSetIterator lead1, lead2;
|
||||
private final List<DocIdSetIterator> others;
|
||||
private final Scorable scorable;
|
||||
|
@ -45,7 +45,8 @@ final class ConjunctionBulkScorer extends BulkScorer {
|
|||
allScorers.addAll(requiredScoring);
|
||||
allScorers.addAll(requiredNoScoring);
|
||||
|
||||
this.scoringScorers = requiredScoring.toArray(Scorer[]::new);
|
||||
this.scoringScorers =
|
||||
requiredScoring.stream().map(ScorerUtil::likelyTermScorer).toArray(Scorable[]::new);
|
||||
List<DocIdSetIterator> iterators = new ArrayList<>();
|
||||
for (Scorer scorer : allScorers) {
|
||||
iterators.add(scorer.iterator());
|
||||
|
@ -59,7 +60,7 @@ final class ConjunctionBulkScorer extends BulkScorer {
|
|||
@Override
|
||||
public float score() throws IOException {
|
||||
double score = 0;
|
||||
for (Scorer scorer : scoringScorers) {
|
||||
for (Scorable scorer : scoringScorers) {
|
||||
score += scorer.score();
|
||||
}
|
||||
return (float) score;
|
||||
|
|
|
@@ -16,6 +16,8 @@
 */
package org.apache.lucene.search;

import java.util.Objects;

/**
 * Wrapper used in {@link DisiPriorityQueue}.
 *

@@ -24,6 +26,7 @@ package org.apache.lucene.search;
public class DisiWrapper {
  public final DocIdSetIterator iterator;
  public final Scorer scorer;
  public final Scorable scorable;
  public final long cost;
  public final float matchCost; // the match cost for two-phase iterators, 0 otherwise
  public int doc; // the current doc, used for comparison

@@ -42,9 +45,14 @@ public class DisiWrapper {
  // for MaxScoreBulkScorer
  float maxWindowScore;

  public DisiWrapper(Scorer scorer) {
    this.scorer = scorer;
    this.iterator = scorer.iterator();
  public DisiWrapper(Scorer scorer, boolean impacts) {
    this.scorer = Objects.requireNonNull(scorer);
    this.scorable = ScorerUtil.likelyTermScorer(scorer);
    if (impacts) {
      this.iterator = ScorerUtil.likelyImpactsEnum(scorer.iterator());
    } else {
      this.iterator = scorer.iterator();
    }
    this.cost = iterator.cost();
    this.doc = -1;
    this.twoPhaseView = scorer.twoPhaseIterator();
@ -60,7 +60,7 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
float scoreMax = 0;
|
||||
double otherScoreSum = 0;
|
||||
for (DisiWrapper w = topList; w != null; w = w.next) {
|
||||
float subScore = w.scorer.score();
|
||||
float subScore = w.scorable.score();
|
||||
if (subScore >= scoreMax) {
|
||||
otherScoreSum += scoreMax;
|
||||
scoreMax = subScore;
|
||||
|
|
|
@ -37,7 +37,7 @@ abstract class DisjunctionScorer extends Scorer {
|
|||
}
|
||||
this.subScorers = new DisiPriorityQueue(subScorers.size());
|
||||
for (Scorer scorer : subScorers) {
|
||||
final DisiWrapper w = new DisiWrapper(scorer);
|
||||
final DisiWrapper w = new DisiWrapper(scorer, false);
|
||||
this.subScorers.add(w);
|
||||
}
|
||||
this.needsScores = scoreMode != ScoreMode.COMPLETE_NO_SCORES;
|
||||
|
|
|
@ -40,7 +40,7 @@ final class DisjunctionSumScorer extends DisjunctionScorer {
|
|||
double score = 0;
|
||||
|
||||
for (DisiWrapper w = topList; w != null; w = w.next) {
|
||||
score += w.scorer.score();
|
||||
score += w.scorable.score();
|
||||
}
|
||||
return (float) score;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Wrapper around a {@link DocIdSetIterator}. */
|
||||
public class FilterDocIdSetIterator extends DocIdSetIterator {
|
||||
|
||||
/** Wrapped instance. */
|
||||
protected final DocIdSetIterator in;
|
||||
|
||||
/** Sole constructor. */
|
||||
public FilterDocIdSetIterator(DocIdSetIterator in) {
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return in.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return in.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return in.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return in.cost();
|
||||
}
|
||||
}
|
|
@ -35,7 +35,7 @@ public abstract class IndriDisjunctionScorer extends IndriScorer {
|
|||
this.subScorersList = subScorersList;
|
||||
this.subScorers = new DisiPriorityQueue(subScorersList.size());
|
||||
for (Scorer scorer : subScorersList) {
|
||||
final DisiWrapper w = new DisiWrapper(scorer);
|
||||
final DisiWrapper w = new DisiWrapper(scorer, false);
|
||||
this.subScorers.add(w);
|
||||
}
|
||||
this.approximation = new DisjunctionDISIApproximation(this.subScorers);
|
||||
|
|
|
@ -58,7 +58,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
this.filter = null;
|
||||
filterMatches = null;
|
||||
} else {
|
||||
this.filter = new DisiWrapper(filter);
|
||||
this.filter = new DisiWrapper(filter, false);
|
||||
filterMatches = new FixedBitSet(INNER_WINDOW_SIZE);
|
||||
}
|
||||
allScorers = new DisiWrapper[scorers.size()];
|
||||
|
@ -66,7 +66,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
int i = 0;
|
||||
long cost = 0;
|
||||
for (Scorer scorer : scorers) {
|
||||
DisiWrapper w = new DisiWrapper(scorer);
|
||||
DisiWrapper w = new DisiWrapper(scorer, true);
|
||||
cost += w.cost;
|
||||
allScorers[i++] = w;
|
||||
}
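
MaxScoreBulkScorer is the scorer behind GITHUB#14014, which this branch feeds into: a scoring disjunction restricted by a non-scoring filter clause can now be bulk-scored here, with the filter held in its own non-impacts DisiWrapper as in the hunk above. A hedged example of the query shape that benefits; field and term names are arbitrary:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class FilteredDisjunctionExample {
  public static void main(String[] args) {
    // SHOULD clauses score, the FILTER clause only restricts the match set.
    // Top-k evaluation of this shape is what the filtered MaxScore path targets.
    BooleanQuery query =
        new BooleanQuery.Builder()
            .add(new TermQuery(new Term("body", "fast")), Occur.SHOULD)
            .add(new TermQuery(new Term("body", "search")), Occur.SHOULD)
            .add(new TermQuery(new Term("status", "published")), Occur.FILTER)
            .build();
    System.out.println(query); // prints the two scoring clauses plus the #-prefixed filter clause
  }
}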
|
||||
|
@ -256,7 +256,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
if (acceptDocs != null && acceptDocs.get(doc) == false) {
|
||||
continue;
|
||||
}
|
||||
scoreNonEssentialClauses(collector, doc, top.scorer.score(), firstEssentialScorer);
|
||||
scoreNonEssentialClauses(collector, doc, top.scorable.score(), firstEssentialScorer);
|
||||
}
|
||||
top.doc = top.iterator.docID();
|
||||
essentialQueue.updateTop();
|
||||
|
@ -284,7 +284,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
continue;
|
||||
}
|
||||
|
||||
double score = lead1.scorer.score();
|
||||
double score = lead1.scorable.score();
|
||||
|
||||
// We specialize handling the second best scorer, which seems to help a bit with performance.
|
||||
// But this is the exact same logic as in the below for loop.
|
||||
|
@ -303,7 +303,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
continue;
|
||||
}
|
||||
|
||||
score += lead2.scorer.score();
|
||||
score += lead2.scorable.score();
|
||||
|
||||
for (int i = allScorers.length - 3; i >= firstRequiredScorer; --i) {
|
||||
if ((float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length)
|
||||
|
@ -321,7 +321,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
lead1.doc = lead1.iterator.advance(Math.min(w.doc, max));
|
||||
continue outer;
|
||||
}
|
||||
score += w.scorer.score();
|
||||
score += w.scorable.score();
|
||||
}
|
||||
|
||||
scoreNonEssentialClauses(collector, lead1.doc, score, firstRequiredScorer);
|
||||
|
@ -342,7 +342,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
if (acceptDocs == null || acceptDocs.get(doc)) {
|
||||
final int i = doc - innerWindowMin;
|
||||
windowMatches[i >>> 6] |= 1L << i;
|
||||
windowScores[i] += top.scorer.score();
|
||||
windowScores[i] += top.scorable.score();
|
||||
}
|
||||
}
|
||||
top.doc = top.iterator.docID();
|
||||
|
@ -439,7 +439,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
|
|||
scorer.doc = scorer.iterator.advance(doc);
|
||||
}
|
||||
if (scorer.doc == doc) {
|
||||
score += scorer.scorer.score();
|
||||
score += scorer.scorable.score();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -113,10 +113,10 @@ final class MultiTermQueryConstantScoreBlendedWrapper<Q extends MultiTermQuery>
|
|||
DisiPriorityQueue subs = new DisiPriorityQueue(highFrequencyTerms.size() + 1);
|
||||
for (DocIdSetIterator disi : highFrequencyTerms) {
|
||||
Scorer s = wrapWithDummyScorer(this, disi);
|
||||
subs.add(new DisiWrapper(s));
|
||||
subs.add(new DisiWrapper(s, false));
|
||||
}
|
||||
Scorer s = wrapWithDummyScorer(this, otherTerms.build().iterator());
|
||||
subs.add(new DisiWrapper(s));
|
||||
subs.add(new DisiWrapper(s, false));
|
||||
|
||||
return new WeightOrDocIdSetIterator(new DisjunctionDISIApproximation(subs));
|
||||
}
|
||||
|
|
|
@@ -16,12 +16,48 @@
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.stream.LongStream;
import java.util.stream.StreamSupport;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FeatureField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;

/** Util class for Scorer related methods */
class ScorerUtil {

  private static final Class<?> DEFAULT_IMPACTS_ENUM_CLASS;

  static {
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
      Document doc = new Document();
      doc.add(new FeatureField("field", "value", 1f));
      w.addDocument(doc);
      try (DirectoryReader reader = DirectoryReader.open(w)) {
        LeafReader leafReader = reader.leaves().get(0).reader();
        TermsEnum te = leafReader.terms("field").iterator();
        if (te.seekExact(new BytesRef("value")) == false) {
          throw new Error();
        }
        ImpactsEnum ie = te.impacts(PostingsEnum.FREQS);
        DEFAULT_IMPACTS_ENUM_CLASS = ie.getClass();
      }
    } catch (IOException e) {
      throw new Error(e);
    }
  }

  static long costWithMinShouldMatch(LongStream costs, int numScorers, int minShouldMatch) {
    // the idea here is the following: a boolean query c1,c2,...cn with minShouldMatch=m
    // could be rewritten to:

@@ -46,4 +82,30 @@ class ScorerUtil {
    costs.forEach(pq::insertWithOverflow);
    return StreamSupport.stream(pq.spliterator(), false).mapToLong(Number::longValue).sum();
  }

  /**
   * Optimize a {@link DocIdSetIterator} for the case when it is likely implemented via an {@link
   * ImpactsEnum}. This method has only two possible return types, which helps make sure that
   * calls to {@link DocIdSetIterator#nextDoc()} and {@link DocIdSetIterator#advance(int)} are
   * at most bimorphic and candidates for inlining.
   */
  static DocIdSetIterator likelyImpactsEnum(DocIdSetIterator it) {
    if (it.getClass() != DEFAULT_IMPACTS_ENUM_CLASS
        && it.getClass() != FilterDocIdSetIterator.class) {
      it = new FilterDocIdSetIterator(it);
    }
    return it;
  }

  /**
   * Optimize a {@link Scorable} for the case when it is likely implemented via a {@link
   * TermScorer}. This method has only two possible return types, which helps make sure that
   * calls to {@link Scorable#score()} are at most bimorphic and candidates for inlining.
   */
  static Scorable likelyTermScorer(Scorable scorable) {
    if (scorable.getClass() != TermScorer.class && scorable.getClass() != FilterScorable.class) {
      scorable = new FilterScorable(scorable);
    }
    return scorable;
  }
}
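
These two helpers carry the change described by GITHUB#14023: by funneling every sub-iterator and sub-scorer through at most two concrete classes (the common implementation plus one opaque filter wrapper), the virtual calls at the hot call sites stay at most bimorphic, which is what allows the JIT to inline them. A minimal self-contained sketch of the same trick outside Lucene; every name below is illustrative and not a Lucene API:

/** Toy version of the "at most two receiver types" trick used by ScorerUtil. */
interface Scorelike {
  float score();
}

/** The common, fast implementation that call sites should stay specialized for. */
final class FastScore implements Scorelike {
  public float score() {
    return 1f;
  }
}

/** Opaque wrapper: every other implementation hides behind this single class. */
final class FilterScore implements Scorelike {
  private final Scorelike in;

  FilterScore(Scorelike in) {
    this.in = in;
  }

  public float score() {
    return in.score();
  }
}

final class ScorerUtilSketch {
  /** After this, callers only ever see FastScore or FilterScore: a bimorphic call site. */
  static Scorelike likelyFastScore(Scorelike s) {
    if (s.getClass() != FastScore.class && s.getClass() != FilterScore.class) {
      s = new FilterScore(s);
    }
    return s;
  }

  public static void main(String[] args) {
    Scorelike exotic = () -> 0.5f; // some third implementation (a lambda class here)
    Scorelike wrapped = likelyFastScore(exotic);
    System.out.println(wrapped.getClass().getSimpleName() + " -> " + wrapped.score());
  }
}

Whether the JIT actually inlines still depends on profiling, but capping the number of receiver types per call site is what makes inlining possible in the first place.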
@ -646,7 +646,7 @@ public final class SynonymQuery extends Query {
|
|||
final float boost;
|
||||
|
||||
DisiWrapperFreq(Scorer scorer, float boost) {
|
||||
super(scorer);
|
||||
super(scorer, false);
|
||||
this.pe = (PostingsEnum) scorer.iterator();
|
||||
this.boost = boost;
|
||||
}
|
||||
|
|
|
@ -196,7 +196,12 @@ final class WANDScorer extends Scorer {
|
|||
}
|
||||
|
||||
for (Scorer scorer : scorers) {
|
||||
addUnpositionedLead(new DisiWrapper(scorer));
|
||||
// Ideally we would pass true when scoreMode == TOP_SCORES and false otherwise, but this would
|
||||
// break the optimization as there could then be 3 different impls of DocIdSetIterator
|
||||
// (ImpactsEnum, PostingsEnum and <Else>). So we pass true to favor disjunctions sorted by
|
||||
// descending score as opposed to non-scoring disjunctions whose minShouldMatch is greater
|
||||
// than 1.
|
||||
addUnpositionedLead(new DisiWrapper(scorer, true));
|
||||
}
|
||||
|
||||
this.cost =
|
||||
|
@ -221,7 +226,7 @@ final class WANDScorer extends Scorer {
|
|||
List<Float> leadScores = new ArrayList<>();
|
||||
for (DisiWrapper w = lead; w != null; w = w.next) {
|
||||
assert w.doc == doc;
|
||||
leadScores.add(w.scorer.score());
|
||||
leadScores.add(w.scorable.score());
|
||||
}
|
||||
// Make sure to recompute the sum in the same order to get the same floating point rounding
|
||||
// errors.
|
||||
|
@ -370,7 +375,7 @@ final class WANDScorer extends Scorer {
|
|||
this.lead = lead;
|
||||
freq += 1;
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
leadScore += lead.scorer.score();
|
||||
leadScore += lead.scorable.score();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -522,7 +527,7 @@ final class WANDScorer extends Scorer {
|
|||
lead.next = null;
|
||||
freq = 1;
|
||||
if (scoreMode == ScoreMode.TOP_SCORES) {
|
||||
leadScore = lead.scorer.score();
|
||||
leadScore = lead.scorable.score();
|
||||
}
|
||||
while (head.size() > 0 && head.top().doc == doc) {
|
||||
addLead(head.pop());
|
||||
|
@ -553,7 +558,7 @@ final class WANDScorer extends Scorer {
|
|||
if (scoreMode != ScoreMode.TOP_SCORES) {
|
||||
// With TOP_SCORES, the score was already computed on the fly.
|
||||
for (DisiWrapper s = lead; s != null; s = s.next) {
|
||||
leadScore += s.scorer.score();
|
||||
leadScore += s.scorable.score();
|
||||
}
|
||||
}
|
||||
return (float) leadScore;
|
||||
|
|
|
@@ -18,6 +18,7 @@ package org.apache.lucene.store;

import java.io.Closeable;
import java.io.IOException;
import java.util.Optional;
import org.apache.lucene.codecs.CompoundFormat;

/**

@@ -234,4 +235,19 @@ public abstract class IndexInput extends DataInput implements Closeable {
   * <p>The default implementation is a no-op.
   */
  public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {}

  /**
   * Returns a hint whether all the contents of this input are resident in physical memory. It's a
   * hint because the operating system may have paged out some of the data by the time this method
   * returns. If the optional is true, then it's likely that the contents of this input are resident
   * in physical memory. A value of false does not imply that the contents are not resident in
   * physical memory. An empty optional is returned if it is not possible to determine.
   *
   * <p>This runs in linear time with the {@link #length()} of this input / page size.
   *
   * <p>The default implementation returns an empty optional.
   */
  public Optional<Boolean> isLoaded() {
    return Optional.empty();
  }
}
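
GITHUB#13998 documents isLoaded() as a hint only, so callers should treat an empty Optional as "unknown" rather than "not loaded". A hedged usage sketch; the directory path and file name are made up, and MMapDirectory is the implementation that actually reports a value:

import java.io.IOException;
import java.nio.file.Path;
import java.util.Optional;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MMapDirectory;

public class IsLoadedExample {
  public static void main(String[] args) throws IOException {
    try (Directory dir = new MMapDirectory(Path.of("/tmp/my-index")); // hypothetical path
        IndexInput in = dir.openInput("_0.cfs", IOContext.DEFAULT)) { // hypothetical file
      Optional<Boolean> loaded = in.isLoaded();
      if (loaded.isEmpty()) {
        System.out.println("directory cannot tell whether the file is resident");
      } else if (loaded.get()) {
        System.out.println("file appears to be fully resident in physical memory");
      } else {
        System.out.println("at least part of the file is probably not resident");
      }
    }
  }
}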
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.store;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Optional;
|
||||
import org.apache.lucene.util.BitUtil; // javadocs
|
||||
|
||||
/**
|
||||
|
@ -77,4 +78,13 @@ public interface RandomAccessInput {
|
|||
* @see IndexInput#prefetch
|
||||
*/
|
||||
default void prefetch(long offset, long length) throws IOException {}
|
||||
|
||||
/**
|
||||
* Returns a hint whether all the contents of this input are resident in physical memory.
|
||||
*
|
||||
* @see IndexInput#isLoaded()
|
||||
*/
|
||||
default Optional<Boolean> isLoaded() {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -30,6 +30,7 @@ import static org.apache.lucene.util.fst.FST.NON_FINAL_END_NODE;
import static org.apache.lucene.util.fst.FST.getNumPresenceBytes;

import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataOutput;

@@ -869,14 +870,14 @@ public class FSTCompiler<T> {
    }

    // compare shared prefix length
    int pos1 = 0;
    int pos2 = input.offset;
    final int pos1Stop = Math.min(lastInput.length(), input.length);
    while (pos1 < pos1Stop && lastInput.intAt(pos1) == input.ints[pos2]) {
      pos1++;
      pos2++;
    int pos = 0;
    if (lastInput.length() > 0) {
      int mismatch =
          Arrays.mismatch(
              lastInput.ints(), 0, lastInput.length(), input.ints, input.offset, input.length);
      pos += mismatch == -1 ? lastInput.length() : mismatch;
    }
    final int prefixLenPlus1 = pos1 + 1;
    final int prefixLenPlus1 = pos + 1;

    if (frontier.length < input.length + 1) {
      final UnCompiledNode<T>[] next = ArrayUtil.grow(frontier, input.length + 1);
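
The rewritten prefix comparison above delegates to Arrays.mismatch, which returns the index of the first differing element (or -1 when the two ranges are identical) and is intrinsified by the JVM. A small hedged illustration of computing a shared-prefix length this way:

import java.util.Arrays;

public class SharedPrefixExample {
  /** Length of the common prefix of a[0..aLen) and b[bOff..bOff+bLen). */
  static int sharedPrefixLength(int[] a, int aLen, int[] b, int bOff, int bLen) {
    int mismatch = Arrays.mismatch(a, 0, aLen, b, bOff, bOff + bLen);
    // -1 means the two ranges are identical, so the whole (equal) length is shared
    return mismatch == -1 ? Math.min(aLen, bLen) : mismatch;
  }

  public static void main(String[] args) {
    int[] last = {1, 2, 3, 4};
    int[] current = {9, 9, 1, 2, 5}; // the logical input starts at offset 2
    System.out.println(sharedPrefixLength(last, 4, current, 2, 3)); // 2
  }
}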
@@ -75,6 +75,9 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
    }
  }

  // cached vector sizes for smaller method bodies
  private static final int FLOAT_SPECIES_LENGTH = FLOAT_SPECIES.length();

  // the way FMA should work! if available use it, otherwise fall back to mul/add
  private static FloatVector fma(FloatVector a, FloatVector b, FloatVector c) {
    if (Constants.HAS_FAST_VECTOR_FMA) {

@@ -99,7 +102,7 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
    float res = 0;

    // if the array size is large (> 2x platform vector size), its worth the overhead to vectorize
    if (a.length > 2 * FLOAT_SPECIES.length()) {
    if (a.length > 2 * FLOAT_SPECIES_LENGTH) {
      i += FLOAT_SPECIES.loopBound(a.length);
      res += dotProductBody(a, b, i);
    }
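
GITHUB#14031 is mostly about keeping these distance kernels small enough to inline: FLOAT_SPECIES.length() is hoisted into a constant and the repeated subtract-and-FMA code is factored into a helper further down. A self-contained sketch of the same kind of Panama kernel with the species length cached the same way; this is an illustration rather than Lucene's implementation, and it needs --add-modules jdk.incubator.vector to compile:

import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

public class DotProductSketch {
  private static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_PREFERRED;
  // cached in a static final so uses fold to a constant instead of a method call
  private static final int SPECIES_LENGTH = SPECIES.length();

  static float dotProduct(float[] a, float[] b) {
    float res = 0;
    int i = 0;
    // vectorized main loop
    int bound = SPECIES.loopBound(a.length);
    FloatVector acc = FloatVector.zero(SPECIES);
    for (; i < bound; i += SPECIES_LENGTH) {
      FloatVector va = FloatVector.fromArray(SPECIES, a, i);
      FloatVector vb = FloatVector.fromArray(SPECIES, b, i);
      acc = va.fma(vb, acc); // acc += va * vb, lane-wise
    }
    res += acc.reduceLanes(VectorOperators.ADD);
    // scalar tail for the remaining elements
    for (; i < a.length; i++) {
      res += a[i] * b[i];
    }
    return res;
  }

  public static void main(String[] args) {
    float[] a = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    float[] b = {9, 8, 7, 6, 5, 4, 3, 2, 1};
    System.out.println(dotProduct(a, b)); // 165.0
  }
}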
@ -120,30 +123,33 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
FloatVector acc2 = FloatVector.zero(FLOAT_SPECIES);
|
||||
FloatVector acc3 = FloatVector.zero(FLOAT_SPECIES);
|
||||
FloatVector acc4 = FloatVector.zero(FLOAT_SPECIES);
|
||||
int unrolledLimit = limit - 3 * FLOAT_SPECIES.length();
|
||||
for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES.length()) {
|
||||
final int unrolledLimit = limit - 3 * FLOAT_SPECIES_LENGTH;
|
||||
for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES_LENGTH) {
|
||||
// one
|
||||
FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i);
|
||||
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i);
|
||||
acc1 = fma(va, vb, acc1);
|
||||
|
||||
// two
|
||||
FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i + FLOAT_SPECIES.length());
|
||||
FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i + FLOAT_SPECIES.length());
|
||||
final int i2 = i + FLOAT_SPECIES_LENGTH;
|
||||
FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i2);
|
||||
FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i2);
|
||||
acc2 = fma(vc, vd, acc2);
|
||||
|
||||
// three
|
||||
FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i + 2 * FLOAT_SPECIES.length());
|
||||
FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i + 2 * FLOAT_SPECIES.length());
|
||||
final int i3 = i2 + FLOAT_SPECIES_LENGTH;
|
||||
FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i3);
|
||||
FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i3);
|
||||
acc3 = fma(ve, vf, acc3);
|
||||
|
||||
// four
|
||||
FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i + 3 * FLOAT_SPECIES.length());
|
||||
FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i + 3 * FLOAT_SPECIES.length());
|
||||
final int i4 = i3 + FLOAT_SPECIES_LENGTH;
|
||||
FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i4);
|
||||
FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i4);
|
||||
acc4 = fma(vg, vh, acc4);
|
||||
}
|
||||
// vector tail: less scalar computations for unaligned sizes, esp with big vector sizes
|
||||
for (; i < limit; i += FLOAT_SPECIES.length()) {
|
||||
for (; i < limit; i += FLOAT_SPECIES_LENGTH) {
|
||||
FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i);
|
||||
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i);
|
||||
acc1 = fma(va, vb, acc1);
|
||||
|
@ -162,7 +168,7 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
float norm2 = 0;
|
||||
|
||||
// if the array size is large (> 2x platform vector size), its worth the overhead to vectorize
|
||||
if (a.length > 2 * FLOAT_SPECIES.length()) {
|
||||
if (a.length > 2 * FLOAT_SPECIES_LENGTH) {
|
||||
i += FLOAT_SPECIES.loopBound(a.length);
|
||||
float[] ret = cosineBody(a, b, i);
|
||||
sum += ret[0];
|
||||
|
@ -190,8 +196,8 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
FloatVector norm1_2 = FloatVector.zero(FLOAT_SPECIES);
|
||||
FloatVector norm2_1 = FloatVector.zero(FLOAT_SPECIES);
|
||||
FloatVector norm2_2 = FloatVector.zero(FLOAT_SPECIES);
|
||||
int unrolledLimit = limit - FLOAT_SPECIES.length();
|
||||
for (; i < unrolledLimit; i += 2 * FLOAT_SPECIES.length()) {
|
||||
final int unrolledLimit = limit - FLOAT_SPECIES_LENGTH;
|
||||
for (; i < unrolledLimit; i += 2 * FLOAT_SPECIES_LENGTH) {
|
||||
// one
|
||||
FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i);
|
||||
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i);
|
||||
|
@ -200,14 +206,15 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
norm2_1 = fma(vb, vb, norm2_1);
|
||||
|
||||
// two
|
||||
FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i + FLOAT_SPECIES.length());
|
||||
FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i + FLOAT_SPECIES.length());
|
||||
final int i2 = i + FLOAT_SPECIES_LENGTH;
|
||||
FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i2);
|
||||
FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i2);
|
||||
sum2 = fma(vc, vd, sum2);
|
||||
norm1_2 = fma(vc, vc, norm1_2);
|
||||
norm2_2 = fma(vd, vd, norm2_2);
|
||||
}
|
||||
// vector tail: less scalar computations for unaligned sizes, esp with big vector sizes
|
||||
for (; i < limit; i += FLOAT_SPECIES.length()) {
|
||||
for (; i < limit; i += FLOAT_SPECIES_LENGTH) {
|
||||
FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i);
|
||||
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i);
|
||||
sum1 = fma(va, vb, sum1);
|
||||
|
@ -227,7 +234,7 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
float res = 0;
|
||||
|
||||
// if the array size is large (> 2x platform vector size), its worth the overhead to vectorize
|
||||
if (a.length > 2 * FLOAT_SPECIES.length()) {
|
||||
if (a.length > 2 * FLOAT_SPECIES_LENGTH) {
|
||||
i += FLOAT_SPECIES.loopBound(a.length);
|
||||
res += squareDistanceBody(a, b, i);
|
||||
}
|
||||
|
@ -240,6 +247,12 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
return res;
|
||||
}
|
||||
|
||||
/** helper: returns fma(a.sub(b), a.sub(b), c) */
|
||||
private static FloatVector square(FloatVector a, FloatVector b, FloatVector c) {
|
||||
FloatVector diff = a.sub(b);
|
||||
return fma(diff, diff, c);
|
||||
}
|
||||
|
||||
/** vectorized square distance body */
|
||||
private float squareDistanceBody(float[] a, float[] b, int limit) {
|
||||
int i = 0;
|
||||
|
@ -249,38 +262,36 @@ final class PanamaVectorUtilSupport implements VectorUtilSupport {
|
|||
FloatVector acc2 = FloatVector.zero(FLOAT_SPECIES);
|
||||
FloatVector acc3 = FloatVector.zero(FLOAT_SPECIES);
|
||||
FloatVector acc4 = FloatVector.zero(FLOAT_SPECIES);
|
||||
int unrolledLimit = limit - 3 * FLOAT_SPECIES.length();
|
||||
for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES.length()) {
|
||||
final int unrolledLimit = limit - 3 * FLOAT_SPECIES_LENGTH;
|
||||
for (; i < unrolledLimit; i += 4 * FLOAT_SPECIES_LENGTH) {
|
||||
// one
|
||||
FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i);
|
||||
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i);
|
||||
FloatVector diff1 = va.sub(vb);
|
||||
acc1 = fma(diff1, diff1, acc1);
|
||||
acc1 = square(va, vb, acc1);
|
||||
|
||||
// two
|
||||
FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i + FLOAT_SPECIES.length());
|
||||
FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i + FLOAT_SPECIES.length());
|
||||
FloatVector diff2 = vc.sub(vd);
|
||||
acc2 = fma(diff2, diff2, acc2);
|
||||
final int i2 = i + FLOAT_SPECIES_LENGTH;
|
||||
FloatVector vc = FloatVector.fromArray(FLOAT_SPECIES, a, i2);
|
||||
FloatVector vd = FloatVector.fromArray(FLOAT_SPECIES, b, i2);
|
||||
acc2 = square(vc, vd, acc2);
|
||||
|
||||
// three
|
||||
FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i + 2 * FLOAT_SPECIES.length());
|
||||
FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i + 2 * FLOAT_SPECIES.length());
|
||||
FloatVector diff3 = ve.sub(vf);
|
||||
acc3 = fma(diff3, diff3, acc3);
|
||||
final int i3 = i2 + FLOAT_SPECIES_LENGTH;
|
||||
FloatVector ve = FloatVector.fromArray(FLOAT_SPECIES, a, i3);
|
||||
FloatVector vf = FloatVector.fromArray(FLOAT_SPECIES, b, i3);
|
||||
acc3 = square(ve, vf, acc3);
|
||||
|
||||
// four
|
||||
FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i + 3 * FLOAT_SPECIES.length());
|
||||
FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i + 3 * FLOAT_SPECIES.length());
|
||||
FloatVector diff4 = vg.sub(vh);
|
||||
acc4 = fma(diff4, diff4, acc4);
|
||||
final int i4 = i3 + FLOAT_SPECIES_LENGTH;
|
||||
FloatVector vg = FloatVector.fromArray(FLOAT_SPECIES, a, i4);
|
||||
FloatVector vh = FloatVector.fromArray(FLOAT_SPECIES, b, i4);
|
||||
acc4 = square(vg, vh, acc4);
|
||||
}
|
||||
// vector tail: less scalar computations for unaligned sizes, esp with big vector sizes
|
||||
for (; i < limit; i += FLOAT_SPECIES.length()) {
|
||||
for (; i < limit; i += FLOAT_SPECIES_LENGTH) {
|
||||
FloatVector va = FloatVector.fromArray(FLOAT_SPECIES, a, i);
|
||||
FloatVector vb = FloatVector.fromArray(FLOAT_SPECIES, b, i);
|
||||
FloatVector diff = va.sub(vb);
|
||||
acc1 = fma(diff, diff, acc1);
|
||||
acc1 = square(va, vb, acc1);
|
||||
}
|
||||
// reduce
|
||||
FloatVector res1 = acc1.add(acc2);
|
||||
|
|
|
@ -420,6 +420,16 @@ abstract class MemorySegmentIndexInput extends IndexInput
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Boolean> isLoaded() {
|
||||
for (MemorySegment seg : segments) {
|
||||
if (seg.isLoaded() == false) {
|
||||
return Optional.of(Boolean.FALSE);
|
||||
}
|
||||
}
|
||||
return Optional.of(Boolean.TRUE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte readByte(long pos) throws IOException {
|
||||
try {
|
||||
|
|
|
@ -401,14 +401,12 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
|
||||
bq =
|
||||
new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(random().nextInt(5))
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.add(new MatchAllDocsQuery(), Occur.FILTER)
|
||||
.build();
|
||||
Query expected =
|
||||
new BooleanQuery.Builder()
|
||||
.setMinimumNumberShouldMatch(bq.getMinimumNumberShouldMatch())
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.build();
|
||||
|
@ -476,7 +474,22 @@ public class TestBooleanRewrites extends LuceneTestCase {
|
|||
Query query = randomBooleanQuery(random());
|
||||
final TopDocs td1 = searcher1.search(query, 100);
|
||||
final TopDocs td2 = searcher2.search(query, 100);
|
||||
assertEquals(td1, td2);
|
||||
try {
|
||||
assertEquals(td1, td2);
|
||||
} catch (AssertionError e) {
|
||||
System.out.println(query);
|
||||
Query rewritten = query;
|
||||
do {
|
||||
query = rewritten;
|
||||
rewritten = query.rewrite(searcher1);
|
||||
System.out.println(rewritten);
|
||||
TopDocs tdx = searcher2.search(rewritten, 100);
|
||||
if (td2.totalHits.value() != tdx.totalHits.value()) {
|
||||
System.out.println("Bad");
|
||||
}
|
||||
} while (query != rewritten);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
searcher1.getIndexReader().close();
|
||||
|
|
|
@ -70,7 +70,7 @@ public class TestDisiPriorityQueue extends LuceneTestCase {
|
|||
private static DisiWrapper wrapper(DocIdSetIterator iterator) throws IOException {
|
||||
Query q = new DummyQuery(iterator);
|
||||
Scorer s = q.createWeight(null, ScoreMode.COMPLETE_NO_SCORES, 1.0f).scorer(null);
|
||||
return new DisiWrapper(s);
|
||||
return new DisiWrapper(s, random().nextBoolean());
|
||||
}
|
||||
|
||||
private static DocIdSetIterator randomDisi(Random r) {
|
||||
|
|
|
@ -422,15 +422,17 @@ public final class CombinedFieldQuery extends Query implements Accountable {
|
|||
}
|
||||
|
||||
private static class WeightedDisiWrapper extends DisiWrapper {
|
||||
final PostingsEnum postingsEnum;
|
||||
final float weight;
|
||||
|
||||
WeightedDisiWrapper(Scorer scorer, float weight) {
|
||||
super(scorer);
|
||||
super(scorer, false);
|
||||
this.weight = weight;
|
||||
this.postingsEnum = (PostingsEnum) scorer.iterator();
|
||||
}
|
||||
|
||||
float freq() throws IOException {
|
||||
return weight * ((PostingsEnum) iterator).freq();
|
||||
return weight * postingsEnum.freq();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ final class CoveringScorer extends Scorer {
|
|||
subScorers = new DisiPriorityQueue(scorers.size());
|
||||
|
||||
for (Scorer scorer : scorers) {
|
||||
subScorers.add(new DisiWrapper(scorer));
|
||||
subScorers.add(new DisiWrapper(scorer, false));
|
||||
}
|
||||
|
||||
this.cost = scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost).sum();
|
||||
|
@ -210,7 +210,7 @@ final class CoveringScorer extends Scorer {
|
|||
setTopListAndFreqIfNecessary();
|
||||
double score = 0;
|
||||
for (DisiWrapper w = topList; w != null; w = w.next) {
|
||||
score += w.scorer.score();
|
||||
score += w.scorable.score();
|
||||
}
|
||||
return (float) score;
|
||||
}
|
||||
|
|
|
@ -51,9 +51,11 @@ import org.apache.lucene.store.AlreadyClosedException;
|
|||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.FilterDirectory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.MMapDirectory;
|
||||
import org.apache.lucene.store.RandomAccessInput;
|
||||
import org.apache.lucene.store.ReadAdvice;
|
||||
import org.apache.lucene.tests.mockfile.ExtrasFS;
|
||||
|
@ -1636,4 +1638,44 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testIsLoaded() throws IOException {
|
||||
testIsLoaded(0);
|
||||
}
|
||||
|
||||
public void testIsLoadedOnSlice() throws IOException {
|
||||
testIsLoaded(TestUtil.nextInt(random(), 1, 1024));
|
||||
}
|
||||
|
||||
private void testIsLoaded(int startOffset) throws IOException {
|
||||
try (Directory dir = getDirectory(createTempDir())) {
|
||||
if (FilterDirectory.unwrap(dir) instanceof MMapDirectory mMapDirectory) {
|
||||
mMapDirectory.setPreload(MMapDirectory.ALL_FILES);
|
||||
}
|
||||
final int totalLength = startOffset + TestUtil.nextInt(random(), 16384, 65536);
|
||||
byte[] arr = new byte[totalLength];
|
||||
random().nextBytes(arr);
|
||||
try (IndexOutput out = dir.createOutput("temp.bin", IOContext.DEFAULT)) {
|
||||
out.writeBytes(arr, arr.length);
|
||||
}
|
||||
|
||||
try (IndexInput orig = dir.openInput("temp.bin", IOContext.DEFAULT)) {
|
||||
IndexInput in;
|
||||
if (startOffset == 0) {
|
||||
in = orig.clone();
|
||||
} else {
|
||||
in = orig.slice("slice", startOffset, totalLength - startOffset);
|
||||
}
|
||||
var loaded = in.isLoaded();
|
||||
if (FilterDirectory.unwrap(dir) instanceof MMapDirectory
|
||||
// direct IO wraps MMap but does not support isLoaded
|
||||
&& !(dir.getClass().getName().contains("DirectIO"))) {
|
||||
assertTrue(loaded.isPresent());
|
||||
assertTrue(loaded.get());
|
||||
} else {
|
||||
assertFalse(loaded.isPresent());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.tests.store;
|
|||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.internal.tests.TestSecrets;
|
||||
import org.apache.lucene.store.FilterIndexInput;
|
||||
|
@ -184,6 +185,13 @@ public class MockIndexInputWrapper extends FilterIndexInput {
|
|||
in.prefetch(offset, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Boolean> isLoaded() {
|
||||
ensureOpen();
|
||||
ensureAccessible();
|
||||
return in.isLoaded();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {
|
||||
ensureOpen();
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.lucene.tests.store;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
import org.apache.lucene.internal.hppc.LongHashSet;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
|
@ -206,5 +207,10 @@ public class SerialIOCountingDirectory extends FilterDirectory {
|
|||
IndexInput clone = in.clone();
|
||||
return new SerializedIOCountingIndexInput(clone, readAdvice, sliceOffset, sliceLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Boolean> isLoaded() {
|
||||
return in.isLoaded();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|