LUCENE-8142: Make postings APIs expose raw impacts rather than scores.

This commit is contained in:
Adrien Grand 2018-05-02 11:52:36 +02:00
parent 555b7ef270
commit af680af77f
54 changed files with 902 additions and 505 deletions

View File

@ -30,16 +30,15 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@ -661,9 +660,9 @@ public class BlockTermsReader extends FieldsProducer {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
decodeMetaData();
return postingsReader.impacts(fieldInfo, state, scorer, flags);
return postingsReader.impacts(fieldInfo, state, flags);
}
@Override

View File

@ -24,7 +24,6 @@ import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -208,9 +207,9 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
currentFrame.decodeMetaData();
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags);
}
private int getState() {

View File

@ -29,7 +29,6 @@ import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@ -936,7 +935,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
assert !eof;
//if (DEBUG) {
//System.out.println("BTTR.docs seg=" + segment);
@ -945,7 +944,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
//if (DEBUG) {
//System.out.println(" state=" + currentFrame.state);
//}
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags);
}
@Override

View File

@ -32,16 +32,15 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
@ -375,8 +374,8 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return delegate().impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return delegate().impacts(flags);
}
}

View File

@ -39,7 +39,6 @@ import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.Accountable;
@ -948,8 +947,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags));
}
}
@ -1503,8 +1502,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags));
}
@Override

View File

@ -31,18 +31,17 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
@ -435,9 +434,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
decodeMetaData();
return postingsReader.impacts(fieldInfo, state, scorer, flags);
return postingsReader.impacts(fieldInfo, state, flags);
}
// TODO: this can be achieved by making use of Util.getByOutput()

View File

@ -42,7 +42,6 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@ -301,9 +300,9 @@ public class FSTTermsReader extends FieldsProducer {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
decodeMetaData();
return postingsReader.impacts(fieldInfo, state, scorer, flags);
return postingsReader.impacts(fieldInfo, state, flags);
}
@Override

View File

@ -29,7 +29,6 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.*;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
@ -44,11 +43,11 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.BlockPackedReader;
@ -871,7 +870,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}
}

View File

@ -31,19 +31,18 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IOContext;
@ -819,8 +818,8 @@ public final class MemoryPostingsFormat extends PostingsFormat {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags));
}
@Override

View File

@ -36,7 +36,6 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
@ -234,8 +233,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags));
}
}

View File

@ -25,15 +25,14 @@ import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
@ -414,8 +413,8 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS));
}
}

View File

@ -23,23 +23,25 @@ import java.util.Iterator;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.index.Impact;
/**
* This class accumulates the (freq, norm) pairs that may produce competitive scores.
*/
public final class CompetitiveFreqNormAccumulator {
public final class CompetitiveImpactAccumulator {
// We speed up accumulation for common norm values by first computing
// the max freq for all norms in -128..127
private final int[] maxFreqs;
private boolean dirty;
private final TreeSet<FreqAndNorm> freqNormPairs;
private final TreeSet<Impact> freqNormPairs;
/** Sole constructor. */
public CompetitiveFreqNormAccumulator() {
public CompetitiveImpactAccumulator() {
maxFreqs = new int[256];
Comparator<FreqAndNorm> comparator = new Comparator<CompetitiveFreqNormAccumulator.FreqAndNorm>() {
Comparator<Impact> comparator = new Comparator<Impact>() {
@Override
public int compare(FreqAndNorm o1, FreqAndNorm o2) {
public int compare(Impact o1, Impact o2) {
// greater freqs compare greater
int cmp = Integer.compare(o1.freq, o2.freq);
if (cmp == 0) {
@ -59,44 +61,6 @@ public final class CompetitiveFreqNormAccumulator {
freqNormPairs.clear();
}
/**
* A (freq, norm) pair.
*/
public static class FreqAndNorm {
/** Doc-term frequency. */
public final int freq;
/** Normalization factor. */
public final long norm;
/** Sole constructor. */
public FreqAndNorm(int freq, long norm) {
this.freq = freq;
this.norm = norm;
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj instanceof FreqAndNorm == false) {
return false;
}
FreqAndNorm that = (FreqAndNorm) obj;
return freq == that.freq && norm == that.norm;
}
@Override
public int hashCode() {
int h = getClass().hashCode();
h = 31 * h + freq;
h = 31 * h + Long.hashCode(norm);
return h;
}
@Override
public String toString() {
return "{" + freq + "," + norm + "}";
}
}
/** Accumulate a (freq,norm) pair, updating this structure if there is no
* equivalent or more competitive entry already. */
public void add(int freq, long norm) {
@ -105,23 +69,23 @@ public final class CompetitiveFreqNormAccumulator {
maxFreqs[index] = Math.max(maxFreqs[index], freq);
dirty = true;
} else {
add(new FreqAndNorm(freq, norm));
add(new Impact(freq, norm));
}
}
/** Merge {@code acc} into this. */
public void addAll(CompetitiveFreqNormAccumulator acc) {
for (FreqAndNorm entry : acc.getCompetitiveFreqNormPairs()) {
public void addAll(CompetitiveImpactAccumulator acc) {
for (Impact entry : acc.getCompetitiveFreqNormPairs()) {
add(entry);
}
}
/** Get the set of competitive freq and norm pairs, ordered by increasing freq and norm. */
public SortedSet<FreqAndNorm> getCompetitiveFreqNormPairs() {
public SortedSet<Impact> getCompetitiveFreqNormPairs() {
if (dirty) {
for (int i = 0; i < maxFreqs.length; ++i) {
if (maxFreqs[i] > 0) {
add(new FreqAndNorm(maxFreqs[i], (byte) i));
add(new Impact(maxFreqs[i], (byte) i));
maxFreqs[i] = 0;
}
}
@ -130,8 +94,8 @@ public final class CompetitiveFreqNormAccumulator {
return Collections.unmodifiableSortedSet(freqNormPairs);
}
private void add(FreqAndNorm newEntry) {
FreqAndNorm next = freqNormPairs.ceiling(newEntry);
private void add(Impact newEntry) {
Impact next = freqNormPairs.ceiling(newEntry);
if (next == null) {
// nothing is more competitive
freqNormPairs.add(newEntry);
@ -144,8 +108,8 @@ public final class CompetitiveFreqNormAccumulator {
freqNormPairs.add(newEntry);
}
for (Iterator<FreqAndNorm> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
FreqAndNorm entry = it.next();
for (Iterator<Impact> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
Impact entry = it.next();
if (Long.compareUnsigned(entry.norm, newEntry.norm) >= 0) {
// less competitive
it.remove();

View File

@ -24,7 +24,6 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
@ -72,7 +71,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
* Return a {@link ImpactsEnum} that exposes raw impacts rather than scores.
* @see #postings(FieldInfo, BlockTermState, PostingsEnum, int)
*/
public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, Similarity.SimScorer scorer, int flags) throws IOException;
public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException;
/**
* Checks consistency of this reader.

View File

@ -24,7 +24,6 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@ -235,9 +234,9 @@ final class IntersectTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
currentFrame.decodeMetaData();
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags);
}
private int getState() {

View File

@ -25,7 +25,6 @@ import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@ -1005,7 +1004,7 @@ final class SegmentTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
assert !eof;
//if (DEBUG) {
//System.out.println("BTTR.docs seg=" + segment);
@ -1014,7 +1013,7 @@ final class SegmentTermsEnum extends TermsEnum {
//if (DEBUG) {
//System.out.println(" state=" + currentFrame.state);
//}
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags);
}
@Override

View File

@ -37,7 +37,6 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
@ -946,10 +945,9 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
final PostingsEnum delegate = postings(null, PostingsEnum.FREQS);
final float maxScore = scorer.score(Float.MAX_VALUE, 1);
return new SlowImpactsEnum(delegate, maxScore);
return new SlowImpactsEnum(delegate);
}
}

View File

@ -19,20 +19,19 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@ -239,13 +238,12 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}
@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
Objects.requireNonNull(scorer);
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
if (state.docFreq <= BLOCK_SIZE || version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
// no skip data
return new SlowImpactsEnum(postings(fieldInfo, state, null, flags), scorer.score(Float.MAX_VALUE, 1));
return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
}
return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, scorer, flags);
return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
}
final class BlockDocsEnum extends PostingsEnum {
@ -1367,7 +1365,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
private long seekTo = -1;
public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, SimScorer scorer, int flags) throws IOException {
public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags) throws IOException {
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
@ -1440,8 +1438,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
MAX_SKIP_LEVELS,
indexHasPos,
indexHasOffsets,
indexHasPayloads,
scorer);
indexHasPayloads);
skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
if (indexHasFreq == false) {
@ -1544,17 +1541,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public float getMaxScore(int upTo) throws IOException {
return skipper.getMaxScore(upTo);
}
@Override
public int advanceShallow(int target) throws IOException {
public void advanceShallow(int target) throws IOException {
if (target > nextSkipDoc) {
// always plus one to fix the result, since skip position in Lucene50SkipReader
// is a little different from MultiLevelSkipListReader
@ -1580,7 +1567,17 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
nextSkipDoc = skipper.getNextSkipDoc();
}
assert nextSkipDoc >= target;
return nextSkipDoc;
}
@Override
public Impacts getImpacts() throws IOException {
advanceShallow(doc);
return skipper.getImpacts();
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override

View File

@ -31,7 +31,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
import org.apache.lucene.index.CorruptIndexException;
@ -101,7 +101,7 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
private boolean fieldHasNorms;
private NumericDocValues norms;
private final CompetitiveFreqNormAccumulator competitiveFreqNormAccumulator = new CompetitiveFreqNormAccumulator();
private final CompetitiveImpactAccumulator competitiveFreqNormAccumulator = new CompetitiveImpactAccumulator();
/** Creates a postings writer */
public Lucene50PostingsWriter(SegmentWriteState state) throws IOException {

View File

@ -17,90 +17,143 @@
package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Objects;
import java.util.List;
import java.util.RandomAccess;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
final class Lucene50ScoreSkipReader extends Lucene50SkipReader {
private final SimScorer scorer;
private final float[] maxScore;
private final byte[][] impacts;
private final int[] impactsLength;
private final float globalMaxScore;
private final byte[][] impactData;
private final int[] impactDataLength;
private final ByteArrayDataInput badi = new ByteArrayDataInput();
private final Impacts impacts;
private int numLevels = 1;
private final MutableImpactList[] perLevelImpacts;
public Lucene50ScoreSkipReader(int version, IndexInput skipStream, int maxSkipLevels,
boolean hasPos, boolean hasOffsets, boolean hasPayloads, SimScorer scorer) {
boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
super(version, skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads);
if (version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
throw new IllegalStateException("Cannot skip based on scores if impacts are not indexed");
}
this.scorer = Objects.requireNonNull(scorer);
this.maxScore = new float[maxSkipLevels];
this.impacts = new byte[maxSkipLevels][];
Arrays.fill(impacts, new byte[0]);
this.impactsLength = new int[maxSkipLevels];
this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1);
this.impactData = new byte[maxSkipLevels][];
Arrays.fill(impactData, new byte[0]);
this.impactDataLength = new int[maxSkipLevels];
this.perLevelImpacts = new MutableImpactList[maxSkipLevels];
for (int i = 0; i < perLevelImpacts.length; ++i) {
perLevelImpacts[i] = new MutableImpactList();
}
impacts = new Impacts() {
@Override
public int numLevels() {
return numLevels;
}
@Override
public int getDocIdUpTo(int level) {
return skipDoc[level];
}
@Override
public List<Impact> getImpacts(int level) {
assert level < numLevels;
if (impactDataLength[level] > 0) {
badi.reset(impactData[level], 0, impactDataLength[level]);
perLevelImpacts[level] = readImpacts(badi, perLevelImpacts[level]);
impactDataLength[level] = 0;
}
return perLevelImpacts[level];
}
};
}
@Override
public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) throws IOException {
super.init(skipPointer, docBasePointer, posBasePointer, payBasePointer, df);
Arrays.fill(impactsLength, 0);
Arrays.fill(maxScore, globalMaxScore);
public int skipTo(int target) throws IOException {
int result = super.skipTo(target);
if (numberOfSkipLevels > 0) {
numLevels = numberOfSkipLevels;
} else {
// The end of the postings list no longer has skip data, so we fill in dummy data
// like SlowImpactsEnum does.
numLevels = 1;
perLevelImpacts[0].length = 1;
perLevelImpacts[0].impacts[0].freq = Integer.MAX_VALUE;
perLevelImpacts[0].impacts[0].norm = 1L;
impactDataLength[0] = 0;
}
return result;
}
/** Upper bound of scores up to {@code upTo} included. */
public float getMaxScore(int upTo) throws IOException {
for (int level = 0; level < numberOfSkipLevels; ++level) {
if (upTo <= skipDoc[level]) {
return maxScore(level);
}
}
return globalMaxScore;
}
private float maxScore(int level) throws IOException {
assert level < numberOfSkipLevels;
if (impactsLength[level] > 0) {
badi.reset(impacts[level], 0, impactsLength[level]);
maxScore[level] = readImpacts(badi, scorer);
impactsLength[level] = 0;
}
return maxScore[level];
Impacts getImpacts() {
return impacts;
}
@Override
protected void readImpacts(int level, IndexInput skipStream) throws IOException {
int length = skipStream.readVInt();
if (impacts[level].length < length) {
impacts[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
if (impactData[level].length < length) {
impactData[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
}
skipStream.readBytes(impacts[level], 0, length);
impactsLength[level] = length;
skipStream.readBytes(impactData[level], 0, length);
impactDataLength[level] = length;
}
static float readImpacts(ByteArrayDataInput in, SimScorer scorer) throws IOException {
static MutableImpactList readImpacts(ByteArrayDataInput in, MutableImpactList reuse) {
int maxNumImpacts = in.length(); // at most one impact per byte
if (reuse.impacts.length < maxNumImpacts) {
int oldLength = reuse.impacts.length;
reuse.impacts = ArrayUtil.grow(reuse.impacts, maxNumImpacts);
for (int i = oldLength; i < reuse.impacts.length; ++i) {
reuse.impacts[i] = new Impact(Integer.MAX_VALUE, 1L);
}
}
int freq = 0;
long norm = 0;
float maxScore = 0;
int length = 0;
while (in.getPosition() < in.length()) {
int freqDelta = in.readVInt();
if ((freqDelta & 0x01) != 0) {
freq += 1 + (freqDelta >>> 1);
norm += 1 + in.readZLong();
try {
norm += 1 + in.readZLong();
} catch (IOException e) {
throw new RuntimeException(e); // cannot happen on a BADI
}
} else {
freq += 1 + (freqDelta >>> 1);
norm++;
}
maxScore = Math.max(maxScore, scorer.score(freq, norm));
Impact impact = reuse.impacts[length];
impact.freq = freq;
impact.norm = norm;
length++;
}
reuse.length = length;
return reuse;
}
static class MutableImpactList extends AbstractList<Impact> implements RandomAccess {
int length = 1;
Impact[] impacts = new Impact[] { new Impact(Integer.MAX_VALUE, 1L) };
@Override
public Impact get(int index) {
return impacts[index];
}
@Override
public int size() {
return length;
}
return maxScore;
}
}

View File

@ -200,8 +200,7 @@ class Lucene50SkipReader extends MultiLevelSkipListReader {
return delta;
}
// The default impl skips impacts since they are only useful if we have a SimScorer
// to compute the scores that impacts map to.
// The default impl skips impacts
protected void readImpacts(int level, IndexInput skipStream) throws IOException {
if (version >= Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
// The base implementation skips impacts since they are not used

View File

@ -22,9 +22,9 @@ import java.util.Arrays;
import java.util.Set;
import java.util.SortedSet;
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
import org.apache.lucene.index.Impact;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
@ -65,7 +65,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
private long curPayPointer;
private int curPosBufferUpto;
private int curPayloadByteUpto;
private CompetitiveFreqNormAccumulator[] curCompetitiveFreqNorms;
private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
private boolean fieldHasPositions;
private boolean fieldHasOffsets;
private boolean fieldHasPayloads;
@ -85,9 +85,9 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
}
lastPayloadByteUpto = new int[maxSkipLevels];
}
curCompetitiveFreqNorms = new CompetitiveFreqNormAccumulator[maxSkipLevels];
curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
for (int i = 0; i < maxSkipLevels; ++i) {
curCompetitiveFreqNorms[i] = new CompetitiveFreqNormAccumulator();
curCompetitiveFreqNorms[i] = new CompetitiveImpactAccumulator();
}
}
@ -116,7 +116,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
}
}
if (initialized) {
for (CompetitiveFreqNormAccumulator acc : curCompetitiveFreqNorms) {
for (CompetitiveImpactAccumulator acc : curCompetitiveFreqNorms) {
acc.clear();
}
}
@ -139,7 +139,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
}
// sets of competitive freq,norm pairs should be empty at this point
assert Arrays.stream(curCompetitiveFreqNorms)
.map(CompetitiveFreqNormAccumulator::getCompetitiveFreqNormPairs)
.map(CompetitiveImpactAccumulator::getCompetitiveFreqNormPairs)
.mapToInt(Set::size)
.sum() == 0;
initialized = true;
@ -149,7 +149,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
/**
* Sets the values for the current skip data.
*/
public void bufferSkip(int doc, CompetitiveFreqNormAccumulator competitiveFreqNorms,
public void bufferSkip(int doc, CompetitiveImpactAccumulator competitiveFreqNorms,
int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
initSkip();
this.curDoc = doc;
@ -191,7 +191,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
}
}
CompetitiveFreqNormAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
CompetitiveImpactAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
if (level + 1 < numberOfSkipLevels) {
curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
@ -203,14 +203,14 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
competitiveFreqNorms.clear();
}
static void writeImpacts(CompetitiveFreqNormAccumulator acc, IndexOutput out) throws IOException {
SortedSet<FreqAndNorm> freqAndNorms = acc.getCompetitiveFreqNormPairs();
FreqAndNorm previous = new FreqAndNorm(0, 0);
for (FreqAndNorm freqAndNorm : freqAndNorms) {
assert freqAndNorm.freq > previous.freq;
assert Long.compareUnsigned(freqAndNorm.norm, previous.norm) > 0;
int freqDelta = freqAndNorm.freq - previous.freq - 1;
long normDelta = freqAndNorm.norm - previous.norm - 1;
static void writeImpacts(CompetitiveImpactAccumulator acc, IndexOutput out) throws IOException {
SortedSet<Impact> impacts = acc.getCompetitiveFreqNormPairs();
Impact previous = new Impact(0, 0);
for (Impact impact : impacts) {
assert impact.freq > previous.freq;
assert Long.compareUnsigned(impact.norm, previous.norm) > 0;
int freqDelta = impact.freq - previous.freq - 1;
long normDelta = impact.norm - previous.norm - 1;
if (normDelta == 0) {
// most of time, norm only increases by 1, so we can fold everything in a single byte
out.writeVInt(freqDelta << 1);
@ -218,7 +218,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
out.writeVInt((freqDelta << 1) | 1);
out.writeZLong(normDelta);
}
previous = freqAndNorm;
previous = impact;
}
}
}

View File

@ -38,7 +38,6 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
@ -1160,7 +1159,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -30,6 +30,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MaxScoreCache;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
@ -114,7 +115,8 @@ final class FeatureQuery extends Query {
}
SimScorer scorer = function.scorer(fieldName, boost);
ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
MaxScoreCache maxScoreCache = new MaxScoreCache(impacts, scorer);
return new Scorer(this) {
@ -135,12 +137,12 @@ final class FeatureQuery extends Query {
@Override
public int advanceShallow(int target) throws IOException {
return impacts.advanceShallow(target);
return maxScoreCache.advanceShallow(target);
}
@Override
public float getMaxScore(int upTo) throws IOException {
return impacts.getMaxScore(upTo);
return maxScoreCache.getMaxScore(upTo);
}
};

View File

@ -27,6 +27,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
@ -48,7 +49,6 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@ -1602,58 +1602,45 @@ public final class CheckIndex implements Closeable {
// Checking score blocks is heavy, we only do it on long postings lists, on every 1024th term
// or if slow checks are enabled.
if (doSlowChecks || docFreq > 1024 || (status.termCount + status.delTermCount) % 1024 == 0) {
// Test score blocks
// We only score on freq to keep things simple and not pull norms
SimScorer scorer = new SimScorer(field) {
@Override
public float score(float freq, long norm) {
return freq;
}
};
// First check max scores and block uptos
// But only if slow checks are enabled since we visit all docs
if (doSlowChecks) {
int max = -1;
float maxScore = 0;
ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
int maxFreq = 0;
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
postings = termsEnum.postings(postings, PostingsEnum.FREQS);
for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
for (int doc = impactsEnum.nextDoc(); ; doc = impactsEnum.nextDoc()) {
if (postings.nextDoc() != doc) {
throw new RuntimeException("Wrong next doc: " + doc + ", expected " + postings.docID());
}
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (postings.freq() != impacts.freq()) {
throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
if (postings.freq() != impactsEnum.freq()) {
throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impactsEnum.freq());
}
if (doc > max) {
max = impacts.advanceShallow(doc);
if (max < doc) {
throw new RuntimeException("max block doc id " + max + " must be greater than the target: " + doc);
}
maxScore = impacts.getMaxScore(max);
impactsEnum.advanceShallow(doc);
Impacts impacts = impactsEnum.getImpacts();
checkImpacts(impacts, doc);
max = impacts.getDocIdUpTo(0);
List<Impact> impacts0 = impacts.getImpacts(0);
maxFreq = impacts0.get(impacts0.size() - 1).freq;
}
int max2 = impacts.advanceShallow(doc);
if (max != max2) {
throw new RuntimeException("max is not stable, initially had " + max + " but now " + max2);
}
float score = scorer.score(impacts.freq(), 1);
if (score > maxScore) {
throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
if (impactsEnum.freq() > maxFreq) {
throw new RuntimeException("freq " + impactsEnum.freq() + " is greater than the max freq according to impacts " + maxFreq);
}
}
}
// Now check advancing
ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
postings = termsEnum.postings(postings, PostingsEnum.FREQS);
int max = -1;
float maxScore = 0;
int maxFreq = 0;
while (true) {
int doc = impacts.docID();
int doc = impactsEnum.docID();
boolean advance;
int target;
if (((field.hashCode() + doc) & 1) == 1) {
@ -1662,23 +1649,29 @@ public final class CheckIndex implements Closeable {
} else {
advance = true;
int delta = Math.min(1 + ((31 * field.hashCode() + doc) & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
target = impacts.docID() + delta;
target = impactsEnum.docID() + delta;
}
if (target > max && target % 2 == 1) {
int delta = Math.min((31 * field.hashCode() + target) & 0x1ff, DocIdSetIterator.NO_MORE_DOCS - target);
max = target + delta;
int m = impacts.advanceShallow(target);
if (m < target) {
throw new RuntimeException("Block max doc: " + m + " is less than the target " + target);
impactsEnum.advanceShallow(target);
Impacts impacts = impactsEnum.getImpacts();
checkImpacts(impacts, doc);
maxFreq = Integer.MAX_VALUE;
for (int level = 0; level < impacts.numLevels(); ++level) {
if (impacts.getDocIdUpTo(level) >= max) {
List<Impact> perLevelImpacts = impacts.getImpacts(level);
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
break;
}
}
maxScore = impacts.getMaxScore(max);
}
if (advance) {
doc = impacts.advance(target);
doc = impactsEnum.advance(target);
} else {
doc = impacts.nextDoc();
doc = impactsEnum.nextDoc();
}
if (postings.advance(target) != doc) {
@ -1687,23 +1680,28 @@ public final class CheckIndex implements Closeable {
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (postings.freq() != impacts.freq()) {
throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
if (postings.freq() != impactsEnum.freq()) {
throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impactsEnum.freq());
}
if (doc >= max) {
int delta = Math.min((31 * field.hashCode() + target & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
max = doc + delta;
int m = impacts.advanceShallow(doc);
if (m < doc) {
throw new RuntimeException("Block max doc: " + m + " is less than the target " + doc);
impactsEnum.advanceShallow(doc);
Impacts impacts = impactsEnum.getImpacts();
checkImpacts(impacts, doc);
maxFreq = Integer.MAX_VALUE;
for (int level = 0; level < impacts.numLevels(); ++level) {
if (impacts.getDocIdUpTo(level) >= max) {
List<Impact> perLevelImpacts = impacts.getImpacts(level);
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
break;
}
}
maxScore = impacts.getMaxScore(max);
}
float score = scorer.score(impacts.freq(), 1);
if (score > maxScore) {
throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
if (impactsEnum.freq() > maxFreq) {
throw new RuntimeException("Term frequency " + impactsEnum.freq() + " is greater than the max freq according to impacts " + maxFreq);
}
}
}
@ -1850,6 +1848,68 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
 * Validates the structural invariants of an {@link Impacts} instance obtained
 * after shallow-advancing to {@code lastTarget}.
 *
 * <p>The following is verified:
 * <ul>
 *   <li>there is at least one level;</li>
 *   <li>level 0 is valid at least up to {@code lastTarget};</li>
 *   <li>{@code getDocIdUpTo} is non-decreasing across levels;</li>
 *   <li>every level has a non-empty impact list whose first entry has
 *       {@code freq >= 1} and {@code norm != 0};</li>
 *   <li>within a level, impacts are strictly increasing in both frequency and
 *       unsigned norm (each impact is compared with its direct predecessor);</li>
 *   <li>impacts of level N are compared against those of level N-1 to detect
 *       lower-level impacts that no higher-level impact can account for.</li>
 * </ul>
 *
 * @param impacts    the impacts to validate
 * @param lastTarget the doc ID the iterator was last shallow-advanced to
 * @throws RuntimeException if any of the invariants above is violated
 */
static void checkImpacts(Impacts impacts, int lastTarget) {
  final int numLevels = impacts.numLevels();
  if (numLevels < 1) {
    throw new RuntimeException("The number of levels must be >= 1, got " + numLevels);
  }

  int docIdUpTo0 = impacts.getDocIdUpTo(0);
  if (docIdUpTo0 < lastTarget) {
    throw new RuntimeException("getDocIdUpTo returned " + docIdUpTo0 + " on level 0, which is less than the target " + lastTarget);
  }

  // Higher levels must cover at least as many doc IDs as lower levels
  for (int level = 1; level < numLevels; ++level) {
    int docIdUpTo = impacts.getDocIdUpTo(level);
    int previousDocIdUpTo = impacts.getDocIdUpTo(level - 1);
    if (docIdUpTo < previousDocIdUpTo) {
      throw new RuntimeException("Decreasing return for getDocIdUpTo: level " + (level-1) + " returned " + previousDocIdUpTo
          + " but level " + level + " returned " + docIdUpTo + " for target " + lastTarget);
    }
  }

  for (int level = 0; level < numLevels; ++level) {
    List<Impact> perLevelImpacts = impacts.getImpacts(level);
    if (perLevelImpacts.isEmpty()) {
      throw new RuntimeException("Got empty list of impacts on level " + level);
    }

    Impact first = perLevelImpacts.get(0);
    if (first.freq < 1) {
      throw new RuntimeException("First impact had a freq <= 0: " + first);
    }
    if (first.norm == 0) {
      throw new RuntimeException("First impact had a norm == 0: " + first);
    }

    // Impacts must be in increasing order of norm AND freq
    Impact previous = first;
    for (int i = 1; i < perLevelImpacts.size(); ++i) {
      Impact impact = perLevelImpacts.get(i);
      if (impact.freq <= previous.freq || Long.compareUnsigned(impact.norm, previous.norm) <= 0) {
        throw new RuntimeException("Impacts are not ordered or contain dups, got " + previous + " then " + impact);
      }
      // Advance the reference so that each impact is compared with its direct
      // predecessor rather than always with the first impact of the level.
      previous = impact;
    }

    if (level > 0) {
      // Make sure that impacts at level N trigger better scores than at level N-1
      // NOTE(review): the first impact of level N-1 is consumed before the loop and
      // is never compared, and after moving to the next impact of level N the new
      // impact is not re-checked against `previous` -- confirm this leniency is intended.
      Iterator<Impact> previousIt = impacts.getImpacts(level-1).iterator();
      previous = previousIt.next();
      Iterator<Impact> it = perLevelImpacts.iterator();
      Impact impact = it.next();
      while (previousIt.hasNext()) {
        previous = previousIt.next();
        if (previous.freq <= impact.freq && Long.compareUnsigned(previous.norm, impact.norm) >= 0) {
          // previous triggers a lower score than the current impact, all good
          continue;
        }
        if (it.hasNext() == false) {
          throw new RuntimeException("Found impact " + previous + " on level " + (level-1) + " but no impact on level "
              + level + " triggers a better score: " + perLevelImpacts);
        }
        impact = it.next();
      }
    }
  }
}
/**
* Test the term index.
* @lucene.experimental

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -216,8 +215,8 @@ public abstract class FilterLeafReader extends LeafReader {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return in.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return in.impacts(flags);
}
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
/**
@ -184,8 +183,8 @@ public abstract class FilteredTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return tenum.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return tenum.impacts(flags);
}
/** This enum does not support seeking!

View File

@ -24,7 +24,6 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -275,7 +274,7 @@ class FreqProxFields extends Fields {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
/**
* Per-document scoring factors.
*/
public final class Impact {
/**
* Term frequency of the term in the document.
*/
public int freq;
/**
* Norm factor of the document.
*/
public long norm;
/**
* Constructor.
*/
public Impact(int freq, long norm) {
this.freq = freq;
this.norm = norm;
}
@Override
public String toString() {
return "{freq=" + freq + ",norm=" + norm + "}";
}
@Override
public int hashCode() {
int h = freq;
h = 31 * h + Long.hashCode(norm);
return h;
}
@Override
public boolean equals(Object obj) {
if (obj == null || getClass() != obj.getClass()) return false;
Impact other = (Impact) obj;
return freq == other.freq && norm == other.norm;
}
}

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.util.List;
/**
* Information about upcoming impacts, ie. (freq, norm) pairs.
* Impacts are organized in levels, numbered from {@code 0} to
* {@link #numLevels()}{@code - 1}; each level exposes the doc ID up to which
* it is valid and the list of impacts that applies up to that doc ID.
*/
public abstract class Impacts {
/**
* Return the number of levels on which we have impacts.
* The returned value is always greater than 0 and may not always be the
* same, even on a single postings list, depending on the current doc ID.
* @return the number of levels, at least 1
*/
public abstract int numLevels();
/**
* Return the maximum inclusive doc ID until which the list of impacts
* returned by {@link #getImpacts(int)} is valid. This is a non-decreasing
* function of {@code level}.
* @param level the level to query, between 0 included and {@link #numLevels()} excluded
* @return the last doc ID, included, covered by the impacts of {@code level}
*/
public abstract int getDocIdUpTo(int level);
/**
* Return impacts on the given level. These impacts are sorted by increasing
* frequency and increasing unsigned norm, and only valid until the doc ID
* returned by {@link #getDocIdUpTo(int)} for the same level, included.
* The returned list is never empty.
* NOTE: There is no guarantee that these impacts actually appear in postings,
* only that they trigger scores that are greater than or equal to the impacts
* that actually appear in postings.
* @param level the level to query, between 0 included and {@link #numLevels()} excluded
* @return a non-empty list of impacts for {@code level}
*/
public abstract List<Impact> getImpacts(int level);
}

View File

@ -18,11 +18,9 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
/**
* Extension of {@link PostingsEnum} which also provides information about the
* produced scores.
* Extension of {@link PostingsEnum} which also provides information about
* upcoming impacts.
* @lucene.experimental
*/
public abstract class ImpactsEnum extends PostingsEnum {
@ -31,23 +29,28 @@ public abstract class ImpactsEnum extends PostingsEnum {
protected ImpactsEnum() {}
/**
* Advance to the block of documents that contains {@code target} in order to
* get scoring information about this block. This method is implicitly called
* by {@link DocIdSetIterator#advance(int)} and
* {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
* current {@link DocIdSetIterator#docID()}.
* It returns a number that is greater than or equal to all documents
* contained in the current block, but less than any doc IDS of the next block.
* {@code target} must be &gt;= {@link #docID()} as well as all targets that
* have been passed to {@link #advanceShallow(int)} so far.
* Shallow-advance to {@code target}. This is cheaper than calling
* {@link #advance(int)} and allows further calls to {@link #getImpacts()}
* to ignore doc IDs that are less than {@code target} in order to get more
* precise information about impacts.
* This method may not be called on targets that are less than the current
* {@link #docID()}.
* After this method has been called, {@link #nextDoc()} may not be called
* if the current doc ID is less than {@code target - 1} and
* {@link #advance(int)} may not be called on targets that are less than
* {@code target}.
*/
public abstract int advanceShallow(int target) throws IOException;
public abstract void advanceShallow(int target) throws IOException;
/**
* Return the maximum score that documents between the last {@code target}
* that this iterator was {@link #advanceShallow(int) shallow-advanced} to
* included and {@code upTo} included.
* Get information about upcoming impacts for doc ids that are greater than
* or equal to the maximum of {@link #docID()} and the last target that was
* passed to {@link #advanceShallow(int)}.
* This method may not be called on an unpositioned iterator on which
* {@link #advanceShallow(int)} has never been called.
* NOTE: advancing this iterator may invalidate the returned impacts, so they
* should not be used after the iterator has been advanced.
*/
public abstract float getMaxScore(int upTo) throws IOException;
public abstract Impacts getImpacts() throws IOException;
}

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -369,9 +368,9 @@ public final class MultiTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
// implemented to not fail CheckIndex, but you shouldn't be using impacts on a slow reader
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
return new SlowImpactsEnum(postings(null, flags));
}
final static class TermsEnumWithSlice {

View File

@ -17,23 +17,45 @@
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
/**
* {@link ImpactsEnum} that doesn't index impacts but implements the API in a
* legal way. This should typically be used for short postings that do not need
* legal way. This is typically used for short postings that do not need
* skipping.
*/
public final class SlowImpactsEnum extends ImpactsEnum {
// Placeholder impacts shared by all instances: a single level that spans every
// doc ID and holds one impact with the largest frequency and a norm of 1.
private static final Impacts DUMMY_IMPACTS = new Impacts() {

  // The only list ever handed out, whatever the requested level
  private final List<Impact> singleLevelImpacts =
      Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));

  @Override
  public int numLevels() {
    return 1;
  }

  @Override
  public int getDocIdUpTo(int level) {
    return DocIdSetIterator.NO_MORE_DOCS;
  }

  @Override
  public List<Impact> getImpacts(int level) {
    return singleLevelImpacts;
  }
};
private final PostingsEnum delegate;
private final float maxScore;
/** Wrap the given {@link PostingsEnum}. */
public SlowImpactsEnum(PostingsEnum delegate, float maxScore) {
public SlowImpactsEnum(PostingsEnum delegate) {
this.delegate = delegate;
this.maxScore = maxScore;
}
@Override
@ -82,13 +104,10 @@ public final class SlowImpactsEnum extends ImpactsEnum {
}
@Override
public int advanceShallow(int target) {
return NO_MORE_DOCS;
}
public void advanceShallow(int target) {}
@Override
public float getMaxScore(int maxDoc) {
return maxScore;
public Impacts getImpacts() {
return DUMMY_IMPACTS;
}
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -111,7 +110,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -17,7 +17,6 @@
package org.apache.lucene.index;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -111,7 +110,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
@ -171,10 +170,10 @@ public abstract class TermsEnum implements BytesRefIterator {
public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException;
/**
* Return a {@link ImpactsEnum} that computes impacts with {@code scorer}.
* Return a {@link ImpactsEnum}.
* @see #postings(PostingsEnum, int)
*/
public abstract ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException;
public abstract ImpactsEnum impacts(int flags) throws IOException;
/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
@ -236,7 +235,7 @@ public abstract class TermsEnum implements BytesRefIterator {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new IllegalStateException("this method should never be called");
}

View File

@ -38,6 +38,9 @@ final class BlockMaxConjunctionScorer extends Scorer {
BlockMaxConjunctionScorer(Weight weight, Collection<Scorer> scorersList) throws IOException {
super(weight);
this.scorers = scorersList.toArray(new Scorer[scorersList.size()]);
for (Scorer scorer : scorers) {
scorer.advanceShallow(0);
}
this.maxScorePropagator = new MaxScoreSumPropagator(scorersList);
// Put scorers with the higher max scores first

View File

@ -23,7 +23,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
@ -275,8 +274,8 @@ public final class FuzzyTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return actualEnum.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return actualEnum.impacts(flags);
}
@Override

View File

@ -0,0 +1,138 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
/**
 * Derives maximum scores from the {@link Impacts} exposed by an
 * {@link ImpactsEnum} and memoizes them per level, so that similarity score
 * computations are not repeated on data that has already been evaluated.
 */
public final class MaxScoreCache {

  private final ImpactsEnum impactsEnum;
  private final SimScorer scorer;
  // Score of (Integer.MAX_VALUE, 1), returned when no level covers the requested range
  private final float globalMaxScore;
  // maxScoreCache[level] is the max score computed for impacts valid up to maxScoreCacheUpTo[level]
  private float[] maxScoreCache;
  private int[] maxScoreCacheUpTo;

  /**
   * Sole constructor.
   *
   * @param impactsEnum the enum whose impacts are turned into maximum scores
   * @param scorer      the scorer used to convert (freq, norm) pairs into scores
   */
  public MaxScoreCache(ImpactsEnum impactsEnum, SimScorer scorer) {
    this.impactsEnum = impactsEnum;
    this.scorer = scorer;
    this.globalMaxScore = scorer.score(Integer.MAX_VALUE, 1L);
    this.maxScoreCache = new float[0];
    this.maxScoreCacheUpTo = new int[0];
  }

  /**
   * Grow both cache arrays so that they hold at least {@code size} entries;
   * newly created slots are marked as never computed ({@code -1}).
   */
  private void ensureCacheSize(int size) {
    final int previousLength = maxScoreCache.length;
    if (previousLength >= size) {
      return;
    }
    maxScoreCache = ArrayUtil.grow(maxScoreCache, size);
    final int grownLength = maxScoreCache.length;
    maxScoreCacheUpTo = Arrays.copyOf(maxScoreCacheUpTo, grownLength);
    Arrays.fill(maxScoreCacheUpTo, previousLength, grownLength, -1);
  }

  /**
   * Return the greatest score produced by any of the given impacts,
   * starting from 0.
   */
  private float computeMaxScore(List<Impact> impacts) {
    float best = 0;
    for (Impact candidate : impacts) {
      final float candidateScore = scorer.score(candidate.freq, candidate.norm);
      best = Math.max(candidateScore, best);
    }
    return best;
  }

  /**
   * Return the first level that includes all doc IDs up to {@code upTo},
   * or -1 if there is no such level.
   */
  private int getLevel(int upTo) throws IOException {
    final Impacts impacts = impactsEnum.getImpacts();
    final int numLevels = impacts.numLevels();
    int level = 0;
    while (level < numLevels) {
      if (upTo <= impacts.getDocIdUpTo(level)) {
        return level;
      }
      ++level;
    }
    return -1;
  }

  /**
   * Return the maximum score for the given {@code level}. The score is
   * recomputed only when the level is now valid up to a greater doc ID than
   * the one recorded when the cached value was computed.
   */
  float getMaxScoreForLevel(int level) throws IOException {
    final Impacts impacts = impactsEnum.getImpacts();
    ensureCacheSize(level + 1);
    final int levelUpTo = impacts.getDocIdUpTo(level);
    final boolean stale = maxScoreCacheUpTo[level] < levelUpTo;
    if (stale) {
      maxScoreCache[level] = computeMaxScore(impacts.getImpacts(level));
      maxScoreCacheUpTo[level] = levelUpTo;
    }
    return maxScoreCache[level];
  }

  /**
   * Return the maximum level at which scores are all less than {@code minScore},
   * or -1 if none.
   */
  int getSkipLevel(float minScore) throws IOException {
    final int numLevels = impactsEnum.getImpacts().numLevels();
    int level = 0;
    while (level < numLevels) {
      if (getMaxScoreForLevel(level) >= minScore) {
        // this level may contain competitive docs: only the levels below can be skipped
        break;
      }
      ++level;
    }
    return level - 1;
  }

  /**
   * Implement the contract of {@link Scorer#advanceShallow(int)} based on the
   * wrapped {@link ImpactsEnum}: shallow-advance it and report the doc ID up
   * to which its level 0 is valid.
   * @see Scorer#advanceShallow(int)
   */
  public int advanceShallow(int target) throws IOException {
    impactsEnum.advanceShallow(target);
    return impactsEnum.getImpacts().getDocIdUpTo(0);
  }

  /**
   * Implement the contract of {@link Scorer#getMaxScore(int)} based on the
   * wrapped {@link ImpactsEnum} and {@link Scorer}. When no level covers
   * {@code upTo}, the global maximum score is returned.
   * @see Scorer#getMaxScore(int)
   */
  public float getMaxScore(int upTo) throws IOException {
    final int level = getLevel(upTo);
    return level == -1 ? globalMaxScore : getMaxScoreForLevel(level);
  }
}

View File

@ -161,8 +161,8 @@ public abstract class Scorer {
* Advance to the block of documents that contains {@code target} in order to
* get scoring information about this block. This method is implicitly called
* by {@link DocIdSetIterator#advance(int)} and
* {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
* current {@link DocIdSetIterator#docID()}.
* {@link DocIdSetIterator#nextDoc()} on the returned doc ID. Calling this
* method doesn't modify the current {@link DocIdSetIterator#docID()}.
* It returns a number that is greater than or equal to all documents
* contained in the current block, but less than any doc IDs of the next block.
* {@code target} must be &gt;= {@link #docID()} as well as all targets that

View File

@ -19,6 +19,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
@ -31,6 +32,7 @@ final class TermScorer extends Scorer {
private final ImpactsEnum impactsEnum;
private final DocIdSetIterator iterator;
private final LeafSimScorer docScorer;
private final MaxScoreCache maxScoreCache;
private float minCompetitiveScore;
/**
@ -47,7 +49,8 @@ final class TermScorer extends Scorer {
super(weight);
this.docScorer = docScorer;
if (scoreMode == ScoreMode.TOP_SCORES) {
impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS);
impactsEnum = te.impacts(PostingsEnum.FREQS);
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
postingsEnum = impactsEnum;
iterator = new DocIdSetIterator() {
@ -61,8 +64,10 @@ final class TermScorer extends Scorer {
}
if (target > upTo) {
upTo = impactsEnum.advanceShallow(target);
maxScore = impactsEnum.getMaxScore(upTo);
impactsEnum.advanceShallow(target);
Impacts impacts = impactsEnum.getImpacts();
upTo = impacts.getDocIdUpTo(0);
maxScore = maxScoreCache.getMaxScoreForLevel(0);
}
while (true) {
@ -76,10 +81,23 @@ final class TermScorer extends Scorer {
return NO_MORE_DOCS;
}
target = upTo + 1;
upTo = impactsEnum.advanceShallow(target);
maxScore = impactsEnum.getMaxScore(upTo);
impactsEnum.advanceShallow(upTo + 1);
Impacts impacts = impactsEnum.getImpacts();
final int level = maxScoreCache.getSkipLevel(minCompetitiveScore);
if (level >= 0) {
// we can skip more docs
int newUpTo = impacts.getDocIdUpTo(level);
if (newUpTo == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
target = newUpTo + 1;
impactsEnum.advanceShallow(target);
impacts = impactsEnum.getImpacts();
} else {
target = upTo + 1;
}
upTo = impacts.getDocIdUpTo(0);
maxScore = maxScoreCache.getMaxScoreForLevel(0);
}
}
@ -105,7 +123,8 @@ final class TermScorer extends Scorer {
};
} else {
postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1));
impactsEnum = new SlowImpactsEnum(postingsEnum);
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
iterator = postingsEnum;
}
}
@ -132,12 +151,12 @@ final class TermScorer extends Scorer {
@Override
public int advanceShallow(int target) throws IOException {
return impactsEnum.advanceShallow(target);
return maxScoreCache.advanceShallow(target);
}
@Override
public float getMaxScore(int upTo) throws IOException {
return impactsEnum.getMaxScore(upTo);
return maxScoreCache.getMaxScore(upTo);
}
@Override
@ -148,4 +167,5 @@ final class TermScorer extends Scorer {
/**
 * Describes this {@code TermScorer} as {@code scorer(<weight>)[<inherited toString>]}.
 */
@Override
public String toString() {
  // Append in the same left-to-right order as a plain concatenation would.
  return new StringBuilder("scorer(")
      .append(weight)
      .append(")[")
      .append(super.toString())
      .append("]")
      .toString();
}
}

View File

@ -139,6 +139,7 @@ final class WANDScorer extends Scorer {
OptionalInt scalingFactor = OptionalInt.empty();
for (Scorer scorer : scorers) {
scorer.advanceShallow(0);
float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
if (maxScore != 0 && Float.isFinite(maxScore)) {
// 0 and +Infty should not impact the scale

View File

@ -20,85 +20,85 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
import org.apache.lucene.index.Impact;
import org.apache.lucene.util.LuceneTestCase;
public class TestCompetitiveFreqNormAccumulator extends LuceneTestCase {
public void testBasics() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
Set<FreqAndNorm> expected = new HashSet<>();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
Set<Impact> expected = new HashSet<>();
acc.add(3, 5);
expected.add(new FreqAndNorm(3, 5));
expected.add(new Impact(3, 5));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(6, 11);
expected.add(new FreqAndNorm(6, 11));
expected.add(new Impact(6, 11));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(10, 13);
expected.add(new FreqAndNorm(10, 13));
expected.add(new Impact(10, 13));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(1, 2);
expected.add(new FreqAndNorm(1, 2));
expected.add(new Impact(1, 2));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(7, 9);
expected.remove(new FreqAndNorm(6, 11));
expected.add(new FreqAndNorm(7, 9));
expected.remove(new Impact(6, 11));
expected.add(new Impact(7, 9));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(8, 2);
expected.clear();
expected.add(new FreqAndNorm(10, 13));
expected.add(new FreqAndNorm(8, 2));
expected.add(new Impact(10, 13));
expected.add(new Impact(8, 2));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
}
public void testExtremeNorms() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
Set<FreqAndNorm> expected = new HashSet<>();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
Set<Impact> expected = new HashSet<>();
acc.add(3, 5);
expected.add(new FreqAndNorm(3, 5));
expected.add(new Impact(3, 5));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(10, 10000);
expected.add(new FreqAndNorm(10, 10000));
expected.add(new Impact(10, 10000));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(5, 200);
expected.add(new FreqAndNorm(5, 200));
expected.add(new Impact(5, 200));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(20, -100);
expected.add(new FreqAndNorm(20, -100));
expected.add(new Impact(20, -100));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
acc.add(30, -3);
expected.add(new FreqAndNorm(30, -3));
expected.add(new Impact(30, -3));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
}
public void testOmitFreqs() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
acc.add(1, 5);
acc.add(1, 7);
acc.add(1, 4);
assertEquals(Collections.singleton(new FreqAndNorm(1, 4)), acc.getCompetitiveFreqNormPairs());
assertEquals(Collections.singleton(new Impact(1, 4)), acc.getCompetitiveFreqNormPairs());
}
public void testOmitNorms() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
acc.add(5, 1);
acc.add(7, 1);
acc.add(4, 1);
assertEquals(Collections.singleton(new FreqAndNorm(7, 1)), acc.getCompetitiveFreqNormPairs());
assertEquals(Collections.singleton(new Impact(7, 1)), acc.getCompetitiveFreqNormPairs());
}
}

View File

@ -18,19 +18,23 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.codecs.lucene50.Lucene50ScoreSkipReader.MutableImpactList;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@ -89,33 +93,43 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
public void testImpactSerialization() throws IOException {
// omit norms and omit freqs
doTestImpactSerialization(new int[] { 1 }, new long[] { 1L });
doTestImpactSerialization(Collections.singletonList(new Impact(1, 1L)));
// omit freqs
doTestImpactSerialization(new int[] { 1 }, new long[] { 42L });
doTestImpactSerialization(Collections.singletonList(new Impact(1, 42L)));
// omit freqs with very large norms
doTestImpactSerialization(new int[] { 1 }, new long[] { -100L });
doTestImpactSerialization(Collections.singletonList(new Impact(1, -100L)));
// omit norms
doTestImpactSerialization(new int[] { 30 }, new long[] { 1L });
doTestImpactSerialization(Collections.singletonList(new Impact(30, 1L)));
// omit norms with large freq
doTestImpactSerialization(new int[] { 500 }, new long[] { 1L });
doTestImpactSerialization(Collections.singletonList(new Impact(500, 1L)));
// freqs and norms, basic
doTestImpactSerialization(
new int[] { 1, 3, 7, 15, 20, 28 },
new long[] { 7L, 9L, 10L, 11L, 13L, 14L });
Arrays.asList(
new Impact(1, 7L),
new Impact(3, 9L),
new Impact(7, 10L),
new Impact(15, 11L),
new Impact(20, 13L),
new Impact(28, 14L)));
// freqs and norms, high values
doTestImpactSerialization(
new int[] { 2, 10, 12, 50, 1000, 1005 },
new long[] { 2L, 10L, 50L, -100L, -80L, -3L });
Arrays.asList(
new Impact(2, 2L),
new Impact(10, 10L),
new Impact(12, 50L),
new Impact(50, -100L),
new Impact(1000, -80L),
new Impact(1005, -3L)));
}
private void doTestImpactSerialization(int[] freqs, long[] norms) throws IOException {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
for (int i = 0; i < freqs.length; ++i) {
acc.add(freqs[i], norms[i]);
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
for (Impact impact : impacts) {
acc.add(impact.freq, impact.norm);
}
try(Directory dir = newDirectory()) {
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
@ -124,17 +138,8 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
byte[] b = new byte[Math.toIntExact(in.length())];
in.readBytes(b, 0, b.length);
Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new SimScorer("") {
int i = 0;
@Override
public float score(float freq, long norm) {
assert freq == freqs[i];
assert norm == norms[i];
i++;
return 0;
}
});
List<Impact> impacts2 = Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
assertEquals(impacts, impacts2);
}
}
}

View File

@ -34,7 +34,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@ -680,7 +679,7 @@ public class TestCodecs extends LuceneTestCase {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}
}

View File

@ -42,7 +42,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
@ -1429,8 +1428,8 @@ public class MemoryIndex {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1L));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags));
}
@Override

View File

@ -21,11 +21,10 @@ import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
@ -90,7 +89,7 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
}
@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
throw new UnsupportedOperationException("Should never be called, IDVersionSegmentTermsEnum implements impacts directly");
}

View File

@ -25,7 +25,6 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
@ -1009,10 +1008,10 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
// Only one posting, the slow impl is fine
// We could make this throw UOE but then CheckIndex is angry
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
return new SlowImpactsEnum(postings(null, flags));
}
@Override

View File

@ -45,7 +45,6 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Accountable;
@ -477,8 +476,8 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS));
}
}

View File

@ -18,12 +18,12 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
@ -211,12 +211,12 @@ public class AssertingLeafReader extends FilterLeafReader {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
assertThread("Terms enums", creationThread);
assert state == State.POSITIONED: "docs(...) called on unpositioned TermsEnum";
assert (flags & PostingsEnum.FREQS) != 0 : "Freqs should be requested on impacts";
return new AssertingImpactsEnum(super.impacts(scorer, flags));
return new AssertingImpactsEnum(super.impacts(flags));
}
// TODO: we should separately track if we are 'at the end' ?
@ -454,7 +454,7 @@ public class AssertingLeafReader extends FilterLeafReader {
private final AssertingPostingsEnum assertingPostings;
private final ImpactsEnum in;
private int lastShallowTarget;
private int lastShallowTarget = -1;
AssertingImpactsEnum(ImpactsEnum impacts) {
in = impacts;
@ -463,20 +463,19 @@ public class AssertingLeafReader extends FilterLeafReader {
}
@Override
public int advanceShallow(int target) throws IOException {
public void advanceShallow(int target) throws IOException {
assert target >= lastShallowTarget : "called on decreasing targets: target = " + target + " < last target = " + lastShallowTarget;
assert target >= docID() : "target = " + target + " < docID = " + docID();
int upTo = in.advanceShallow(target);
assert upTo >= target : "upTo = " + upTo + " < target = " + target;
lastShallowTarget = target;
return upTo;
in.advanceShallow(target);
}
@Override
public float getMaxScore(int upTo) throws IOException {
assert upTo >= lastShallowTarget : "uTo = " + upTo + " < last shallow target = " + lastShallowTarget;
float maxScore = in.getMaxScore(upTo);
return maxScore;
public Impacts getImpacts() throws IOException {
assert docID() >= 0 || lastShallowTarget >= 0 : "Cannot get impacts until the iterator is positioned or advanceShallow has been called";
Impacts impacts = in.getImpacts();
CheckIndex.checkImpacts(impacts, Math.max(docID(), lastShallowTarget));
return new AssertingImpacts(impacts, this);
}
@Override
@ -527,6 +526,38 @@ public class AssertingLeafReader extends FilterLeafReader {
}
}
/**
 * Wraps an {@link Impacts} instance and asserts on every access that the
 * {@link AssertingImpactsEnum} it came from has not moved since creation.
 */
static class AssertingImpacts extends Impacts {

  private final Impacts in;
  private final AssertingImpactsEnum impactsEnum;
  /** Position of the enum (doc ID or last shallow target, whichever is greater) at creation time. */
  private final int validFor;

  AssertingImpacts(Impacts in, AssertingImpactsEnum impactsEnum) {
    this.in = in;
    this.impactsEnum = impactsEnum;
    this.validFor = currentPosition();
  }

  /** Greater of the enum's current doc ID and its last shallow-advance target. */
  private int currentPosition() {
    return Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget);
  }

  /** Whether the enum is still where it was when these impacts were obtained. */
  private boolean enumHasNotMoved() {
    return validFor == currentPosition();
  }

  @Override
  public int numLevels() {
    assert enumHasNotMoved() : "Cannot reuse impacts after advancing the iterator";
    return in.numLevels();
  }

  @Override
  public int getDocIdUpTo(int level) {
    assert enumHasNotMoved() : "Cannot reuse impacts after advancing the iterator";
    return in.getDocIdUpTo(level);
  }

  @Override
  public List<Impact> getImpacts(int level) {
    assert enumHasNotMoved() : "Cannot reuse impacts after advancing the iterator";
    return in.getImpacts(level);
  }

}
/** Wraps a NumericDocValues but with additional asserts */
public static class AssertingNumericDocValues extends NumericDocValues {
private final Thread creationThread = Thread.currentThread();

View File

@ -16,11 +16,18 @@
*/
package org.apache.lucene.index;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
@ -33,13 +40,13 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.IntToLongFunction;
import java.util.stream.Collectors;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
@ -55,12 +62,6 @@ import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/** Helper class extracted from BasePostingsFormatTestCase to exercise a postings format. */
public class RandomPostingsTester {
@ -608,7 +609,7 @@ public class RandomPostingsTester {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}
}
@ -1055,126 +1056,146 @@ public class RandomPostingsTester {
} else {
docToNorm = doc -> 1L;
}
for (int s = 0; s < 3; ++s) {
final int scoreMode = s;
SimScorer scorer = new SimScorer(field) {
@Override
public float score(float freq, long norm) {
switch (scoreMode) {
case 0:
return freq; // make sure the postings record the best freq
case 1:
return 1f / norm; // make sure the postings record the best norm
default:
return freq - norm + MAX_NORM; // now a combination that could make intermediate pairs more competitive
}
}
};
// First check max scores and block uptos
int max = -1;
float maxScore = 0;
int flags = PostingsEnum.FREQS;
if (doCheckPositions) {
flags |= PostingsEnum.POSITIONS;
// First check impacts and block uptos
int max = -1;
List<Impact> impactsCopy = null;
int flags = PostingsEnum.FREQS;
if (doCheckPositions) {
flags |= PostingsEnum.POSITIONS;
if (doCheckOffsets) {
flags |= PostingsEnum.OFFSETS;
}
if (doCheckPayloads) {
flags |= PostingsEnum.PAYLOADS;
}
}
ImpactsEnum impactsEnum = termsEnum.impacts(flags);
PostingsEnum postings = termsEnum.postings(null, flags);
for (int doc = impactsEnum.nextDoc(); ; doc = impactsEnum.nextDoc()) {
assertEquals(postings.nextDoc(), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impactsEnum.freq());
for (int i = 0; i < freq; ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impactsEnum.nextPosition());
if (doCheckOffsets) {
flags |= PostingsEnum.OFFSETS;
assertEquals("startOffset is wrong", postings.startOffset(), impactsEnum.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impactsEnum.endOffset());
}
if (doCheckPayloads) {
flags |= PostingsEnum.PAYLOADS;
assertEquals("payload is wrong", postings.getPayload(), impactsEnum.getPayload());
}
}
if (doc > max) {
impactsEnum.advanceShallow(doc);
Impacts impacts = impactsEnum.getImpacts();
CheckIndex.checkImpacts(impacts, doc);
impactsCopy = impacts.getImpacts(0)
.stream()
.map(i -> new Impact(i.freq, i.norm))
.collect(Collectors.toList());
}
freq = impactsEnum.freq();
long norm = docToNorm.applyAsLong(doc);
int idx = Collections.binarySearch(impactsCopy, new Impact(freq, norm), Comparator.comparing(i -> i.freq));
if (idx < 0) {
idx = -1 - idx;
}
// idx is either an exact match or the insertion point, which can be equal to
// impactsCopy.size() when every recorded impact has a lower freq. The bound
// must be strict so that this case fails the assertion with the message below
// instead of throwing IndexOutOfBoundsException from get(idx).
assertTrue("Got " + new Impact(freq, norm) + " in postings, but no impact triggers equal or better scores in " + impactsCopy,
idx < impactsCopy.size() && impactsCopy.get(idx).norm <= norm);
}
// Now check advancing
impactsEnum = termsEnum.impacts(flags);
postings = termsEnum.postings(postings, flags);
max = -1;
while (true) {
int doc = impactsEnum.docID();
boolean advance;
int target;
if (random.nextBoolean()) {
advance = false;
target = doc + 1;
} else {
advance = true;
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
target = impactsEnum.docID() + delta;
}
if (target > max && random.nextBoolean()) {
int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target);
max = target + delta;
impactsEnum.advanceShallow(target);
Impacts impacts = impactsEnum.getImpacts();
CheckIndex.checkImpacts(impacts, target);
impactsCopy = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
for (int level = 0; level < impacts.numLevels(); ++level) {
if (impacts.getDocIdUpTo(level) >= max) {
impactsCopy = impacts.getImpacts(level)
.stream()
.map(i -> new Impact(i.freq, i.norm))
.collect(Collectors.toList());
break;
}
}
}
ImpactsEnum impacts = termsEnum.impacts(scorer, flags);
PostingsEnum postings = termsEnum.postings(null, flags);
for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
assertEquals(postings.nextDoc(), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impacts.freq());
for (int i = 0; i < freq; ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impacts.nextPosition());
if (doCheckOffsets) {
assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
}
if (doCheckPayloads) {
assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
}
}
if (doc > max) {
max = impacts.advanceShallow(doc);
assertTrue(max >= doc);
maxScore = impacts.getMaxScore(max);
}
assertEquals(max, impacts.advanceShallow(doc));
assertTrue(scorer.score(impacts.freq(), docToNorm.applyAsLong(doc)) <= maxScore);
if (advance) {
doc = impactsEnum.advance(target);
} else {
doc = impactsEnum.nextDoc();
}
// Now check advancing
impacts = termsEnum.impacts(scorer, flags);
postings = termsEnum.postings(postings, flags);
max = -1;
while (true) {
int doc = impacts.docID();
boolean advance;
int target;
if (random.nextBoolean()) {
advance = false;
target = doc + 1;
} else {
advance = true;
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
target = impacts.docID() + delta;
}
if (target > max && random.nextBoolean()) {
int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target);
max = target + delta;
int m = impacts.advanceShallow(target);
assertTrue(m >= target);
maxScore = impacts.getMaxScore(max);
}
if (advance) {
doc = impacts.advance(target);
} else {
doc = impacts.nextDoc();
}
assertEquals(postings.advance(target), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impacts.freq());
for (int i = 0; i < postings.freq(); ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impacts.nextPosition());
if (doCheckOffsets) {
assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
}
if (doCheckPayloads) {
assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
}
}
if (doc > max) {
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
max = doc + delta;
int m = impacts.advanceShallow(doc);
assertTrue(m >= doc);
maxScore = impacts.getMaxScore(max);
}
float score = scorer.score(impacts.freq(), docToNorm.applyAsLong(doc));
assertTrue(score <= maxScore);
assertEquals(postings.advance(target), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impactsEnum.freq());
for (int i = 0; i < postings.freq(); ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impactsEnum.nextPosition());
if (doCheckOffsets) {
assertEquals("startOffset is wrong", postings.startOffset(), impactsEnum.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impactsEnum.endOffset());
}
if (doCheckPayloads) {
assertEquals("payload is wrong", postings.getPayload(), impactsEnum.getPayload());
}
}
if (doc > max) {
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
max = doc + delta;
Impacts impacts = impactsEnum.getImpacts();
CheckIndex.checkImpacts(impacts, doc);
impactsCopy = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
for (int level = 0; level < impacts.numLevels(); ++level) {
if (impacts.getDocIdUpTo(level) >= max) {
impactsCopy = impacts.getImpacts(level)
.stream()
.map(i -> new Impact(i.freq, i.norm))
.collect(Collectors.toList());
break;
}
}
}
freq = impactsEnum.freq();
long norm = docToNorm.applyAsLong(doc);
int idx = Collections.binarySearch(impactsCopy, new Impact(freq, norm), Comparator.comparing(i -> i.freq));
if (idx < 0) {
idx = -1 - idx;
}
// idx is either an exact match or the insertion point, which can be equal to
// impactsCopy.size() when every recorded impact has a lower freq. The bound
// must be strict so that this case fails the assertion with the message below
// instead of throwing IndexOutOfBoundsException from get(idx).
assertTrue("Got " + new Impact(freq, norm) + " in postings, but no impact triggers equal or better scores in " + impactsCopy,
idx < impactsCopy.size() && impactsCopy.get(idx).norm <= norm);
}
}
}

View File

@ -88,6 +88,7 @@ public class AssertingScorer extends Scorer {
/**
 * Delegates to the wrapped scorer after checking that the call is legal:
 * {@code upTo} must not be behind the last shallow-advance target, and the
 * scorer must have been positioned or shallow-advanced at least once.
 */
@Override
public float getMaxScore(int upTo) throws IOException {
  assert upTo >= lastShallowTarget : "upTo = " + upTo + " < last target = " + lastShallowTarget;
  assert docID() >= 0 || lastShallowTarget >= 0 : "Cannot get max scores until the iterator is positioned or advanceShallow has been called";
  return in.getMaxScore(upTo);
}

View File

@ -632,7 +632,7 @@ public class CheckHits {
Assert.assertTrue(twoPhase1 == null || twoPhase1.matches());
float score = s2.score();
Assert.assertEquals(s1.score(), score);
Assert.assertTrue(score <= maxScore);
Assert.assertTrue(score + " > " + maxScore + " up to " + upTo, score <= maxScore);
if (score >= minScore && random.nextInt(10) == 0) {
// On some scorers, changing the min score changes the way that docs are iterated

View File

@ -43,7 +43,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
@ -241,8 +240,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return te.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return te.impacts(flags);
}
@Override

View File

@ -35,7 +35,6 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -609,8 +608,8 @@ public class DocTermOrds implements Accountable {
}
@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return termsEnum.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return termsEnum.impacts(flags);
}
@Override