mirror of https://github.com/apache/lucene.git

LUCENE-8142: Make postings APIs expose raw impacts rather than scores.

This commit is contained in:
parent 555b7ef270
commit af680af77f
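The gist of the change, before the per-file hunks: TermsEnum.impacts(SimScorer, int) becomes TermsEnum.impacts(int), PostingsReaderBase.impacts loses its SimScorer parameter, and ImpactsEnum stops exposing pre-computed max scores in favor of raw (freq, norm) pairs via getImpacts(). A minimal caller-side sketch of the new pattern, modeled on the FeatureQuery hunk below (the termsEnum, simScorer and target variables are hypothetical):

    // Old: scores were baked into the enum at creation time.
    //   ImpactsEnum impacts = termsEnum.impacts(simScorer, PostingsEnum.FREQS);
    //   float max = impacts.getMaxScore(upTo);
    // New: the enum exposes raw impacts; score math moves to MaxScoreCache.
    ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
    MaxScoreCache maxScoreCache = new MaxScoreCache(impactsEnum, simScorer);
    int upTo = maxScoreCache.advanceShallow(target); // end of the current block
    float maxScore = maxScoreCache.getMaxScore(upTo); // derived from (freq, norm) pairs

Decoupling impacts from scores means the same index-level skip data can serve any Similarity, instead of being tied to the scorer that happened to be passed in when the enum was created.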
@@ -30,16 +30,15 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
@@ -661,9 +660,9 @@ public class BlockTermsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
      decodeMetaData();
-      return postingsReader.impacts(fieldInfo, state, scorer, flags);
+      return postingsReader.impacts(fieldInfo, state, flags);
     }
 
     @Override

@@ -24,7 +24,6 @@ import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -208,9 +207,9 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     currentFrame.decodeMetaData();
-    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
+    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags);
   }
 
   private int getState() {

@@ -29,7 +29,6 @@ import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -936,7 +935,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     assert !eof;
     //if (DEBUG) {
    //System.out.println("BTTR.docs seg=" + segment);
@@ -945,7 +944,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
     //if (DEBUG) {
     //System.out.println(" state=" + currentFrame.state);
     //}
-    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
+    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags);
   }
 
   @Override

@@ -32,16 +32,15 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
@@ -375,8 +374,8 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
       }
 
       @Override
-      public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-        return delegate().impacts(scorer, flags);
+      public ImpactsEnum impacts(int flags) throws IOException {
+        return delegate().impacts(flags);
       }
     }
 

@@ -39,7 +39,6 @@ import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.Accountable;
@@ -948,8 +947,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
 
       @Override
-      public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-        return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+      public ImpactsEnum impacts(int flags) throws IOException {
+        return new SlowImpactsEnum(postings(null, flags));
       }
     }
 
@@ -1503,8 +1502,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
 
      @Override
-      public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-        return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+      public ImpactsEnum impacts(int flags) throws IOException {
+        return new SlowImpactsEnum(postings(null, flags));
       }
 
       @Override

@@ -31,18 +31,17 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
@@ -435,9 +434,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       decodeMetaData();
-      return postingsReader.impacts(fieldInfo, state, scorer, flags);
+      return postingsReader.impacts(fieldInfo, state, flags);
     }
 
     // TODO: this can be achieved by making use of Util.getByOutput()

@@ -42,7 +42,6 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
@@ -301,9 +300,9 @@ public class FSTTermsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       decodeMetaData();
-      return postingsReader.impacts(fieldInfo, state, scorer, flags);
+      return postingsReader.impacts(fieldInfo, state, flags);
     }
 
     @Override

@@ -29,7 +29,6 @@ import java.util.concurrent.atomic.AtomicLong;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.*;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
@@ -44,11 +43,11 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedReader;
@@ -871,7 +870,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       throw new UnsupportedOperationException();
     }
   }

@@ -31,19 +31,18 @@ import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IOContext;
@@ -819,8 +818,8 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, flags));
     }
 
     @Override

@@ -36,7 +36,6 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.BufferedChecksumIndexInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
@@ -234,8 +233,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, flags));
     }
   }
 

@@ -25,15 +25,14 @@ import java.util.SortedMap;
 import java.util.TreeMap;
 
 import org.apache.lucene.codecs.TermVectorsReader;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedChecksumIndexInput;
 import org.apache.lucene.store.ChecksumIndexInput;
@@ -414,8 +413,8 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS));
     }
   }
 

@@ -23,23 +23,25 @@ import java.util.Iterator;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.apache.lucene.index.Impact;
+
 /**
  * This class accumulates the (freq, norm) pairs that may produce competitive scores.
  */
-public final class CompetitiveFreqNormAccumulator {
+public final class CompetitiveImpactAccumulator {
 
   // We speed up accumulation for common norm values by first computing
   // the max freq for all norms in -128..127
   private final int[] maxFreqs;
   private boolean dirty;
-  private final TreeSet<FreqAndNorm> freqNormPairs;
+  private final TreeSet<Impact> freqNormPairs;
 
   /** Sole constructor. */
-  public CompetitiveFreqNormAccumulator() {
+  public CompetitiveImpactAccumulator() {
     maxFreqs = new int[256];
-    Comparator<FreqAndNorm> comparator = new Comparator<CompetitiveFreqNormAccumulator.FreqAndNorm>() {
+    Comparator<Impact> comparator = new Comparator<Impact>() {
       @Override
-      public int compare(FreqAndNorm o1, FreqAndNorm o2) {
+      public int compare(Impact o1, Impact o2) {
         // greater freqs compare greater
         int cmp = Integer.compare(o1.freq, o2.freq);
         if (cmp == 0) {
@@ -59,44 +61,6 @@ public final class CompetitiveFreqNormAccumulator {
     freqNormPairs.clear();
   }
 
-  /**
-   * A (freq, norm) pair.
-   */
-  public static class FreqAndNorm {
-    /** Doc-term frequency. */
-    public final int freq;
-    /** Normalization factor. */
-    public final long norm;
-
-    /** Sole constructor. */
-    public FreqAndNorm(int freq, long norm) {
-      this.freq = freq;
-      this.norm = norm;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == null || obj instanceof FreqAndNorm == false) {
-        return false;
-      }
-      FreqAndNorm that = (FreqAndNorm) obj;
-      return freq == that.freq && norm == that.norm;
-    }
-
-    @Override
-    public int hashCode() {
-      int h = getClass().hashCode();
-      h = 31 * h + freq;
-      h = 31 * h + Long.hashCode(norm);
-      return h;
-    }
-
-    @Override
-    public String toString() {
-      return "{" + freq + "," + norm + "}";
-    }
-  }
-
   /** Accumulate a (freq,norm) pair, updating this structure if there is no
    * equivalent or more competitive entry already. */
   public void add(int freq, long norm) {
@@ -105,23 +69,23 @@ public final class CompetitiveFreqNormAccumulator {
       maxFreqs[index] = Math.max(maxFreqs[index], freq);
       dirty = true;
     } else {
-      add(new FreqAndNorm(freq, norm));
+      add(new Impact(freq, norm));
     }
   }
 
   /** Merge {@code acc} into this. */
-  public void addAll(CompetitiveFreqNormAccumulator acc) {
-    for (FreqAndNorm entry : acc.getCompetitiveFreqNormPairs()) {
+  public void addAll(CompetitiveImpactAccumulator acc) {
+    for (Impact entry : acc.getCompetitiveFreqNormPairs()) {
       add(entry);
     }
   }
 
   /** Get the set of competitive freq and norm pairs, orderer by increasing freq and norm. */
-  public SortedSet<FreqAndNorm> getCompetitiveFreqNormPairs() {
+  public SortedSet<Impact> getCompetitiveFreqNormPairs() {
     if (dirty) {
       for (int i = 0; i < maxFreqs.length; ++i) {
         if (maxFreqs[i] > 0) {
-          add(new FreqAndNorm(maxFreqs[i], (byte) i));
+          add(new Impact(maxFreqs[i], (byte) i));
           maxFreqs[i] = 0;
         }
       }
@@ -130,8 +94,8 @@ public final class CompetitiveFreqNormAccumulator {
     return Collections.unmodifiableSortedSet(freqNormPairs);
   }
 
-  private void add(FreqAndNorm newEntry) {
-    FreqAndNorm next = freqNormPairs.ceiling(newEntry);
+  private void add(Impact newEntry) {
+    Impact next = freqNormPairs.ceiling(newEntry);
     if (next == null) {
       // nothing is more competitive
       freqNormPairs.add(newEntry);
@@ -144,8 +108,8 @@ public final class CompetitiveFreqNormAccumulator {
       freqNormPairs.add(newEntry);
     }
 
-    for (Iterator<FreqAndNorm> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
-      FreqAndNorm entry = it.next();
+    for (Iterator<Impact> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
+      Impact entry = it.next();
       if (Long.compareUnsigned(entry.norm, newEntry.norm) >= 0) {
         // less competitive
         it.remove();

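To make the renamed accumulator concrete, here is a hedged sketch of how it behaves (the freq/norm values are invented; the add/getCompetitiveFreqNormPairs calls are the ones shown above):

    // One accumulator is kept per skip level by the Lucene50 writer.
    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
    acc.add(3, 4L); // (freq=3, norm=4) is competitive so far
    acc.add(2, 2L); // lower freq but also lower norm: still competitive
    acc.add(2, 7L); // dominated by (3, 4): pruned when the set is computed
    for (Impact impact : acc.getCompetitiveFreqNormPairs()) {
      System.out.println(impact); // {freq=2,norm=2} then {freq=3,norm=4}
    }

Only pairs on the "skyline" survive: an entry is dropped if some other entry has at least its freq with at most its (unsigned) norm, which is exactly the invariant the private add(Impact) method enforces with ceiling() and the descending head-set scan.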
@@ -24,7 +24,6 @@ import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
@@ -72,7 +71,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
    * Return a {@link ImpactsEnum} that computes impacts with {@code scorer}.
    * @see #postings(FieldInfo, BlockTermState, PostingsEnum, int)
    */
-  public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, Similarity.SimScorer scorer, int flags) throws IOException;
+  public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException;
 
   /**
    * Checks consistency of this reader.

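Formats that keep no impact skip data satisfy this abstract method by wrapping their ordinary postings in a SlowImpactsEnum, as the Direct/Memory/SimpleText hunks above and the Lucene50 reader below all do. A hedged sketch of such an implementation (the surrounding class is illustrative, not from the patch):

    // Minimal impacts() for a format without per-block impact data: the
    // SlowImpactsEnum reports a single dummy impact (Integer.MAX_VALUE, 1),
    // which is a valid, if useless, upper bound for any scorer.
    @Override
    public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
      return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
    }

Before this change the same fallback needed the scorer up front, since SlowImpactsEnum took a pre-computed global max score; that coupling is what the commit removes.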
@@ -24,7 +24,6 @@ import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -235,9 +234,9 @@ final class IntersectTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     currentFrame.decodeMetaData();
-    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
+    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags);
   }
 
   private int getState() {

@@ -25,7 +25,6 @@ import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -1005,7 +1004,7 @@ final class SegmentTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     assert !eof;
     //if (DEBUG) {
    //System.out.println("BTTR.docs seg=" + segment);
@@ -1014,7 +1013,7 @@ final class SegmentTermsEnum extends TermsEnum {
     //if (DEBUG) {
     //System.out.println(" state=" + currentFrame.state);
     //}
-    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
+    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags);
   }
 
   @Override

@@ -37,7 +37,6 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
@@ -946,10 +945,9 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       final PostingsEnum delegate = postings(null, PostingsEnum.FREQS);
-      final float maxScore = scorer.score(Float.MAX_VALUE, 1);
-      return new SlowImpactsEnum(delegate, maxScore);
+      return new SlowImpactsEnum(delegate);
     }
 
   }

@@ -19,20 +19,19 @@ package org.apache.lucene.codecs.lucene50;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Objects;
 
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SlowImpactsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -239,13 +238,12 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
   }
 
   @Override
-  public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
-    Objects.requireNonNull(scorer);
+  public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
     if (state.docFreq <= BLOCK_SIZE || version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
       // no skip data
-      return new SlowImpactsEnum(postings(fieldInfo, state, null, flags), scorer.score(Float.MAX_VALUE, 1));
+      return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
     }
-    return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, scorer, flags);
+    return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
   }
 
   final class BlockDocsEnum extends PostingsEnum {
@@ -1367,7 +1365,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
 
     private long seekTo = -1;
 
-    public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, SimScorer scorer, int flags) throws IOException {
+    public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags) throws IOException {
       indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
       indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
       indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
@@ -1440,8 +1438,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
           MAX_SKIP_LEVELS,
           indexHasPos,
           indexHasOffsets,
-          indexHasPayloads,
-          scorer);
+          indexHasPayloads);
       skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
 
       if (indexHasFreq == false) {
@@ -1544,17 +1541,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
     }
 
     @Override
-    public int nextDoc() throws IOException {
-      return advance(doc + 1);
-    }
-
-    @Override
-    public float getMaxScore(int upTo) throws IOException {
-      return skipper.getMaxScore(upTo);
-    }
-
-    @Override
-    public int advanceShallow(int target) throws IOException {
+    public void advanceShallow(int target) throws IOException {
       if (target > nextSkipDoc) {
         // always plus one to fix the result, since skip position in Lucene50SkipReader
         // is a little different from MultiLevelSkipListReader
@@ -1580,7 +1567,17 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
         nextSkipDoc = skipper.getNextSkipDoc();
       }
       assert nextSkipDoc >= target;
-      return nextSkipDoc;
     }
 
     @Override
+    public Impacts getImpacts() throws IOException {
+      advanceShallow(doc);
+      return skipper.getImpacts();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(doc + 1);
+    }
+
+    @Override

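Note the shape change on ImpactsEnum itself in the two hunks above: advanceShallow now returns void, the score-oriented getMaxScore is gone, and getImpacts() exposes the raw skip-list levels. A consumer-side sketch, mirroring what the CheckIndex hunks further down actually do (impactsEnum and target are assumed to exist):

    impactsEnum.advanceShallow(target);          // position the skip reader, no score math
    Impacts impacts = impactsEnum.getImpacts();  // raw (freq, norm) pairs, one list per level
    int upTo0 = impacts.getDocIdUpTo(0);         // level-0 impacts are valid up to this doc
    List<Impact> level0 = impacts.getImpacts(0); // sorted by increasing freq and norm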
@@ -31,7 +31,7 @@ import java.io.IOException;
 
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
 import org.apache.lucene.codecs.PushPostingsWriterBase;
 import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
 import org.apache.lucene.index.CorruptIndexException;
@@ -101,7 +101,7 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
 
   private boolean fieldHasNorms;
   private NumericDocValues norms;
-  private final CompetitiveFreqNormAccumulator competitiveFreqNormAccumulator = new CompetitiveFreqNormAccumulator();
+  private final CompetitiveImpactAccumulator competitiveFreqNormAccumulator = new CompetitiveImpactAccumulator();
 
   /** Creates a postings writer */
   public Lucene50PostingsWriter(SegmentWriteState state) throws IOException {

@@ -17,90 +17,143 @@
 package org.apache.lucene.codecs.lucene50;
 
 import java.io.IOException;
+import java.util.AbstractList;
 import java.util.Arrays;
-import java.util.Objects;
+import java.util.List;
+import java.util.RandomAccess;
 
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.Impacts;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 
 final class Lucene50ScoreSkipReader extends Lucene50SkipReader {
 
-  private final SimScorer scorer;
-  private final float[] maxScore;
-  private final byte[][] impacts;
-  private final int[] impactsLength;
-  private final float globalMaxScore;
+  private final byte[][] impactData;
+  private final int[] impactDataLength;
   private final ByteArrayDataInput badi = new ByteArrayDataInput();
+  private final Impacts impacts;
+  private int numLevels = 1;
+  private final MutableImpactList[] perLevelImpacts;
 
   public Lucene50ScoreSkipReader(int version, IndexInput skipStream, int maxSkipLevels,
-      boolean hasPos, boolean hasOffsets, boolean hasPayloads, SimScorer scorer) {
+      boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
     super(version, skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads);
     if (version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
       throw new IllegalStateException("Cannot skip based on scores if impacts are not indexed");
     }
-    this.scorer = Objects.requireNonNull(scorer);
-    this.maxScore = new float[maxSkipLevels];
-    this.impacts = new byte[maxSkipLevels][];
-    Arrays.fill(impacts, new byte[0]);
-    this.impactsLength = new int[maxSkipLevels];
-    this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1);
+    this.impactData = new byte[maxSkipLevels][];
+    Arrays.fill(impactData, new byte[0]);
+    this.impactDataLength = new int[maxSkipLevels];
+    this.perLevelImpacts = new MutableImpactList[maxSkipLevels];
+    for (int i = 0; i < perLevelImpacts.length; ++i) {
+      perLevelImpacts[i] = new MutableImpactList();
+    }
+    impacts = new Impacts() {
+
+      @Override
+      public int numLevels() {
+        return numLevels;
+      }
+
+      @Override
+      public int getDocIdUpTo(int level) {
+        return skipDoc[level];
+      }
+
+      @Override
+      public List<Impact> getImpacts(int level) {
+        assert level < numLevels;
+        if (impactDataLength[level] > 0) {
+          badi.reset(impactData[level], 0, impactDataLength[level]);
+          perLevelImpacts[level] = readImpacts(badi, perLevelImpacts[level]);
+          impactDataLength[level] = 0;
+        }
+        return perLevelImpacts[level];
+      }
+    };
   }
 
   @Override
-  public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) throws IOException {
-    super.init(skipPointer, docBasePointer, posBasePointer, payBasePointer, df);
-    Arrays.fill(impactsLength, 0);
-    Arrays.fill(maxScore, globalMaxScore);
+  public int skipTo(int target) throws IOException {
+    int result = super.skipTo(target);
+    if (numberOfSkipLevels > 0) {
+      numLevels = numberOfSkipLevels;
+    } else {
+      // End of postings don't have skip data anymore, so we fill with dummy data
+      // like SlowImpactsEnum.
+      numLevels = 1;
+      perLevelImpacts[0].length = 1;
+      perLevelImpacts[0].impacts[0].freq = Integer.MAX_VALUE;
+      perLevelImpacts[0].impacts[0].norm = 1L;
+      impactDataLength[0] = 0;
+    }
+    return result;
   }
 
-  /** Upper bound of scores up to {@code upTo} included. */
-  public float getMaxScore(int upTo) throws IOException {
-    for (int level = 0; level < numberOfSkipLevels; ++level) {
-      if (upTo <= skipDoc[level]) {
-        return maxScore(level);
-      }
-    }
-    return globalMaxScore;
-  }
-
-  private float maxScore(int level) throws IOException {
-    assert level < numberOfSkipLevels;
-    if (impactsLength[level] > 0) {
-      badi.reset(impacts[level], 0, impactsLength[level]);
-      maxScore[level] = readImpacts(badi, scorer);
-      impactsLength[level] = 0;
-    }
-    return maxScore[level];
+  Impacts getImpacts() {
+    return impacts;
   }
 
   @Override
   protected void readImpacts(int level, IndexInput skipStream) throws IOException {
     int length = skipStream.readVInt();
-    if (impacts[level].length < length) {
-      impacts[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
+    if (impactData[level].length < length) {
+      impactData[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
     }
-    skipStream.readBytes(impacts[level], 0, length);
-    impactsLength[level] = length;
+    skipStream.readBytes(impactData[level], 0, length);
+    impactDataLength[level] = length;
   }
 
-  static float readImpacts(ByteArrayDataInput in, SimScorer scorer) throws IOException {
+  static MutableImpactList readImpacts(ByteArrayDataInput in, MutableImpactList reuse) {
+    int maxNumImpacts = in.length(); // at most one impact per byte
+    if (reuse.impacts.length < maxNumImpacts) {
+      int oldLength = reuse.impacts.length;
+      reuse.impacts = ArrayUtil.grow(reuse.impacts, maxNumImpacts);
+      for (int i = oldLength; i < reuse.impacts.length; ++i) {
+        reuse.impacts[i] = new Impact(Integer.MAX_VALUE, 1L);
+      }
+    }
+
     int freq = 0;
     long norm = 0;
-    float maxScore = 0;
+    int length = 0;
     while (in.getPosition() < in.length()) {
       int freqDelta = in.readVVInt();
      int freqDelta = in.readVInt();
       if ((freqDelta & 0x01) != 0) {
         freq += 1 + (freqDelta >>> 1);
-        norm += 1 + in.readZLong();
+        try {
+          norm += 1 + in.readZLong();
+        } catch (IOException e) {
+          throw new RuntimeException(e); // cannot happen on a BADI
+        }
       } else {
         freq += 1 + (freqDelta >>> 1);
         norm++;
       }
-      maxScore = Math.max(maxScore, scorer.score(freq, norm));
+      Impact impact = reuse.impacts[length];
+      impact.freq = freq;
+      impact.norm = norm;
+      length++;
     }
+    reuse.length = length;
+    return reuse;
+  }
+
+  static class MutableImpactList extends AbstractList<Impact> implements RandomAccess {
+    int length = 1;
+    Impact[] impacts = new Impact[] { new Impact(Integer.MAX_VALUE, 1L) };
+
+    @Override
+    public Impact get(int index) {
+      return impacts[index];
+    }
+
+    @Override
+    public int size() {
+      return length;
+    }
-    return maxScore;
   }
 
 }

@@ -200,8 +200,7 @@ class Lucene50SkipReader extends MultiLevelSkipListReader {
     return delta;
   }
 
-  // The default impl skips impacts since they are only useful if we have a SimScorer
-  // to compute the scores that impacts map to.
+  // The default impl skips impacts
   protected void readImpacts(int level, IndexInput skipStream) throws IOException {
     if (version >= Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
       // The base implementation skips impacts, they are not used

@@ -22,9 +22,9 @@ import java.util.Arrays;
 import java.util.Set;
 import java.util.SortedSet;
 
-import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
-import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
 import org.apache.lucene.codecs.MultiLevelSkipListWriter;
+import org.apache.lucene.index.Impact;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 
@@ -65,7 +65,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
   private long curPayPointer;
   private int curPosBufferUpto;
   private int curPayloadByteUpto;
-  private CompetitiveFreqNormAccumulator[] curCompetitiveFreqNorms;
+  private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
   private boolean fieldHasPositions;
   private boolean fieldHasOffsets;
   private boolean fieldHasPayloads;
@@ -85,9 +85,9 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
       }
       lastPayloadByteUpto = new int[maxSkipLevels];
     }
-    curCompetitiveFreqNorms = new CompetitiveFreqNormAccumulator[maxSkipLevels];
+    curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
     for (int i = 0; i < maxSkipLevels; ++i) {
-      curCompetitiveFreqNorms[i] = new CompetitiveFreqNormAccumulator();
+      curCompetitiveFreqNorms[i] = new CompetitiveImpactAccumulator();
     }
   }
 
@@ -116,7 +116,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
       }
     }
     if (initialized) {
-      for (CompetitiveFreqNormAccumulator acc : curCompetitiveFreqNorms) {
+      for (CompetitiveImpactAccumulator acc : curCompetitiveFreqNorms) {
         acc.clear();
       }
     }
@@ -139,7 +139,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
     }
     // sets of competitive freq,norm pairs should be empty at this point
     assert Arrays.stream(curCompetitiveFreqNorms)
-        .map(CompetitiveFreqNormAccumulator::getCompetitiveFreqNormPairs)
+        .map(CompetitiveImpactAccumulator::getCompetitiveFreqNormPairs)
         .mapToInt(Set::size)
         .sum() == 0;
     initialized = true;
@@ -149,7 +149,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
   /**
    * Sets the values for the current skip data.
    */
-  public void bufferSkip(int doc, CompetitiveFreqNormAccumulator competitiveFreqNorms,
+  public void bufferSkip(int doc, CompetitiveImpactAccumulator competitiveFreqNorms,
       int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
     initSkip();
     this.curDoc = doc;
@@ -191,7 +191,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
       }
     }
 
-    CompetitiveFreqNormAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
+    CompetitiveImpactAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
     assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
     if (level + 1 < numberOfSkipLevels) {
       curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
@@ -203,14 +203,14 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
     competitiveFreqNorms.clear();
   }
 
-  static void writeImpacts(CompetitiveFreqNormAccumulator acc, IndexOutput out) throws IOException {
-    SortedSet<FreqAndNorm> freqAndNorms = acc.getCompetitiveFreqNormPairs();
-    FreqAndNorm previous = new FreqAndNorm(0, 0);
-    for (FreqAndNorm freqAndNorm : freqAndNorms) {
-      assert freqAndNorm.freq > previous.freq;
-      assert Long.compareUnsigned(freqAndNorm.norm, previous.norm) > 0;
-      int freqDelta = freqAndNorm.freq - previous.freq - 1;
-      long normDelta = freqAndNorm.norm - previous.norm - 1;
+  static void writeImpacts(CompetitiveImpactAccumulator acc, IndexOutput out) throws IOException {
+    SortedSet<Impact> impacts = acc.getCompetitiveFreqNormPairs();
+    Impact previous = new Impact(0, 0);
+    for (Impact impact : impacts) {
+      assert impact.freq > previous.freq;
+      assert Long.compareUnsigned(impact.norm, previous.norm) > 0;
+      int freqDelta = impact.freq - previous.freq - 1;
+      long normDelta = impact.norm - previous.norm - 1;
       if (normDelta == 0) {
         // most of time, norm only increases by 1, so we can fold everything in a single byte
         out.writeVInt(freqDelta << 1);
@@ -218,7 +218,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
       out.writeVInt((freqDelta << 1) | 1);
       out.writeZLong(normDelta);
       }
-      previous = freqAndNorm;
+      previous = impact;
     }
   }
 }

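To see the on-disk format concretely, here is a worked example under the encoding above (the impact values are invented): encoding the competitive set {(freq=1, norm=4), (freq=3, norm=8)} starting from previous = (0, 0):

    (1, 4): freqDelta = 1-0-1 = 0, normDelta = 4-0-1 = 3 != 0
            -> writeVInt((0 << 1) | 1) = 1, then writeZLong(3)
    (3, 8): freqDelta = 3-1-1 = 1, normDelta = 8-4-1 = 3 != 0
            -> writeVInt((1 << 1) | 1) = 3, then writeZLong(3)

The reader side (readImpacts in the Lucene50ScoreSkipReader hunk above) reverses this: an odd vInt means "add 1 + (delta >>> 1) to freq and 1 + zLong to norm"; an even one means the norm only grew by 1. Since both freq and norm strictly increase across the sorted set, every delta-minus-one is non-negative and most entries fit in a single byte.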
@@ -38,7 +38,6 @@ import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
@@ -1160,7 +1159,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       throw new UnsupportedOperationException();
     }
 

@@ -30,6 +30,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MaxScoreCache;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
@@ -114,7 +115,8 @@ final class FeatureQuery extends Query {
        }
 
        SimScorer scorer = function.scorer(fieldName, boost);
-        ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+        ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
+        MaxScoreCache maxScoreCache = new MaxScoreCache(impacts, scorer);
 
        return new Scorer(this) {
 
@@ -135,12 +137,12 @@ final class FeatureQuery extends Query {
 
          @Override
          public int advanceShallow(int target) throws IOException {
-            return impacts.advanceShallow(target);
+            return maxScoreCache.advanceShallow(target);
          }
 
          @Override
          public float getMaxScore(int upTo) throws IOException {
-            return impacts.getMaxScore(upTo);
+            return maxScoreCache.getMaxScore(upTo);
          }
 
        };

@@ -27,6 +27,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Deque;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
@@ -48,7 +49,6 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -1602,58 +1602,45 @@ public final class CheckIndex implements Closeable {
        // Checking score blocks is heavy, we only do it on long postings lists, on every 1024th term
        // or if slow checks are enabled.
        if (doSlowChecks || docFreq > 1024 || (status.termCount + status.delTermCount) % 1024 == 0) {
-          // Test score blocks
-          // We only score on freq to keep things simple and not pull norms
-          SimScorer scorer = new SimScorer(field) {
-            @Override
-            public float score(float freq, long norm) {
-              return freq;
-            }
-          };
-
          // First check max scores and block uptos
          // But only if slok checks are enabled since we visit all docs
          if (doSlowChecks) {
            int max = -1;
-            float maxScore = 0;
-            ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+            int maxFreq = 0;
+            ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
            postings = termsEnum.postings(postings, PostingsEnum.FREQS);
-            for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
+            for (int doc = impactsEnum.nextDoc(); ; doc = impactsEnum.nextDoc()) {
              if (postings.nextDoc() != doc) {
                throw new RuntimeException("Wrong next doc: " + doc + ", expected " + postings.docID());
              }
              if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                break;
              }
-              if (postings.freq() != impacts.freq()) {
-                throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+              if (postings.freq() != impactsEnum.freq()) {
+                throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impactsEnum.freq());
              }
              if (doc > max) {
-                max = impacts.advanceShallow(doc);
-                if (max < doc) {
-                  throw new RuntimeException("max block doc id " + max + " must be greater than the target: " + doc);
-                }
-                maxScore = impacts.getMaxScore(max);
+                impactsEnum.advanceShallow(doc);
+                Impacts impacts = impactsEnum.getImpacts();
+                checkImpacts(impacts, doc);
+                max = impacts.getDocIdUpTo(0);
+                List<Impact> impacts0 = impacts.getImpacts(0);
+                maxFreq = impacts0.get(impacts0.size() - 1).freq;
              }
-              int max2 = impacts.advanceShallow(doc);
-              if (max != max2) {
-                throw new RuntimeException("max is not stable, initially had " + max + " but now " + max2);
-              }
-              float score = scorer.score(impacts.freq(), 1);
-              if (score > maxScore) {
-                throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+              if (impactsEnum.freq() > maxFreq) {
+                throw new RuntimeException("freq " + impactsEnum.freq() + " is greater than the max freq according to impacts " + maxFreq);
              }
            }
          }
 
          // Now check advancing
-          ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+          ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
          postings = termsEnum.postings(postings, PostingsEnum.FREQS);
 
          int max = -1;
-          float maxScore = 0;
+          int maxFreq = 0;
          while (true) {
-            int doc = impacts.docID();
+            int doc = impactsEnum.docID();
            boolean advance;
            int target;
            if (((field.hashCode() + doc) & 1) == 1) {
@@ -1662,23 +1649,29 @@ public final class CheckIndex implements Closeable {
            } else {
              advance = true;
              int delta = Math.min(1 + ((31 * field.hashCode() + doc) & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
-              target = impacts.docID() + delta;
+              target = impactsEnum.docID() + delta;
            }
 
            if (target > max && target % 2 == 1) {
              int delta = Math.min((31 * field.hashCode() + target) & 0x1ff, DocIdSetIterator.NO_MORE_DOCS - target);
              max = target + delta;
-              int m = impacts.advanceShallow(target);
-              if (m < target) {
-                throw new RuntimeException("Block max doc: " + m + " is less than the target " + target);
+              impactsEnum.advanceShallow(target);
+              Impacts impacts = impactsEnum.getImpacts();
+              checkImpacts(impacts, doc);
+              maxFreq = Integer.MAX_VALUE;
+              for (int level = 0; level < impacts.numLevels(); ++level) {
+                if (impacts.getDocIdUpTo(level) >= max) {
+                  List<Impact> perLevelImpacts = impacts.getImpacts(level);
+                  maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
+                  break;
+                }
              }
-              maxScore = impacts.getMaxScore(max);
            }
 
            if (advance) {
-              doc = impacts.advance(target);
+              doc = impactsEnum.advance(target);
            } else {
-              doc = impacts.nextDoc();
+              doc = impactsEnum.nextDoc();
            }
 
            if (postings.advance(target) != doc) {
@@ -1687,23 +1680,28 @@ public final class CheckIndex implements Closeable {
            if (doc == DocIdSetIterator.NO_MORE_DOCS) {
              break;
            }
-            if (postings.freq() != impacts.freq()) {
-              throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+            if (postings.freq() != impactsEnum.freq()) {
+              throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impactsEnum.freq());
            }
 
            if (doc >= max) {
              int delta = Math.min((31 * field.hashCode() + target & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
              max = doc + delta;
-              int m = impacts.advanceShallow(doc);
-              if (m < doc) {
-                throw new RuntimeException("Block max doc: " + m + " is less than the target " + doc);
+              impactsEnum.advanceShallow(doc);
+              Impacts impacts = impactsEnum.getImpacts();
+              checkImpacts(impacts, doc);
+              maxFreq = Integer.MAX_VALUE;
+              for (int level = 0; level < impacts.numLevels(); ++level) {
+                if (impacts.getDocIdUpTo(level) >= max) {
+                  List<Impact> perLevelImpacts = impacts.getImpacts(level);
+                  maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
+                  break;
+                }
              }
-              maxScore = impacts.getMaxScore(max);
            }
 
-            float score = scorer.score(impacts.freq(), 1);
-            if (score > maxScore) {
-              throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+            if (impactsEnum.freq() > maxFreq) {
+              throw new RuntimeException("Term frequency " + impactsEnum.freq() + " is greater than the max freq according to impacts " + maxFreq);
            }
          }
        }
@@ -1850,6 +1848,68 @@ public final class CheckIndex implements Closeable {
    return status;
  }
 
+  static void checkImpacts(Impacts impacts, int lastTarget) {
+    final int numLevels = impacts.numLevels();
+    if (numLevels < 1) {
+      throw new RuntimeException("The number of levels must be >= 1, got " + numLevels);
+    }
+
+    int docIdUpTo0 = impacts.getDocIdUpTo(0);
+    if (docIdUpTo0 < lastTarget) {
+      throw new RuntimeException("getDocIdUpTo returned " + docIdUpTo0 + " on level 0, which is less than the target " + lastTarget);
+    }
+
+    for (int level = 1; level < numLevels; ++level) {
+      int docIdUpTo = impacts.getDocIdUpTo(level);
+      int previousDocIdUpTo = impacts.getDocIdUpTo(level - 1);
+      if (docIdUpTo < previousDocIdUpTo) {
+        throw new RuntimeException("Decreasing return for getDocIdUpTo: level " + (level-1) + " returned " + previousDocIdUpTo
+            + " but level " + level + " returned " + docIdUpTo + " for target " + lastTarget);
+      }
+    }
+
+    for (int level = 0; level < numLevels; ++level) {
+      List<Impact> perLevelImpacts = impacts.getImpacts(level);
+      if (perLevelImpacts.isEmpty()) {
+        throw new RuntimeException("Got empty list of impacts on level " + level);
+      }
+      Impact first = perLevelImpacts.get(0);
+      if (first.freq < 1) {
+        throw new RuntimeException("First impact had a freq <= 0: " + first);
+      }
+      if (first.norm == 0) {
+        throw new RuntimeException("First impact had a norm == 0: " + first);
+      }
+      // Impacts must be in increasing order of norm AND freq
+      Impact previous = first;
+      for (int i = 1; i < perLevelImpacts.size(); ++i) {
+        Impact impact = perLevelImpacts.get(i);
+        if (impact.freq <= previous.freq || Long.compareUnsigned(impact.norm, previous.norm) <= 0) {
+          throw new RuntimeException("Impacts are not ordered or contain dups, got " + previous + " then " + impact);
+        }
+      }
+      if (level > 0) {
+        // Make sure that impacts at level N trigger better scores than an level N-1
+        Iterator<Impact> previousIt = impacts.getImpacts(level-1).iterator();
+        previous = previousIt.next();
+        Iterator<Impact> it = perLevelImpacts.iterator();
+        Impact impact = it.next();
+        while (previousIt.hasNext()) {
+          previous = previousIt.next();
+          if (previous.freq <= impact.freq && Long.compareUnsigned(previous.norm, impact.norm) >= 0) {
+            // previous triggers a lower score than the current impact, all good
+            continue;
+          }
+          if (it.hasNext() == false) {
+            throw new RuntimeException("Found impact " + previous + " on level " + (level-1) + " but no impact on level "
+                + level + " triggers a better score: " + perLevelImpacts);
+          }
+          impact = it.next();
+        }
+      }
+    }
+  }
+
  /**
   * Test the term index.
   * @lucene.experimental

@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.Iterator;
 
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -216,8 +215,8 @@ public abstract class FilterLeafReader extends LeafReader {
    }
 
    @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return in.impacts(scorer, flags);
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return in.impacts(flags);
    }
  }
 

@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;
 
 /**
@@ -184,8 +183,8 @@ public abstract class FilteredTermsEnum extends TermsEnum {
  }
 
  @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-    return tenum.impacts(scorer, flags);
+  public ImpactsEnum impacts(int flags) throws IOException {
+    return tenum.impacts(flags);
  }
 
  /** This enum does not support seeking!

@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -275,7 +274,7 @@ class FreqProxFields extends Fields {
    }
 
    @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
      throw new UnsupportedOperationException();
    }
 

@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

/**
* Per-document scoring factors.
*/
public final class Impact {

/**
* Term frequency of the term in the document.
*/
public int freq;

/**
* Norm factor of the document.
*/
public long norm;

/**
* Constructor.
*/
public Impact(int freq, long norm) {
this.freq = freq;
this.norm = norm;
}

@Override
public String toString() {
return "{freq=" + freq + ",norm=" + norm + "}";
}

@Override
public int hashCode() {
int h = freq;
h = 31 * h + Long.hashCode(norm);
return h;
}

@Override
public boolean equals(Object obj) {
if (obj == null || getClass() != obj.getClass()) return false;
Impact other = (Impact) obj;
return freq == other.freq && norm == other.norm;
}

}
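The new Impact class is a plain (freq, norm) value holder. One detail worth noting: everywhere else in this change norms are ordered with unsigned comparison (see the Long.compareUnsigned call in the CheckIndex fragment above and the "increasing unsigned norm" wording in Impacts below), so a negative norm sorts after every positive one. A minimal illustrative sketch of that ordering; the comparator is hypothetical, not part of the commit:

    import java.util.Comparator;

    // Sort order documented on Impacts: ascending freq, then ascending unsigned norm.
    Comparator<Impact> byFreqThenUnsignedNorm =
        Comparator.<Impact>comparingInt(i -> i.freq)
            .thenComparing((Impact i) -> i.norm, Long::compareUnsigned);

    Impact a = new Impact(3, 5L);
    Impact b = new Impact(3, -1L); // -1L is the largest value under unsigned order
    assert byFreqThenUnsignedNorm.compare(a, b) < 0;
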
@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.util.List;

/**
* Information about upcoming impacts, ie. (freq, norm) pairs.
*/
public abstract class Impacts {

/**
* Return the number of levels on which we have impacts.
* The returned value is always greater than 0 and may not always be the
* same, even on a single postings list, depending on the current doc ID.
*/
public abstract int numLevels();

/**
* Return the maximum inclusive doc ID until which the list of impacts
* returned by {@link #getImpacts(int)} is valid. This is a non-decreasing
* function of {@code level}.
*/
public abstract int getDocIdUpTo(int level);

/**
* Return impacts on the given level. These impacts are sorted by increasing
* frequency and increasing unsigned norm, and only valid until the doc ID
* returned by {@link #getDocIdUpTo(int)} for the same level, included.
* The returned list is never empty.
* NOTE: There is no guarantee that these impacts actually appear in postings,
* only that they trigger scores that are greater than or equal to the impacts
* that actually appear in postings.
*/
public abstract List<Impact> getImpacts(int level);

}
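To make the level structure concrete: consumers scan levels from 0 upwards and take the first level whose getDocIdUpTo covers the doc ID range they care about, since higher levels cover more documents at the price of coarser impacts. A minimal sketch of that lookup, following the same logic as the MaxScoreCache.getLevel helper added later in this commit:

    // Return the first level that covers all doc IDs up to `upTo`, or -1 if none does.
    static int levelFor(Impacts impacts, int upTo) {
      for (int level = 0; level < impacts.numLevels(); ++level) {
        if (upTo <= impacts.getDocIdUpTo(level)) {
          return level;
        }
      }
      return -1;
    }
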
@@ -18,11 +18,9 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

/**
* Extension of {@link PostingsEnum} which also provides information about the
* produced scores.
* Extension of {@link PostingsEnum} which also provides information about
* upcoming impacts.
* @lucene.experimental
*/
public abstract class ImpactsEnum extends PostingsEnum {

@@ -31,23 +29,28 @@ public abstract class ImpactsEnum extends PostingsEnum {
protected ImpactsEnum() {}

/**
* Advance to the block of documents that contains {@code target} in order to
* get scoring information about this block. This method is implicitly called
* by {@link DocIdSetIterator#advance(int)} and
* {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
* current {@link DocIdSetIterator#docID()}.
* It returns a number that is greater than or equal to all documents
* contained in the current block, but less than any doc IDS of the next block.
* {@code target} must be >= {@link #docID()} as well as all targets that
* have been passed to {@link #advanceShallow(int)} so far.
* Shallow-advance to {@code target}. This is cheaper than calling
* {@link #advance(int)} and allows further calls to {@link #getImpacts()}
* to ignore doc IDs that are less than {@code target} in order to get more
* precise information about impacts.
* This method may not be called on targets that are less than the current
* {@link #docID()}.
* After this method has been called, {@link #nextDoc()} may not be called
* if the current doc ID is less than {@code target - 1} and
* {@link #advance(int)} may not be called on targets that are less than
* {@code target}.
*/
public abstract int advanceShallow(int target) throws IOException;
public abstract void advanceShallow(int target) throws IOException;

/**
* Return the maximum score that documents between the last {@code target}
* that this iterator was {@link #advanceShallow(int) shallow-advanced} to
* included and {@code upTo} included.
* Get information about upcoming impacts for doc ids that are greater than
* or equal to the maximum of {@link #docID()} and the last target that was
* passed to {@link #advanceShallow(int)}.
* This method may not be called on an unpositioned iterator on which
* {@link #advanceShallow(int)} has never been called.
* NOTE: advancing this iterator may invalidate the returned impacts, so they
* should not be used after the iterator has been advanced.
*/
public abstract float getMaxScore(int upTo) throws IOException;
public abstract Impacts getImpacts() throws IOException;

}

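The reworked contract splits the old getMaxScore(upTo) into advanceShallow plus getImpacts and moves the actual score computation to the caller. A minimal sketch of the intended calling pattern, assuming a SimScorer is available to turn (freq, norm) pairs into scores (this mirrors how MaxScoreCache and TermScorer use the API later in this commit):

    // Upper-bound scores for docs in [target, impacts.getDocIdUpTo(0)]
    // without moving the enum's current doc ID.
    impactsEnum.advanceShallow(target);
    Impacts impacts = impactsEnum.getImpacts();
    float maxScore = 0;
    for (Impact impact : impacts.getImpacts(0)) {
      maxScore = Math.max(maxScore, simScorer.score(impact.freq, impact.norm));
    }
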
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;

import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

@@ -369,9 +368,9 @@ public final class MultiTermsEnum extends TermsEnum {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
// implemented to not fail CheckIndex, but you shouldn't be using impacts on a slow reader
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
return new SlowImpactsEnum(postings(null, flags));
}

final static class TermsEnumWithSlice {

@@ -17,23 +17,45 @@
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

/**
* {@link ImpactsEnum} that doesn't index impacts but implements the API in a
* legal way. This should typically be used for short postings that do not need
* legal way. This is typically used for short postings that do not need
* skipping.
*/
public final class SlowImpactsEnum extends ImpactsEnum {

private static final Impacts DUMMY_IMPACTS = new Impacts() {

private final List<Impact> impacts = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));

@Override
public int numLevels() {
return 1;
}

@Override
public int getDocIdUpTo(int level) {
return DocIdSetIterator.NO_MORE_DOCS;
}

@Override
public List<Impact> getImpacts(int level) {
return impacts;
}

};

private final PostingsEnum delegate;
private final float maxScore;

/** Wrap the given {@link PostingsEnum}. */
public SlowImpactsEnum(PostingsEnum delegate, float maxScore) {
public SlowImpactsEnum(PostingsEnum delegate) {
this.delegate = delegate;
this.maxScore = maxScore;
}

@Override

@@ -82,13 +104,10 @@ public final class SlowImpactsEnum extends ImpactsEnum {
}

@Override
public int advanceShallow(int target) {
return NO_MORE_DOCS;
}
public void advanceShallow(int target) {}

@Override
public float getMaxScore(int maxDoc) {
return maxScore;
public Impacts getImpacts() {
return DUMMY_IMPACTS;
}

}

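The dummy impact (Integer.MAX_VALUE, 1L) is deliberately the most pessimistic pair, maximal frequency with the smallest norm, so any upper bound derived from it can never be beaten by a real posting. With the maxScore parameter gone, wrapping a plain PostingsEnum reduces to a one-liner, as the updated call sites in this commit show:

    // Fall back to a regular PostingsEnum when a codec has no indexed impacts;
    // freqs are still needed so that callers can compute scores.
    ImpactsEnum impactsEnum = new SlowImpactsEnum(termsEnum.postings(null, PostingsEnum.FREQS));
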
@@ -19,7 +19,6 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

@@ -111,7 +110,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}

@@ -17,7 +17,6 @@
package org.apache.lucene.index;

import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

@@ -111,7 +110,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}

@@ -19,7 +19,6 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;

@@ -171,10 +170,10 @@ public abstract class TermsEnum implements BytesRefIterator {
public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException;

/**
* Return a {@link ImpactsEnum} that computes impacts with {@code scorer}.
* Return a {@link ImpactsEnum}.
* @see #postings(PostingsEnum, int)
*/
public abstract ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException;
public abstract ImpactsEnum impacts(int flags) throws IOException;

/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum

@@ -236,7 +235,7 @@ public abstract class TermsEnum implements BytesRefIterator {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new IllegalStateException("this method should never be called");
}

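This is the central API change of the commit: TermsEnum.impacts no longer takes a SimScorer, so the postings layer exposes raw (freq, norm) data and scoring moves entirely to the caller. A hedged before/after sketch of a call-site migration (variable names are illustrative):

    // Before: the postings layer computed score upper bounds itself.
    // ImpactsEnum ie = termsEnum.impacts(simScorer, PostingsEnum.FREQS);
    // float bound = ie.getMaxScore(upTo);

    // After: the caller gets raw impacts and derives bounds, typically
    // through the new MaxScoreCache introduced further down.
    ImpactsEnum ie = termsEnum.impacts(PostingsEnum.FREQS);
    MaxScoreCache cache = new MaxScoreCache(ie, simScorer);
    float bound = cache.getMaxScore(upTo);
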
@@ -38,6 +38,9 @@ final class BlockMaxConjunctionScorer extends Scorer {
BlockMaxConjunctionScorer(Weight weight, Collection<Scorer> scorersList) throws IOException {
super(weight);
this.scorers = scorersList.toArray(new Scorer[scorersList.size()]);
for (Scorer scorer : scorers) {
scorer.advanceShallow(0);
}
this.maxScorePropagator = new MaxScoreSumPropagator(scorersList);

// Put scorers with the higher max scores first

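BlockMaxConjunctionScorer here, like WANDScorer further down, gains a scorer.advanceShallow(0) call before asking for max scores. This follows from the new ImpactsEnum contract quoted above: getImpacts() may not be called on an unpositioned iterator on which advanceShallow has never been called, so a scorer that wants a global bound must shallow-advance to doc 0 first. In sketch form:

    for (Scorer scorer : scorers) {
      scorer.advanceShallow(0); // make impacts available before any nextDoc()
      float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
      // ... use maxScore to order scorers or compute scaling factors ...
    }
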
@@ -23,7 +23,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

@@ -275,8 +274,8 @@ public final class FuzzyTermsEnum extends TermsEnum {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return actualEnum.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return actualEnum.impacts(flags);
}

@Override

@@ -0,0 +1,138 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.index.Impact;
import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;

/**
* Compute maximum scores based on {@link Impacts} and keep them in a cache in
* order not to run expensive similarity score computations multiple times on
* the same data.
*/
public final class MaxScoreCache {

private final ImpactsEnum impactsEnum;
private final SimScorer scorer;
private final float globalMaxScore;
private float[] maxScoreCache;
private int[] maxScoreCacheUpTo;

/**
* Sole constructor.
*/
public MaxScoreCache(ImpactsEnum impactsEnum, SimScorer scorer) {
this.impactsEnum = impactsEnum;
this.scorer = scorer;
globalMaxScore = scorer.score(Integer.MAX_VALUE, 1L);
maxScoreCache = new float[0];
maxScoreCacheUpTo = new int[0];
}

private void ensureCacheSize(int size) {
if (maxScoreCache.length < size) {
int oldLength = maxScoreCache.length;
maxScoreCache = ArrayUtil.grow(maxScoreCache, size);
maxScoreCacheUpTo = Arrays.copyOf(maxScoreCacheUpTo, maxScoreCache.length);
Arrays.fill(maxScoreCacheUpTo, oldLength, maxScoreCacheUpTo.length, -1);
}
}

private float computeMaxScore(List<Impact> impacts) {
float maxScore = 0;
for (Impact impact : impacts) {
maxScore = Math.max(scorer.score(impact.freq, impact.norm), maxScore);
}
return maxScore;
}

/**
* Return the first level that includes all doc IDs up to {@code upTo},
* or -1 if there is no such level.
*/
private int getLevel(int upTo) throws IOException {
final Impacts impacts = impactsEnum.getImpacts();
for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
final int impactsUpTo = impacts.getDocIdUpTo(level);
if (upTo <= impactsUpTo) {
return level;
}
}
return -1;
}

/**
* Return the maximum score for the given {@code level}.
*/
float getMaxScoreForLevel(int level) throws IOException {
final Impacts impacts = impactsEnum.getImpacts();
ensureCacheSize(level + 1);
final int levelUpTo = impacts.getDocIdUpTo(level);
if (maxScoreCacheUpTo[level] < levelUpTo) {
maxScoreCache[level] = computeMaxScore(impacts.getImpacts(level));
maxScoreCacheUpTo[level] = levelUpTo;
}
return maxScoreCache[level];
}

/**
* Return the maximum level at which scores are all less than {@code minScore},
* or -1 if none.
*/
int getSkipLevel(float minScore) throws IOException {
final Impacts impacts = impactsEnum.getImpacts();
final int numLevels = impacts.numLevels();
for (int level = 0; level < numLevels; ++level) {
if (getMaxScoreForLevel(level) >= minScore) {
return level - 1;
}
}
return numLevels - 1;
}

/**
* Implement the contract of {@link Scorer#advanceShallow(int)} based on the
* wrapped {@link ImpactsEnum}.
* @see Scorer#advanceShallow(int)
*/
public int advanceShallow(int target) throws IOException {
impactsEnum.advanceShallow(target);
Impacts impacts = impactsEnum.getImpacts();
return impacts.getDocIdUpTo(0);
}

/**
* Implement the contract of {@link Scorer#getMaxScore(int)} based on the
* wrapped {@link ImpactsEnum} and {@link Scorer}.
* @see Scorer#getMaxScore(int)
*/
public float getMaxScore(int upTo) throws IOException {
final int level = getLevel(upTo);
if (level == -1) {
return globalMaxScore;
} else {
return getMaxScoreForLevel(level);
}
}
}
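MaxScoreCache is where the score computation that used to live inside the postings readers now ends up: it converts raw impacts into per-level score upper bounds and memoizes them until the enum shallow-advances past their validity range. A short usage sketch relying only on its public methods, mirroring how TermScorer wires it up below (simScorer and minCompetitiveScore are assumed to come from the surrounding query context):

    ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
    MaxScoreCache cache = new MaxScoreCache(impactsEnum, simScorer);

    int upTo = cache.advanceShallow(target);  // block boundary containing `target`
    float maxScore = cache.getMaxScore(upTo); // cached upper bound for that block
    if (maxScore < minCompetitiveScore) {
      // the whole block can be skipped without scoring a single document
    }
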
@@ -161,8 +161,8 @@ public abstract class Scorer {
* Advance to the block of documents that contains {@code target} in order to
* get scoring information about this block. This method is implicitly called
* by {@link DocIdSetIterator#advance(int)} and
* {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
* current {@link DocIdSetIterator#docID()}.
* {@link DocIdSetIterator#nextDoc()} on the returned doc ID. Calling this
* method doesn't modify the current {@link DocIdSetIterator#docID()}.
* It returns a number that is greater than or equal to all documents
* contained in the current block, but less than any doc IDS of the next block.
* {@code target} must be >= {@link #docID()} as well as all targets that

@@ -19,6 +19,7 @@ package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.index.Impacts;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;

@@ -31,6 +32,7 @@ final class TermScorer extends Scorer {
private final ImpactsEnum impactsEnum;
private final DocIdSetIterator iterator;
private final LeafSimScorer docScorer;
private final MaxScoreCache maxScoreCache;
private float minCompetitiveScore;

/**

@@ -47,7 +49,8 @@ final class TermScorer extends Scorer {
super(weight);
this.docScorer = docScorer;
if (scoreMode == ScoreMode.TOP_SCORES) {
impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS);
impactsEnum = te.impacts(PostingsEnum.FREQS);
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
postingsEnum = impactsEnum;
iterator = new DocIdSetIterator() {

@@ -61,8 +64,10 @@ final class TermScorer extends Scorer {
}

if (target > upTo) {
upTo = impactsEnum.advanceShallow(target);
maxScore = impactsEnum.getMaxScore(upTo);
impactsEnum.advanceShallow(target);
Impacts impacts = impactsEnum.getImpacts();
upTo = impacts.getDocIdUpTo(0);
maxScore = maxScoreCache.getMaxScoreForLevel(0);
}

while (true) {

@@ -76,10 +81,23 @@ final class TermScorer extends Scorer {
return NO_MORE_DOCS;
}

target = upTo + 1;

upTo = impactsEnum.advanceShallow(target);
maxScore = impactsEnum.getMaxScore(upTo);
impactsEnum.advanceShallow(upTo + 1);
Impacts impacts = impactsEnum.getImpacts();
final int level = maxScoreCache.getSkipLevel(minCompetitiveScore);
if (level >= 0) {
// we can skip more docs
int newUpTo = impacts.getDocIdUpTo(level);
if (newUpTo == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
target = newUpTo + 1;
impactsEnum.advanceShallow(target);
impacts = impactsEnum.getImpacts();
} else {
target = upTo + 1;
}
upTo = impacts.getDocIdUpTo(0);
maxScore = maxScoreCache.getMaxScoreForLevel(0);
}
}

@@ -105,7 +123,8 @@ final class TermScorer extends Scorer {
};
} else {
postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1));
impactsEnum = new SlowImpactsEnum(postingsEnum);
maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
iterator = postingsEnum;
}
}

@@ -132,12 +151,12 @@ final class TermScorer extends Scorer {

@Override
public int advanceShallow(int target) throws IOException {
return impactsEnum.advanceShallow(target);
return maxScoreCache.advanceShallow(target);
}

@Override
public float getMaxScore(int upTo) throws IOException {
return impactsEnum.getMaxScore(upTo);
return maxScoreCache.getMaxScore(upTo);
}

@Override

@@ -148,4 +167,5 @@ final class TermScorer extends Scorer {
/** Returns a string representation of this <code>TermScorer</code>. */
@Override
public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; }

}

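The behavioral win is in the impacts-based iterator above: instead of moving one block at a time, it asks MaxScoreCache.getSkipLevel for the highest level whose cached max score stays below minCompetitiveScore and jumps past that level's whole doc ID range in one step. Condensed from the loop above (a paraphrase, not a literal excerpt):

    // Levels 0..level all have maxScore < minCompetitiveScore, so every doc
    // up to impacts.getDocIdUpTo(level) is non-competitive and can be skipped.
    int level = maxScoreCache.getSkipLevel(minCompetitiveScore);
    if (level >= 0) {
      target = impacts.getDocIdUpTo(level) + 1;
    }
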
@@ -139,6 +139,7 @@ final class WANDScorer extends Scorer {

OptionalInt scalingFactor = OptionalInt.empty();
for (Scorer scorer : scorers) {
scorer.advanceShallow(0);
float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
if (maxScore != 0 && Float.isFinite(maxScore)) {
// 0 and +Infty should not impact the scale

@@ -20,85 +20,85 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
import org.apache.lucene.index.Impact;
import org.apache.lucene.util.LuceneTestCase;

public class TestCompetitiveFreqNormAccumulator extends LuceneTestCase {

public void testBasics() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
Set<FreqAndNorm> expected = new HashSet<>();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
Set<Impact> expected = new HashSet<>();

acc.add(3, 5);
expected.add(new FreqAndNorm(3, 5));
expected.add(new Impact(3, 5));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(6, 11);
expected.add(new FreqAndNorm(6, 11));
expected.add(new Impact(6, 11));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(10, 13);
expected.add(new FreqAndNorm(10, 13));
expected.add(new Impact(10, 13));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(1, 2);
expected.add(new FreqAndNorm(1, 2));
expected.add(new Impact(1, 2));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(7, 9);
expected.remove(new FreqAndNorm(6, 11));
expected.add(new FreqAndNorm(7, 9));
expected.remove(new Impact(6, 11));
expected.add(new Impact(7, 9));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(8, 2);
expected.clear();
expected.add(new FreqAndNorm(10, 13));
expected.add(new FreqAndNorm(8, 2));
expected.add(new Impact(10, 13));
expected.add(new Impact(8, 2));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
}

public void testExtremeNorms() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
Set<FreqAndNorm> expected = new HashSet<>();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
Set<Impact> expected = new HashSet<>();

acc.add(3, 5);
expected.add(new FreqAndNorm(3, 5));
expected.add(new Impact(3, 5));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(10, 10000);
expected.add(new FreqAndNorm(10, 10000));
expected.add(new Impact(10, 10000));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(5, 200);
expected.add(new FreqAndNorm(5, 200));
expected.add(new Impact(5, 200));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(20, -100);
expected.add(new FreqAndNorm(20, -100));
expected.add(new Impact(20, -100));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());

acc.add(30, -3);
expected.add(new FreqAndNorm(30, -3));
expected.add(new Impact(30, -3));
assertEquals(expected, acc.getCompetitiveFreqNormPairs());
}

public void testOmitFreqs() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();

acc.add(1, 5);
acc.add(1, 7);
acc.add(1, 4);

assertEquals(Collections.singleton(new FreqAndNorm(1, 4)), acc.getCompetitiveFreqNormPairs());
assertEquals(Collections.singleton(new Impact(1, 4)), acc.getCompetitiveFreqNormPairs());
}

public void testOmitNorms() {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();

acc.add(5, 1);
acc.add(7, 1);
acc.add(4, 1);

assertEquals(Collections.singleton(new FreqAndNorm(7, 1)), acc.getCompetitiveFreqNormPairs());
assertEquals(Collections.singleton(new Impact(7, 1)), acc.getCompetitiveFreqNormPairs());
}
}

@@ -18,19 +18,23 @@ package org.apache.lucene.codecs.lucene50;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.codecs.lucene50.Lucene50ScoreSkipReader.MutableImpactList;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

@@ -89,33 +93,43 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {

public void testImpactSerialization() throws IOException {
// omit norms and omit freqs
doTestImpactSerialization(new int[] { 1 }, new long[] { 1L });
doTestImpactSerialization(Collections.singletonList(new Impact(1, 1L)));

// omit freqs
doTestImpactSerialization(new int[] { 1 }, new long[] { 42L });
doTestImpactSerialization(Collections.singletonList(new Impact(1, 42L)));
// omit freqs with very large norms
doTestImpactSerialization(new int[] { 1 }, new long[] { -100L });
doTestImpactSerialization(Collections.singletonList(new Impact(1, -100L)));

// omit norms
doTestImpactSerialization(new int[] { 30 }, new long[] { 1L });
doTestImpactSerialization(Collections.singletonList(new Impact(30, 1L)));
// omit norms with large freq
doTestImpactSerialization(new int[] { 500 }, new long[] { 1L });
doTestImpactSerialization(Collections.singletonList(new Impact(500, 1L)));

// freqs and norms, basic
doTestImpactSerialization(
new int[] { 1, 3, 7, 15, 20, 28 },
new long[] { 7L, 9L, 10L, 11L, 13L, 14L });
Arrays.asList(
new Impact(1, 7L),
new Impact(3, 9L),
new Impact(7, 10L),
new Impact(15, 11L),
new Impact(20, 13L),
new Impact(28, 14L)));

// freqs and norms, high values
doTestImpactSerialization(
new int[] { 2, 10, 12, 50, 1000, 1005 },
new long[] { 2L, 10L, 50L, -100L, -80L, -3L });
Arrays.asList(
new Impact(2, 2L),
new Impact(10, 10L),
new Impact(12, 50L),
new Impact(50, -100L),
new Impact(1000, -80L),
new Impact(1005, -3L)));
}

private void doTestImpactSerialization(int[] freqs, long[] norms) throws IOException {
CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
for (int i = 0; i < freqs.length; ++i) {
acc.add(freqs[i], norms[i]);
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
for (Impact impact : impacts) {
acc.add(impact.freq, impact.norm);
}
try(Directory dir = newDirectory()) {
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {

@@ -124,17 +138,8 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
byte[] b = new byte[Math.toIntExact(in.length())];
in.readBytes(b, 0, b.length);
Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new SimScorer("") {
int i = 0;

@Override
public float score(float freq, long norm) {
assert freq == freqs[i];
assert norm == norms[i];
i++;
return 0;
}
});
List<Impact> impacts2 = Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
assertEquals(impacts, impacts2);
}
}
}

@@ -34,7 +34,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

@@ -680,7 +679,7 @@ public class TestCodecs extends LuceneTestCase {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}
}

@@ -42,7 +42,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;

@@ -1429,8 +1428,8 @@ public class MemoryIndex {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1L));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, flags));
}

@Override

@@ -21,11 +21,10 @@ import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;

@@ -90,7 +89,7 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
}

@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
throw new UnsupportedOperationException("Should never be called, IDVersionSegmentTermsEnum implements impacts directly");
}

@@ -25,7 +25,6 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;

@@ -1009,10 +1008,10 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
// Only one posting, the slow impl is fine
// We could make this throw UOE but then CheckIndex is angry
return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
return new SlowImpactsEnum(postings(null, flags));
}

@Override

@@ -45,7 +45,6 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Accountable;

@@ -477,8 +476,8 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
public ImpactsEnum impacts(int flags) throws IOException {
return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS));
}
}

@@ -18,12 +18,12 @@ package org.apache.lucene.index;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;

import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

@@ -211,12 +211,12 @@ public class AssertingLeafReader extends FilterLeafReader {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
assertThread("Terms enums", creationThread);
assert state == State.POSITIONED: "docs(...) called on unpositioned TermsEnum";
assert (flags & PostingsEnum.FREQS) != 0 : "Freqs should be requested on impacts";

return new AssertingImpactsEnum(super.impacts(scorer, flags));
return new AssertingImpactsEnum(super.impacts(flags));
}

// TODO: we should separately track if we are 'at the end' ?

@@ -454,7 +454,7 @@ public class AssertingLeafReader extends FilterLeafReader {

private final AssertingPostingsEnum assertingPostings;
private final ImpactsEnum in;
private int lastShallowTarget;
private int lastShallowTarget = -1;

AssertingImpactsEnum(ImpactsEnum impacts) {
in = impacts;

@@ -463,20 +463,19 @@ public class AssertingLeafReader extends FilterLeafReader {
}

@Override
public int advanceShallow(int target) throws IOException {
public void advanceShallow(int target) throws IOException {
assert target >= lastShallowTarget : "called on decreasing targets: target = " + target + " < last target = " + lastShallowTarget;
assert target >= docID() : "target = " + target + " < docID = " + docID();
int upTo = in.advanceShallow(target);
assert upTo >= target : "upTo = " + upTo + " < target = " + target;
lastShallowTarget = target;
return upTo;
in.advanceShallow(target);
}

@Override
public float getMaxScore(int upTo) throws IOException {
assert upTo >= lastShallowTarget : "uTo = " + upTo + " < last shallow target = " + lastShallowTarget;
float maxScore = in.getMaxScore(upTo);
return maxScore;
public Impacts getImpacts() throws IOException {
assert docID() >= 0 || lastShallowTarget >= 0 : "Cannot get impacts until the iterator is positioned or advanceShallow has been called";
Impacts impacts = in.getImpacts();
CheckIndex.checkImpacts(impacts, Math.max(docID(), lastShallowTarget));
return new AssertingImpacts(impacts, this);
}

@Override

@@ -527,6 +526,38 @@ public class AssertingLeafReader extends FilterLeafReader {
}
}

static class AssertingImpacts extends Impacts {

private final Impacts in;
private final AssertingImpactsEnum impactsEnum;
private final int validFor;

AssertingImpacts(Impacts in, AssertingImpactsEnum impactsEnum) {
this.in = in;
this.impactsEnum = impactsEnum;
validFor = Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget);
}

@Override
public int numLevels() {
assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget) : "Cannot reuse impacts after advancing the iterator";
return in.numLevels();
}

@Override
public int getDocIdUpTo(int level) {
assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget) : "Cannot reuse impacts after advancing the iterator";
return in.getDocIdUpTo(level);
}

@Override
public List<Impact> getImpacts(int level) {
assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget) : "Cannot reuse impacts after advancing the iterator";
return in.getImpacts(level);
}

}

/** Wraps a NumericDocValues but with additional asserts */
public static class AssertingNumericDocValues extends NumericDocValues {
private final Thread creationThread = Thread.currentThread();

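The new AssertingImpacts wrapper encodes the lifecycle rule from the ImpactsEnum javadoc: an Impacts object is only valid while max(docID(), lastShallowTarget) is unchanged, i.e. until the enum is advanced or shallow-advanced again. A sketch of the misuse it is designed to catch:

    Impacts impacts = impactsEnum.getImpacts();
    impacts.numLevels();             // fine
    impactsEnum.advance(target);
    impacts.numLevels();             // trips the "Cannot reuse impacts" assertion
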
@@ -16,11 +16,18 @@
*/
package org.apache.lucene.index;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;

@@ -33,13 +40,13 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.IntToLongFunction;
import java.util.stream.Collectors;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;

@@ -55,12 +62,6 @@ import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
import org.apache.lucene.util.automaton.CompiledAutomaton;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

/** Helper class extracted from BasePostingsFormatTestCase to exercise a postings format. */
public class RandomPostingsTester {


@@ -608,7 +609,7 @@ public class RandomPostingsTester {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
public ImpactsEnum impacts(int flags) throws IOException {
throw new UnsupportedOperationException();
}
}

@@ -1055,126 +1056,146 @@ public class RandomPostingsTester {
} else {
docToNorm = doc -> 1L;
}
for (int s = 0; s < 3; ++s) {
final int scoreMode = s;
SimScorer scorer = new SimScorer(field) {
@Override
public float score(float freq, long norm) {
switch (scoreMode) {
case 0:
return freq; // make sure the postings record the best freq
case 1:
return 1f / norm; // make sure the postings record the best norm
default:
return freq - norm + MAX_NORM; // now a combination that could make intermediate pairs more competitive
}
}
};

// First check max scores and block uptos
int max = -1;
float maxScore = 0;
int flags = PostingsEnum.FREQS;
if (doCheckPositions) {
flags |= PostingsEnum.POSITIONS;
// First check impacts and block uptos
int max = -1;
List<Impact> impactsCopy = null;
int flags = PostingsEnum.FREQS;
if (doCheckPositions) {
flags |= PostingsEnum.POSITIONS;
if (doCheckOffsets) {
flags |= PostingsEnum.OFFSETS;
}
if (doCheckPayloads) {
flags |= PostingsEnum.PAYLOADS;
}
}

ImpactsEnum impactsEnum = termsEnum.impacts(flags);
PostingsEnum postings = termsEnum.postings(null, flags);
for (int doc = impactsEnum.nextDoc(); ; doc = impactsEnum.nextDoc()) {
assertEquals(postings.nextDoc(), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impactsEnum.freq());
for (int i = 0; i < freq; ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impactsEnum.nextPosition());
if (doCheckOffsets) {
flags |= PostingsEnum.OFFSETS;
assertEquals("startOffset is wrong", postings.startOffset(), impactsEnum.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impactsEnum.endOffset());
}
if (doCheckPayloads) {
flags |= PostingsEnum.PAYLOADS;
assertEquals("payload is wrong", postings.getPayload(), impactsEnum.getPayload());
}
}
if (doc > max) {
impactsEnum.advanceShallow(doc);
Impacts impacts = impactsEnum.getImpacts();
CheckIndex.checkImpacts(impacts, doc);
impactsCopy = impacts.getImpacts(0)
.stream()
.map(i -> new Impact(i.freq, i.norm))
.collect(Collectors.toList());
}
freq = impactsEnum.freq();
long norm = docToNorm.applyAsLong(doc);
int idx = Collections.binarySearch(impactsCopy, new Impact(freq, norm), Comparator.comparing(i -> i.freq));
if (idx < 0) {
idx = -1 - idx;
}
assertTrue("Got " + new Impact(freq, norm) + " in postings, but no impact triggers equal or better scores in " + impactsCopy,
idx <= impactsCopy.size() && impactsCopy.get(idx).norm <= norm);
}

// Now check advancing
impactsEnum = termsEnum.impacts(flags);
postings = termsEnum.postings(postings, flags);

max = -1;
while (true) {
int doc = impactsEnum.docID();
boolean advance;
int target;
if (random.nextBoolean()) {
advance = false;
target = doc + 1;
} else {
advance = true;
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
target = impactsEnum.docID() + delta;
}

if (target > max && random.nextBoolean()) {
int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target);
max = target + delta;

impactsEnum.advanceShallow(target);
Impacts impacts = impactsEnum.getImpacts();
CheckIndex.checkImpacts(impacts, target);
impactsCopy = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
for (int level = 0; level < impacts.numLevels(); ++level) {
if (impacts.getDocIdUpTo(level) >= max) {
impactsCopy = impacts.getImpacts(level)
.stream()
.map(i -> new Impact(i.freq, i.norm))
.collect(Collectors.toList());
break;
}
}
}

ImpactsEnum impacts = termsEnum.impacts(scorer, flags);
PostingsEnum postings = termsEnum.postings(null, flags);
for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
assertEquals(postings.nextDoc(), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impacts.freq());
for (int i = 0; i < freq; ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impacts.nextPosition());
if (doCheckOffsets) {
assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
}
if (doCheckPayloads) {
assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
}
}
if (doc > max) {
max = impacts.advanceShallow(doc);
assertTrue(max >= doc);
maxScore = impacts.getMaxScore(max);
}
assertEquals(max, impacts.advanceShallow(doc));
assertTrue(scorer.score(impacts.freq(), docToNorm.applyAsLong(doc)) <= maxScore);
if (advance) {
doc = impactsEnum.advance(target);
} else {
doc = impactsEnum.nextDoc();
}

// Now check advancing
impacts = termsEnum.impacts(scorer, flags);
postings = termsEnum.postings(postings, flags);

max = -1;
while (true) {
int doc = impacts.docID();
boolean advance;
int target;
if (random.nextBoolean()) {
advance = false;
target = doc + 1;
} else {
advance = true;
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
target = impacts.docID() + delta;
}

if (target > max && random.nextBoolean()) {
int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target);
max = target + delta;
int m = impacts.advanceShallow(target);
assertTrue(m >= target);
maxScore = impacts.getMaxScore(max);
}

if (advance) {
doc = impacts.advance(target);
} else {
doc = impacts.nextDoc();
}

assertEquals(postings.advance(target), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impacts.freq());
for (int i = 0; i < postings.freq(); ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impacts.nextPosition());
if (doCheckOffsets) {
assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
}
if (doCheckPayloads) {
assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
}
}

if (doc > max) {
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
max = doc + delta;
int m = impacts.advanceShallow(doc);
assertTrue(m >= doc);
maxScore = impacts.getMaxScore(max);
}

float score = scorer.score(impacts.freq(), docToNorm.applyAsLong(doc));
assertTrue(score <= maxScore);
assertEquals(postings.advance(target), doc);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
int freq = postings.freq();
assertEquals("freq is wrong", freq, impactsEnum.freq());
for (int i = 0; i < postings.freq(); ++i) {
int pos = postings.nextPosition();
assertEquals("position is wrong", pos, impactsEnum.nextPosition());
if (doCheckOffsets) {
assertEquals("startOffset is wrong", postings.startOffset(), impactsEnum.startOffset());
assertEquals("endOffset is wrong", postings.endOffset(), impactsEnum.endOffset());
}
if (doCheckPayloads) {
assertEquals("payload is wrong", postings.getPayload(), impactsEnum.getPayload());
}
}

if (doc > max) {
int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
max = doc + delta;
Impacts impacts = impactsEnum.getImpacts();
CheckIndex.checkImpacts(impacts, doc);
impactsCopy = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
for (int level = 0; level < impacts.numLevels(); ++level) {
if (impacts.getDocIdUpTo(level) >= max) {
impactsCopy = impacts.getImpacts(level)
.stream()
.map(i -> new Impact(i.freq, i.norm))
.collect(Collectors.toList());
break;
}
}
}

freq = impactsEnum.freq();
long norm = docToNorm.applyAsLong(doc);
int idx = Collections.binarySearch(impactsCopy, new Impact(freq, norm), Comparator.comparing(i -> i.freq));
if (idx < 0) {
idx = -1 - idx;
}
assertTrue("Got " + new Impact(freq, norm) + " in postings, but no impact triggers equal or better scores in " + impactsCopy,
idx <= impactsCopy.size() && impactsCopy.get(idx).norm <= norm);
}
}
}

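The randomized test now checks domination rather than exact score bounds: for every (freq, norm) pair actually observed in postings, some recorded impact must have a freq at least as high and a norm no larger, which guarantees an equal-or-better score under any similarity that is monotonic in freq and norm. The binarySearch above exploits the freq ordering of recorded impacts; written out naively, the asserted predicate is roughly (a sketch, using the unsigned norm order documented on Impacts):

    // true if some recorded impact scores at least as well as the observed pair
    static boolean dominated(List<Impact> impacts, int freq, long norm) {
      for (Impact impact : impacts) {
        if (impact.freq >= freq && Long.compareUnsigned(impact.norm, norm) <= 0) {
          return true;
        }
      }
      return false;
    }
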
@@ -88,6 +88,7 @@ public class AssertingScorer extends Scorer {
@Override
public float getMaxScore(int upTo) throws IOException {
assert upTo >= lastShallowTarget : "uTo = " + upTo + " < last target = " + lastShallowTarget;
assert docID() >= 0 || lastShallowTarget >= 0 : "Cannot get max scores until the iterator is positioned or advanceShallow has been called";
float maxScore = in.getMaxScore(upTo);
return maxScore;
}

@@ -632,7 +632,7 @@ public class CheckHits {
Assert.assertTrue(twoPhase1 == null || twoPhase1.matches());
float score = s2.score();
Assert.assertEquals(s1.score(), score);
Assert.assertTrue(score <= maxScore);
Assert.assertTrue(score + " > " + maxScore + " up to " + upTo, score <= maxScore);

if (score >= minScore && random.nextInt(10) == 0) {
// On some scorers, changing the min score changes the way that docs are iterated

@@ -43,7 +43,6 @@ import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;

@@ -241,8 +240,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return te.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return te.impacts(flags);
}

@Override

@@ -35,7 +35,6 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@@ -609,8 +608,8 @@ public class DocTermOrds implements Accountable {
}

@Override
public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
return termsEnum.impacts(scorer, flags);
public ImpactsEnum impacts(int flags) throws IOException {
return termsEnum.impacts(flags);
}

@Override