mirror of https://github.com/apache/lucene.git

LUCENE-8142: Make postings APIs expose raw impacts rather than scores.

commit af680af77f (parent 555b7ef270)
@@ -30,16 +30,15 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
@@ -661,9 +660,9 @@ public class BlockTermsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       decodeMetaData();
-      return postingsReader.impacts(fieldInfo, state, scorer, flags);
+      return postingsReader.impacts(fieldInfo, state, flags);
     }
 
     @Override
@@ -24,7 +24,6 @@ import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -208,9 +207,9 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
    currentFrame.decodeMetaData();
-   return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
+   return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags);
   }
 
   private int getState() {
@@ -29,7 +29,6 @@ import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -936,7 +935,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
    assert !eof;
    //if (DEBUG) {
    //System.out.println("BTTR.docs seg=" + segment);
@@ -945,7 +944,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
    //if (DEBUG) {
    //System.out.println(" state=" + currentFrame.state);
    //}
-   return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
+   return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags);
   }
 
   @Override
@@ -32,16 +32,15 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
@@ -375,8 +374,8 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
       }
 
       @Override
-      public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-        return delegate().impacts(scorer, flags);
+      public ImpactsEnum impacts(int flags) throws IOException {
+        return delegate().impacts(flags);
       }
     }
 
@@ -39,7 +39,6 @@ import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.Accountable;
@@ -948,8 +947,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, flags));
     }
   }
 
@@ -1503,8 +1502,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, flags));
    }
 
    @Override
@@ -31,18 +31,17 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
@@ -435,9 +434,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       decodeMetaData();
-      return postingsReader.impacts(fieldInfo, state, scorer, flags);
+      return postingsReader.impacts(fieldInfo, state, flags);
     }
 
     // TODO: this can be achieved by making use of Util.getByOutput()
@@ -42,7 +42,6 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
@@ -301,9 +300,9 @@ public class FSTTermsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       decodeMetaData();
-      return postingsReader.impacts(fieldInfo, state, scorer, flags);
+      return postingsReader.impacts(fieldInfo, state, flags);
     }
 
     @Override
@@ -29,7 +29,6 @@ import java.util.concurrent.atomic.AtomicLong;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.*;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
@@ -44,11 +43,11 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
+import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.BlockPackedReader;
@@ -871,7 +870,7 @@ class MemoryDocValuesProducer extends DocValuesProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       throw new UnsupportedOperationException();
     }
   }
@@ -31,19 +31,18 @@ import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IOContext;
@@ -819,8 +818,8 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, flags));
     }
 
     @Override
@@ -36,7 +36,6 @@ import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.BufferedChecksumIndexInput;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
@@ -234,8 +233,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, flags));
    }
  }
 
@@ -25,15 +25,14 @@ import java.util.SortedMap;
 import java.util.TreeMap;
 
 import org.apache.lucene.codecs.TermVectorsReader;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedChecksumIndexInput;
 import org.apache.lucene.store.ChecksumIndexInput;
@@ -414,8 +413,8 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS));
     }
   }
 
@@ -23,23 +23,25 @@ import java.util.Iterator;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.apache.lucene.index.Impact;
+
 /**
  * This class accumulates the (freq, norm) pairs that may produce competitive scores.
  */
-public final class CompetitiveFreqNormAccumulator {
+public final class CompetitiveImpactAccumulator {
 
   // We speed up accumulation for common norm values by first computing
   // the max freq for all norms in -128..127
   private final int[] maxFreqs;
   private boolean dirty;
-  private final TreeSet<FreqAndNorm> freqNormPairs;
+  private final TreeSet<Impact> freqNormPairs;
 
   /** Sole constructor. */
-  public CompetitiveFreqNormAccumulator() {
+  public CompetitiveImpactAccumulator() {
     maxFreqs = new int[256];
-    Comparator<FreqAndNorm> comparator = new Comparator<CompetitiveFreqNormAccumulator.FreqAndNorm>() {
+    Comparator<Impact> comparator = new Comparator<Impact>() {
       @Override
-      public int compare(FreqAndNorm o1, FreqAndNorm o2) {
+      public int compare(Impact o1, Impact o2) {
        // greater freqs compare greater
        int cmp = Integer.compare(o1.freq, o2.freq);
        if (cmp == 0) {
@@ -59,44 +61,6 @@ public final class CompetitiveFreqNormAccumulator {
     freqNormPairs.clear();
   }
 
-  /**
-   * A (freq, norm) pair.
-   */
-  public static class FreqAndNorm {
-    /** Doc-term frequency. */
-    public final int freq;
-    /** Normalization factor. */
-    public final long norm;
-
-    /** Sole constructor. */
-    public FreqAndNorm(int freq, long norm) {
-      this.freq = freq;
-      this.norm = norm;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-      if (obj == null || obj instanceof FreqAndNorm == false) {
-        return false;
-      }
-      FreqAndNorm that = (FreqAndNorm) obj;
-      return freq == that.freq && norm == that.norm;
-    }
-
-    @Override
-    public int hashCode() {
-      int h = getClass().hashCode();
-      h = 31 * h + freq;
-      h = 31 * h + Long.hashCode(norm);
-      return h;
-    }
-
-    @Override
-    public String toString() {
-      return "{" + freq + "," + norm + "}";
-    }
-  }
-
   /** Accumulate a (freq,norm) pair, updating this structure if there is no
    * equivalent or more competitive entry already. */
   public void add(int freq, long norm) {
@@ -105,23 +69,23 @@ public final class CompetitiveFreqNormAccumulator {
       maxFreqs[index] = Math.max(maxFreqs[index], freq);
       dirty = true;
     } else {
-      add(new FreqAndNorm(freq, norm));
+      add(new Impact(freq, norm));
     }
   }
 
   /** Merge {@code acc} into this. */
-  public void addAll(CompetitiveFreqNormAccumulator acc) {
-    for (FreqAndNorm entry : acc.getCompetitiveFreqNormPairs()) {
+  public void addAll(CompetitiveImpactAccumulator acc) {
+    for (Impact entry : acc.getCompetitiveFreqNormPairs()) {
       add(entry);
     }
   }
 
   /** Get the set of competitive freq and norm pairs, orderer by increasing freq and norm. */
-  public SortedSet<FreqAndNorm> getCompetitiveFreqNormPairs() {
+  public SortedSet<Impact> getCompetitiveFreqNormPairs() {
     if (dirty) {
       for (int i = 0; i < maxFreqs.length; ++i) {
        if (maxFreqs[i] > 0) {
-          add(new FreqAndNorm(maxFreqs[i], (byte) i));
+          add(new Impact(maxFreqs[i], (byte) i));
          maxFreqs[i] = 0;
        }
      }
@@ -130,8 +94,8 @@ public final class CompetitiveFreqNormAccumulator {
     return Collections.unmodifiableSortedSet(freqNormPairs);
   }
 
-  private void add(FreqAndNorm newEntry) {
-    FreqAndNorm next = freqNormPairs.ceiling(newEntry);
+  private void add(Impact newEntry) {
+    Impact next = freqNormPairs.ceiling(newEntry);
     if (next == null) {
       // nothing is more competitive
       freqNormPairs.add(newEntry);
@@ -144,8 +108,8 @@ public final class CompetitiveFreqNormAccumulator {
       freqNormPairs.add(newEntry);
     }
 
-    for (Iterator<FreqAndNorm> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
-      FreqAndNorm entry = it.next();
+    for (Iterator<Impact> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
+      Impact entry = it.next();
       if (Long.compareUnsigned(entry.norm, newEntry.norm) >= 0) {
        // less competitive
        it.remove();
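The accumulator above retains a (freq, norm) pair only when no other retained pair dominates it, i.e. no pair has a freq at least as high together with an unsigned norm at least as low. A minimal standalone sketch of that dominance test (illustrative only, not the Lucene class, which uses a TreeSet plus a per-norm max-freq array for speed; the Pair type is a hypothetical stand-in for org.apache.lucene.index.Impact):

import java.util.List;

class CompetitivenessSketch {
  // Hypothetical pair type standing in for org.apache.lucene.index.Impact.
  static final class Pair {
    final int freq;
    final long norm;
    Pair(int freq, long norm) { this.freq = freq; this.norm = norm; }
  }

  // A candidate is non-competitive if some kept pair scores at least as well
  // under any reasonable scorer: freq >= candidate's, unsigned norm <= candidate's.
  static boolean dominated(Pair candidate, List<Pair> kept) {
    for (Pair k : kept) {
      if (k.freq >= candidate.freq
          && Long.compareUnsigned(k.norm, candidate.norm) <= 0) {
        return true;
      }
    }
    return false;
  }
}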
@@ -24,7 +24,6 @@ import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
@@ -72,7 +71,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
   * Return a {@link ImpactsEnum} that computes impacts with {@code scorer}.
   * @see #postings(FieldInfo, BlockTermState, PostingsEnum, int)
   */
-  public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, Similarity.SimScorer scorer, int flags) throws IOException;
+  public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException;
 
  /**
   * Checks consistency of this reader.
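With scoring moved out of the postings APIs, a consumer asks for raw impacts and walks the returned Impacts structure itself. A hedged sketch of the call pattern implied by the new signatures (termsEnum and target are assumed to be set up elsewhere; level 0 covers the smallest doc range):

// Sketch only: names follow the signatures in this diff, setup is assumed.
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
impactsEnum.advanceShallow(target);        // position the skip data near target
Impacts impacts = impactsEnum.getImpacts();
for (int level = 0; level < impacts.numLevels(); ++level) {
  int upTo = impacts.getDocIdUpTo(level);  // impacts at this level hold up to upTo
  for (Impact impact : impacts.getImpacts(level)) {
    // each entry is a raw (freq, norm) pair; no scores are precomputed
  }
}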
@@ -24,7 +24,6 @@ import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -235,9 +234,9 @@ final class IntersectTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     currentFrame.decodeMetaData();
-    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, scorer, flags);
+    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.termState, flags);
   }
 
   private int getState() {
@@ -25,7 +25,6 @@ import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -1005,7 +1004,7 @@ final class SegmentTermsEnum extends TermsEnum {
   }
 
   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
    assert !eof;
    //if (DEBUG) {
    //System.out.println("BTTR.docs seg=" + segment);
@@ -1014,7 +1013,7 @@ final class SegmentTermsEnum extends TermsEnum {
    //if (DEBUG) {
    //System.out.println(" state=" + currentFrame.state);
    //}
-    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, scorer, flags);
+    return fr.parent.postingsReader.impacts(fr.fieldInfo, currentFrame.state, flags);
   }
 
   @Override
@@ -37,7 +37,6 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ChecksumIndexInput;
@@ -946,10 +945,9 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
     }
 
     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       final PostingsEnum delegate = postings(null, PostingsEnum.FREQS);
-      final float maxScore = scorer.score(Float.MAX_VALUE, 1);
-      return new SlowImpactsEnum(delegate, maxScore);
+      return new SlowImpactsEnum(delegate);
     }
 
   }
@@ -19,20 +19,19 @@ package org.apache.lucene.codecs.lucene50;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Objects;
 
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SlowImpactsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -239,13 +238,12 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
   }
 
   @Override
-  public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
-    Objects.requireNonNull(scorer);
+  public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
    if (state.docFreq <= BLOCK_SIZE || version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
      // no skip data
-      return new SlowImpactsEnum(postings(fieldInfo, state, null, flags), scorer.score(Float.MAX_VALUE, 1));
+      return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
    }
-    return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, scorer, flags);
+    return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
   }
 
   final class BlockDocsEnum extends PostingsEnum {
@@ -1367,7 +1365,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
 
     private long seekTo = -1;
 
-    public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, SimScorer scorer, int flags) throws IOException {
+    public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termState, int flags) throws IOException {
      indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
      indexHasPos = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
@@ -1440,8 +1438,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
                                            MAX_SKIP_LEVELS,
                                            indexHasPos,
                                            indexHasOffsets,
-                                            indexHasPayloads,
-                                            scorer);
+                                            indexHasPayloads);
      skipper.init(docTermStartFP+termState.skipOffset, docTermStartFP, posTermStartFP, payTermStartFP, docFreq);
 
      if (indexHasFreq == false) {
@@ -1544,17 +1541,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
     }
 
     @Override
-    public int nextDoc() throws IOException {
-      return advance(doc + 1);
-    }
-
-    @Override
-    public float getMaxScore(int upTo) throws IOException {
-      return skipper.getMaxScore(upTo);
-    }
-
-    @Override
-    public int advanceShallow(int target) throws IOException {
+    public void advanceShallow(int target) throws IOException {
      if (target > nextSkipDoc) {
        // always plus one to fix the result, since skip position in Lucene50SkipReader
        // is a little different from MultiLevelSkipListReader
@@ -1580,7 +1567,17 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
        nextSkipDoc = skipper.getNextSkipDoc();
      }
      assert nextSkipDoc >= target;
-      return nextSkipDoc;
     }
 
+    @Override
+    public Impacts getImpacts() throws IOException {
+      advanceShallow(doc);
+      return skipper.getImpacts();
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(doc + 1);
+    }
+
     @Override
@@ -31,7 +31,7 @@ import java.io.IOException;
 
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
 import org.apache.lucene.codecs.PushPostingsWriterBase;
 import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
 import org.apache.lucene.index.CorruptIndexException;
@@ -101,7 +101,7 @@ public final class Lucene50PostingsWriter extends PushPostingsWriterBase {
 
   private boolean fieldHasNorms;
   private NumericDocValues norms;
-  private final CompetitiveFreqNormAccumulator competitiveFreqNormAccumulator = new CompetitiveFreqNormAccumulator();
+  private final CompetitiveImpactAccumulator competitiveFreqNormAccumulator = new CompetitiveImpactAccumulator();
 
   /** Creates a postings writer */
   public Lucene50PostingsWriter(SegmentWriteState state) throws IOException {
@@ -17,90 +17,143 @@
 package org.apache.lucene.codecs.lucene50;
 
 import java.io.IOException;
+import java.util.AbstractList;
 import java.util.Arrays;
-import java.util.Objects;
+import java.util.List;
+import java.util.RandomAccess;
 
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.Impacts;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 
 final class Lucene50ScoreSkipReader extends Lucene50SkipReader {
 
-  private final SimScorer scorer;
-  private final float[] maxScore;
-  private final byte[][] impacts;
-  private final int[] impactsLength;
-  private final float globalMaxScore;
+  private final byte[][] impactData;
+  private final int[] impactDataLength;
   private final ByteArrayDataInput badi = new ByteArrayDataInput();
+  private final Impacts impacts;
+  private int numLevels = 1;
+  private final MutableImpactList[] perLevelImpacts;
 
   public Lucene50ScoreSkipReader(int version, IndexInput skipStream, int maxSkipLevels,
-      boolean hasPos, boolean hasOffsets, boolean hasPayloads, SimScorer scorer) {
+      boolean hasPos, boolean hasOffsets, boolean hasPayloads) {
    super(version, skipStream, maxSkipLevels, hasPos, hasOffsets, hasPayloads);
    if (version < Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
      throw new IllegalStateException("Cannot skip based on scores if impacts are not indexed");
    }
-    this.scorer = Objects.requireNonNull(scorer);
-    this.maxScore = new float[maxSkipLevels];
-    this.impacts = new byte[maxSkipLevels][];
-    Arrays.fill(impacts, new byte[0]);
-    this.impactsLength = new int[maxSkipLevels];
-    this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1);
+    this.impactData = new byte[maxSkipLevels][];
+    Arrays.fill(impactData, new byte[0]);
+    this.impactDataLength = new int[maxSkipLevels];
+    this.perLevelImpacts = new MutableImpactList[maxSkipLevels];
+    for (int i = 0; i < perLevelImpacts.length; ++i) {
+      perLevelImpacts[i] = new MutableImpactList();
+    }
+    impacts = new Impacts() {
+
+      @Override
+      public int numLevels() {
+        return numLevels;
+      }
+
+      @Override
+      public int getDocIdUpTo(int level) {
+        return skipDoc[level];
+      }
+
+      @Override
+      public List<Impact> getImpacts(int level) {
+        assert level < numLevels;
+        if (impactDataLength[level] > 0) {
+          badi.reset(impactData[level], 0, impactDataLength[level]);
+          perLevelImpacts[level] = readImpacts(badi, perLevelImpacts[level]);
+          impactDataLength[level] = 0;
+        }
+        return perLevelImpacts[level];
+      }
+    };
   }
 
   @Override
-  public void init(long skipPointer, long docBasePointer, long posBasePointer, long payBasePointer, int df) throws IOException {
-    super.init(skipPointer, docBasePointer, posBasePointer, payBasePointer, df);
-    Arrays.fill(impactsLength, 0);
-    Arrays.fill(maxScore, globalMaxScore);
+  public int skipTo(int target) throws IOException {
+    int result = super.skipTo(target);
+    if (numberOfSkipLevels > 0) {
+      numLevels = numberOfSkipLevels;
+    } else {
+      // End of postings don't have skip data anymore, so we fill with dummy data
+      // like SlowImpactsEnum.
+      numLevels = 1;
+      perLevelImpacts[0].length = 1;
+      perLevelImpacts[0].impacts[0].freq = Integer.MAX_VALUE;
+      perLevelImpacts[0].impacts[0].norm = 1L;
+      impactDataLength[0] = 0;
+    }
+    return result;
   }
 
-  /** Upper bound of scores up to {@code upTo} included. */
-  public float getMaxScore(int upTo) throws IOException {
-    for (int level = 0; level < numberOfSkipLevels; ++level) {
-      if (upTo <= skipDoc[level]) {
-        return maxScore(level);
-      }
-    }
-    return globalMaxScore;
-  }
-
-  private float maxScore(int level) throws IOException {
-    assert level < numberOfSkipLevels;
-    if (impactsLength[level] > 0) {
-      badi.reset(impacts[level], 0, impactsLength[level]);
-      maxScore[level] = readImpacts(badi, scorer);
-      impactsLength[level] = 0;
-    }
-    return maxScore[level];
+  Impacts getImpacts() {
+    return impacts;
   }
 
   @Override
   protected void readImpacts(int level, IndexInput skipStream) throws IOException {
    int length = skipStream.readVInt();
-    if (impacts[level].length < length) {
-      impacts[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
+    if (impactData[level].length < length) {
+      impactData[level] = new byte[ArrayUtil.oversize(length, Byte.BYTES)];
    }
-    skipStream.readBytes(impacts[level], 0, length);
-    impactsLength[level] = length;
+    skipStream.readBytes(impactData[level], 0, length);
+    impactDataLength[level] = length;
   }
 
-  static float readImpacts(ByteArrayDataInput in, SimScorer scorer) throws IOException {
+  static MutableImpactList readImpacts(ByteArrayDataInput in, MutableImpactList reuse) {
+    int maxNumImpacts = in.length(); // at most one impact per byte
+    if (reuse.impacts.length < maxNumImpacts) {
+      int oldLength = reuse.impacts.length;
+      reuse.impacts = ArrayUtil.grow(reuse.impacts, maxNumImpacts);
+      for (int i = oldLength; i < reuse.impacts.length; ++i) {
+        reuse.impacts[i] = new Impact(Integer.MAX_VALUE, 1L);
+      }
+    }
+
    int freq = 0;
    long norm = 0;
-    float maxScore = 0;
+    int length = 0;
    while (in.getPosition() < in.length()) {
      int freqDelta = in.readVInt();
      if ((freqDelta & 0x01) != 0) {
        freq += 1 + (freqDelta >>> 1);
-        norm += 1 + in.readZLong();
+        try {
+          norm += 1 + in.readZLong();
+        } catch (IOException e) {
+          throw new RuntimeException(e); // cannot happen on a BADI
+        }
      } else {
        freq += 1 + (freqDelta >>> 1);
        norm++;
      }
-      maxScore = Math.max(maxScore, scorer.score(freq, norm));
+      Impact impact = reuse.impacts[length];
+      impact.freq = freq;
+      impact.norm = norm;
+      length++;
+    }
+    reuse.length = length;
+    return reuse;
+  }
+
+  static class MutableImpactList extends AbstractList<Impact> implements RandomAccess {
+    int length = 1;
+    Impact[] impacts = new Impact[] { new Impact(Integer.MAX_VALUE, 1L) };
+
+    @Override
+    public Impact get(int index) {
+      return impacts[index];
+    }
+
+    @Override
+    public int size() {
+      return length;
    }
-    return maxScore;
  }
 
 }
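readImpacts now decodes into a reusable MutableImpactList instead of collapsing the pairs to a single score, so the same bytes can be rescored with any scorer. A hedged usage sketch of the reuse pattern, assuming `encoded` holds bytes produced by Lucene50SkipWriter.writeImpacts and that the caller sits in the same package (both classes are package-private):

import org.apache.lucene.index.Impact;
import org.apache.lucene.store.ByteArrayDataInput;

// Sketch: decode and print impacts; `encoded` is assumed to come from writeImpacts.
static void dumpImpacts(byte[] encoded) {
  ByteArrayDataInput in = new ByteArrayDataInput(encoded);
  Lucene50ScoreSkipReader.MutableImpactList reuse =
      new Lucene50ScoreSkipReader.MutableImpactList();
  reuse = Lucene50ScoreSkipReader.readImpacts(in, reuse);
  for (Impact impact : reuse) { // MutableImpactList is a List<Impact>
    System.out.println("freq=" + impact.freq + " norm=" + impact.norm);
  }
}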
@@ -200,8 +200,7 @@ class Lucene50SkipReader extends MultiLevelSkipListReader {
    return delta;
  }
 
-  // The default impl skips impacts since they are only useful if we have a SimScorer
-  // to compute the scores that impacts map to.
+  // The default impl skips impacts
  protected void readImpacts(int level, IndexInput skipStream) throws IOException {
    if (version >= Lucene50PostingsFormat.VERSION_IMPACT_SKIP_DATA) {
      // The base implementation skips impacts, they are not used
@ -22,9 +22,9 @@ import java.util.Arrays;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.SortedSet;
|
import java.util.SortedSet;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
|
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
|
||||||
import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
|
|
||||||
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
|
import org.apache.lucene.codecs.MultiLevelSkipListWriter;
|
||||||
|
import org.apache.lucene.index.Impact;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.RAMOutputStream;
|
import org.apache.lucene.store.RAMOutputStream;
|
||||||
|
|
||||||
|
@ -65,7 +65,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
|
||||||
private long curPayPointer;
|
private long curPayPointer;
|
||||||
private int curPosBufferUpto;
|
private int curPosBufferUpto;
|
||||||
private int curPayloadByteUpto;
|
private int curPayloadByteUpto;
|
||||||
private CompetitiveFreqNormAccumulator[] curCompetitiveFreqNorms;
|
private CompetitiveImpactAccumulator[] curCompetitiveFreqNorms;
|
||||||
private boolean fieldHasPositions;
|
private boolean fieldHasPositions;
|
||||||
private boolean fieldHasOffsets;
|
private boolean fieldHasOffsets;
|
||||||
private boolean fieldHasPayloads;
|
private boolean fieldHasPayloads;
|
||||||
|
@ -85,9 +85,9 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
|
||||||
}
|
}
|
||||||
lastPayloadByteUpto = new int[maxSkipLevels];
|
lastPayloadByteUpto = new int[maxSkipLevels];
|
||||||
}
|
}
|
||||||
curCompetitiveFreqNorms = new CompetitiveFreqNormAccumulator[maxSkipLevels];
|
curCompetitiveFreqNorms = new CompetitiveImpactAccumulator[maxSkipLevels];
|
||||||
for (int i = 0; i < maxSkipLevels; ++i) {
|
for (int i = 0; i < maxSkipLevels; ++i) {
|
||||||
curCompetitiveFreqNorms[i] = new CompetitiveFreqNormAccumulator();
|
curCompetitiveFreqNorms[i] = new CompetitiveImpactAccumulator();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -116,7 +116,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
|
||||||
}
|
}
|
||||||
}
       }
       if (initialized) {
-        for (CompetitiveFreqNormAccumulator acc : curCompetitiveFreqNorms) {
+        for (CompetitiveImpactAccumulator acc : curCompetitiveFreqNorms) {
           acc.clear();
         }
       }
@@ -139,7 +139,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
     }
     // sets of competitive freq,norm pairs should be empty at this point
     assert Arrays.stream(curCompetitiveFreqNorms)
-        .map(CompetitiveFreqNormAccumulator::getCompetitiveFreqNormPairs)
+        .map(CompetitiveImpactAccumulator::getCompetitiveFreqNormPairs)
         .mapToInt(Set::size)
         .sum() == 0;
     initialized = true;
@@ -149,7 +149,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
   /**
    * Sets the values for the current skip data.
    */
-  public void bufferSkip(int doc, CompetitiveFreqNormAccumulator competitiveFreqNorms,
+  public void bufferSkip(int doc, CompetitiveImpactAccumulator competitiveFreqNorms,
       int numDocs, long posFP, long payFP, int posBufferUpto, int payloadByteUpto) throws IOException {
     initSkip();
     this.curDoc = doc;
@@ -191,7 +191,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
       }
     }

-    CompetitiveFreqNormAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
+    CompetitiveImpactAccumulator competitiveFreqNorms = curCompetitiveFreqNorms[level];
     assert competitiveFreqNorms.getCompetitiveFreqNormPairs().size() > 0;
     if (level + 1 < numberOfSkipLevels) {
       curCompetitiveFreqNorms[level + 1].addAll(competitiveFreqNorms);
@@ -203,14 +203,14 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
     competitiveFreqNorms.clear();
   }

-  static void writeImpacts(CompetitiveFreqNormAccumulator acc, IndexOutput out) throws IOException {
-    SortedSet<FreqAndNorm> freqAndNorms = acc.getCompetitiveFreqNormPairs();
-    FreqAndNorm previous = new FreqAndNorm(0, 0);
-    for (FreqAndNorm freqAndNorm : freqAndNorms) {
-      assert freqAndNorm.freq > previous.freq;
-      assert Long.compareUnsigned(freqAndNorm.norm, previous.norm) > 0;
-      int freqDelta = freqAndNorm.freq - previous.freq - 1;
-      long normDelta = freqAndNorm.norm - previous.norm - 1;
+  static void writeImpacts(CompetitiveImpactAccumulator acc, IndexOutput out) throws IOException {
+    SortedSet<Impact> impacts = acc.getCompetitiveFreqNormPairs();
+    Impact previous = new Impact(0, 0);
+    for (Impact impact : impacts) {
+      assert impact.freq > previous.freq;
+      assert Long.compareUnsigned(impact.norm, previous.norm) > 0;
+      int freqDelta = impact.freq - previous.freq - 1;
+      long normDelta = impact.norm - previous.norm - 1;
       if (normDelta == 0) {
         // most of time, norm only increases by 1, so we can fold everything in a single byte
         out.writeVInt(freqDelta << 1);
@@ -218,7 +218,7 @@ final class Lucene50SkipWriter extends MultiLevelSkipListWriter {
         out.writeVInt((freqDelta << 1) | 1);
         out.writeZLong(normDelta);
       }
-      previous = freqAndNorm;
+      previous = impact;
     }
   }
 }
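The writeImpacts hunk above also documents the wire format: impacts are delta-encoded against the previous pair, and since freq and norm both strictly increase, each delta is stored minus one; when the norm grows by exactly one, the freq delta and a zero flag bit fit in a single vint. A minimal decoding sketch, not part of this commit (the real counterpart lives in Lucene50ScoreSkipReader; the readImpacts name here is illustrative):

// Hypothetical decoder for the writeImpacts encoding above; names are
// illustrative, the actual reader in this commit is Lucene50ScoreSkipReader.
static List<Impact> readImpacts(ByteArrayDataInput in) {
  List<Impact> impacts = new ArrayList<>();
  int freq = 0;
  long norm = 0;
  while (in.eof() == false) {
    int token = in.readVInt();
    freq += (token >>> 1) + 1;   // freqDelta was written as freq - previous.freq - 1
    if ((token & 1) != 0) {      // low bit set: an explicit norm delta follows
      norm += in.readZLong() + 1;
    } else {                     // common case: norm increased by exactly 1
      norm += 1;
    }
    impacts.add(new Impact(freq, norm));
  }
  return impacts;
}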
@@ -38,7 +38,6 @@ import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
@@ -1160,7 +1159,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       }

       @Override
-      public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+      public ImpactsEnum impacts(int flags) throws IOException {
         throw new UnsupportedOperationException();
       }

@@ -30,6 +30,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MaxScoreCache;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
@@ -114,7 +115,8 @@ final class FeatureQuery extends Query {
         }

         SimScorer scorer = function.scorer(fieldName, boost);
-        ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+        ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
+        MaxScoreCache maxScoreCache = new MaxScoreCache(impacts, scorer);

         return new Scorer(this) {

@@ -135,12 +137,12 @@ final class FeatureQuery extends Query {

           @Override
           public int advanceShallow(int target) throws IOException {
-            return impacts.advanceShallow(target);
+            return maxScoreCache.advanceShallow(target);
           }

           @Override
           public float getMaxScore(int upTo) throws IOException {
-            return impacts.getMaxScore(upTo);
+            return maxScoreCache.getMaxScore(upTo);
           }

         };

@@ -27,6 +27,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Deque;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
@@ -48,7 +49,6 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -1602,58 +1602,45 @@ public final class CheckIndex implements Closeable {
         // Checking score blocks is heavy, we only do it on long postings lists, on every 1024th term
         // or if slow checks are enabled.
         if (doSlowChecks || docFreq > 1024 || (status.termCount + status.delTermCount) % 1024 == 0) {
-          // Test score blocks
-          // We only score on freq to keep things simple and not pull norms
-          SimScorer scorer = new SimScorer(field) {
-            @Override
-            public float score(float freq, long norm) {
-              return freq;
-            }
-          };

           // First check max scores and block uptos
           // But only if slok checks are enabled since we visit all docs
           if (doSlowChecks) {
             int max = -1;
-            float maxScore = 0;
-            ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+            int maxFreq = 0;
+            ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
             postings = termsEnum.postings(postings, PostingsEnum.FREQS);
-            for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
+            for (int doc = impactsEnum.nextDoc(); ; doc = impactsEnum.nextDoc()) {
               if (postings.nextDoc() != doc) {
                 throw new RuntimeException("Wrong next doc: " + doc + ", expected " + postings.docID());
               }
               if (doc == DocIdSetIterator.NO_MORE_DOCS) {
                 break;
               }
-              if (postings.freq() != impacts.freq()) {
-                throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+              if (postings.freq() != impactsEnum.freq()) {
+                throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impactsEnum.freq());
               }
               if (doc > max) {
-                max = impacts.advanceShallow(doc);
-                if (max < doc) {
-                  throw new RuntimeException("max block doc id " + max + " must be greater than the target: " + doc);
-                }
-                maxScore = impacts.getMaxScore(max);
+                impactsEnum.advanceShallow(doc);
+                Impacts impacts = impactsEnum.getImpacts();
+                checkImpacts(impacts, doc);
+                max = impacts.getDocIdUpTo(0);
+                List<Impact> impacts0 = impacts.getImpacts(0);
+                maxFreq = impacts0.get(impacts0.size() - 1).freq;
               }
-              int max2 = impacts.advanceShallow(doc);
-              if (max != max2) {
-                throw new RuntimeException("max is not stable, initially had " + max + " but now " + max2);
-              }
-              float score = scorer.score(impacts.freq(), 1);
-              if (score > maxScore) {
-                throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+              if (impactsEnum.freq() > maxFreq) {
+                throw new RuntimeException("freq " + impactsEnum.freq() + " is greater than the max freq according to impacts " + maxFreq);
               }
             }
           }

           // Now check advancing
-          ImpactsEnum impacts = termsEnum.impacts(scorer, PostingsEnum.FREQS);
+          ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
           postings = termsEnum.postings(postings, PostingsEnum.FREQS);

           int max = -1;
-          float maxScore = 0;
+          int maxFreq = 0;
           while (true) {
-            int doc = impacts.docID();
+            int doc = impactsEnum.docID();
             boolean advance;
             int target;
             if (((field.hashCode() + doc) & 1) == 1) {
@@ -1662,23 +1649,29 @@ public final class CheckIndex implements Closeable {
             } else {
               advance = true;
               int delta = Math.min(1 + ((31 * field.hashCode() + doc) & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
-              target = impacts.docID() + delta;
+              target = impactsEnum.docID() + delta;
             }

             if (target > max && target % 2 == 1) {
               int delta = Math.min((31 * field.hashCode() + target) & 0x1ff, DocIdSetIterator.NO_MORE_DOCS - target);
               max = target + delta;
-              int m = impacts.advanceShallow(target);
-              if (m < target) {
-                throw new RuntimeException("Block max doc: " + m + " is less than the target " + target);
+              impactsEnum.advanceShallow(target);
+              Impacts impacts = impactsEnum.getImpacts();
+              checkImpacts(impacts, doc);
+              maxFreq = Integer.MAX_VALUE;
+              for (int level = 0; level < impacts.numLevels(); ++level) {
+                if (impacts.getDocIdUpTo(level) >= max) {
+                  List<Impact> perLevelImpacts = impacts.getImpacts(level);
+                  maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
+                  break;
+                }
               }
-              maxScore = impacts.getMaxScore(max);
             }

             if (advance) {
-              doc = impacts.advance(target);
+              doc = impactsEnum.advance(target);
             } else {
-              doc = impacts.nextDoc();
+              doc = impactsEnum.nextDoc();
             }

             if (postings.advance(target) != doc) {
@@ -1687,23 +1680,28 @@ public final class CheckIndex implements Closeable {
             if (doc == DocIdSetIterator.NO_MORE_DOCS) {
               break;
             }
-            if (postings.freq() != impacts.freq()) {
-              throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impacts.freq());
+            if (postings.freq() != impactsEnum.freq()) {
+              throw new RuntimeException("Wrong freq, expected " + postings.freq() + ", but got " + impactsEnum.freq());
             }

             if (doc >= max) {
               int delta = Math.min((31 * field.hashCode() + target & 0x1ff), DocIdSetIterator.NO_MORE_DOCS - doc);
               max = doc + delta;
-              int m = impacts.advanceShallow(doc);
-              if (m < doc) {
-                throw new RuntimeException("Block max doc: " + m + " is less than the target " + doc);
+              impactsEnum.advanceShallow(doc);
+              Impacts impacts = impactsEnum.getImpacts();
+              checkImpacts(impacts, doc);
+              maxFreq = Integer.MAX_VALUE;
+              for (int level = 0; level < impacts.numLevels(); ++level) {
+                if (impacts.getDocIdUpTo(level) >= max) {
+                  List<Impact> perLevelImpacts = impacts.getImpacts(level);
+                  maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
+                  break;
+                }
               }
-              maxScore = impacts.getMaxScore(max);
             }

-            float score = scorer.score(impacts.freq(), 1);
-            if (score > maxScore) {
-              throw new RuntimeException("score " + score + " is greater than the max score " + maxScore);
+            if (impactsEnum.freq() > maxFreq) {
+              throw new RuntimeException("Term frequency " + impactsEnum.freq() + " is greater than the max freq according to impacts " + maxFreq);
             }
           }
         }
@@ -1850,6 +1848,68 @@ public final class CheckIndex implements Closeable {
     return status;
   }

+  static void checkImpacts(Impacts impacts, int lastTarget) {
+    final int numLevels = impacts.numLevels();
+    if (numLevels < 1) {
+      throw new RuntimeException("The number of levels must be >= 1, got " + numLevels);
+    }
+
+    int docIdUpTo0 = impacts.getDocIdUpTo(0);
+    if (docIdUpTo0 < lastTarget) {
+      throw new RuntimeException("getDocIdUpTo returned " + docIdUpTo0 + " on level 0, which is less than the target " + lastTarget);
+    }
+
+    for (int level = 1; level < numLevels; ++level) {
+      int docIdUpTo = impacts.getDocIdUpTo(level);
+      int previousDocIdUpTo = impacts.getDocIdUpTo(level - 1);
+      if (docIdUpTo < previousDocIdUpTo) {
+        throw new RuntimeException("Decreasing return for getDocIdUpTo: level " + (level-1) + " returned " + previousDocIdUpTo
+            + " but level " + level + " returned " + docIdUpTo + " for target " + lastTarget);
+      }
+    }
+
+    for (int level = 0; level < numLevels; ++level) {
+      List<Impact> perLevelImpacts = impacts.getImpacts(level);
+      if (perLevelImpacts.isEmpty()) {
+        throw new RuntimeException("Got empty list of impacts on level " + level);
+      }
+      Impact first = perLevelImpacts.get(0);
+      if (first.freq < 1) {
+        throw new RuntimeException("First impact had a freq <= 0: " + first);
+      }
+      if (first.norm == 0) {
+        throw new RuntimeException("First impact had a norm == 0: " + first);
+      }
+      // Impacts must be in increasing order of norm AND freq
+      Impact previous = first;
+      for (int i = 1; i < perLevelImpacts.size(); ++i) {
+        Impact impact = perLevelImpacts.get(i);
+        if (impact.freq <= previous.freq || Long.compareUnsigned(impact.norm, previous.norm) <= 0) {
+          throw new RuntimeException("Impacts are not ordered or contain dups, got " + previous + " then " + impact);
+        }
+      }
+      if (level > 0) {
+        // Make sure that impacts at level N trigger better scores than an level N-1
+        Iterator<Impact> previousIt = impacts.getImpacts(level-1).iterator();
+        previous = previousIt.next();
+        Iterator<Impact> it = perLevelImpacts.iterator();
+        Impact impact = it.next();
+        while (previousIt.hasNext()) {
+          previous = previousIt.next();
+          if (previous.freq <= impact.freq && Long.compareUnsigned(previous.norm, impact.norm) >= 0) {
+            // previous triggers a lower score than the current impact, all good
+            continue;
+          }
+          if (it.hasNext() == false) {
+            throw new RuntimeException("Found impact " + previous + " on level " + (level-1) + " but no impact on level "
+                + level + " triggers a better score: " + perLevelImpacts);
+          }
+          impact = it.next();
+        }
+      }
+    }
+  }

   /**
    * Test the term index.
    * @lucene.experimental
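checkImpacts enforces the invariants the new Impacts API promises: at least one level, level 0 covering the target, non-decreasing getDocIdUpTo across levels, and non-empty per-level lists ordered by strictly increasing freq and unsigned norm, with higher levels dominating lower ones. As an illustration only (not from the commit), an impacts list whose norms are not increasing trips the ordering check; this would live in a test in the org.apache.lucene.index package, where checkImpacts is visible:

// Illustrative only: this single-level Impacts violates the ordering
// invariant (norm drops from 16 to 8 while freq increases), so
// checkImpacts throws. Assumes java.util imports and LuceneTestCase.
Impacts broken = new Impacts() {
  @Override
  public int numLevels() {
    return 1;
  }

  @Override
  public int getDocIdUpTo(int level) {
    return DocIdSetIterator.NO_MORE_DOCS;
  }

  @Override
  public List<Impact> getImpacts(int level) {
    return Arrays.asList(new Impact(3, 16L), new Impact(7, 8L)); // norms not increasing
  }
};
expectThrows(RuntimeException.class, () -> CheckIndex.checkImpacts(broken, 0));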
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.Iterator;

-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -216,8 +215,8 @@ public abstract class FilterLeafReader extends LeafReader {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return in.impacts(scorer, flags);
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return in.impacts(flags);
     }
   }

@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;

 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;

 /**
@@ -184,8 +183,8 @@ public abstract class FilteredTermsEnum extends TermsEnum {
   }

   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-    return tenum.impacts(scorer, flags);
+  public ImpactsEnum impacts(int flags) throws IOException {
+    return tenum.impacts(flags);
   }

   /** This enum does not support seeking!
@@ -24,7 +24,6 @@ import java.util.List;
 import java.util.Map;

 import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -275,7 +274,7 @@ class FreqProxFields extends Fields {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       throw new UnsupportedOperationException();
     }

@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+/**
+ * Per-document scoring factors.
+ */
+public final class Impact {
+
+  /**
+   * Term frequency of the term in the document.
+   */
+  public int freq;
+
+  /**
+   * Norm factor of the document.
+   */
+  public long norm;
+
+  /**
+   * Constructor.
+   */
+  public Impact(int freq, long norm) {
+    this.freq = freq;
+    this.norm = norm;
+  }
+
+  @Override
+  public String toString() {
+    return "{freq=" + freq + ",norm=" + norm + "}";
+  }
+
+  @Override
+  public int hashCode() {
+    int h = freq;
+    h = 31 * h + Long.hashCode(norm);
+    return h;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || getClass() != obj.getClass()) return false;
+    Impact other = (Impact) obj;
+    return freq == other.freq && norm == other.norm;
+  }
+
+}
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.util.List;
+
+/**
+ * Information about upcoming impacts, ie. (freq, norm) pairs.
+ */
+public abstract class Impacts {
+
+  /**
+   * Return the number of levels on which we have impacts.
+   * The returned value is always greater than 0 and may not always be the
+   * same, even on a single postings list, depending on the current doc ID.
+   */
+  public abstract int numLevels();
+
+  /**
+   * Return the maximum inclusive doc ID until which the list of impacts
+   * returned by {@link #getImpacts(int)} is valid. This is a non-decreasing
+   * function of {@code level}.
+   */
+  public abstract int getDocIdUpTo(int level);
+
+  /**
+   * Return impacts on the given level. These impacts are sorted by increasing
+   * frequency and increasing unsigned norm, and only valid until the doc ID
+   * returned by {@link #getDocIdUpTo(int)} for the same level, included.
+   * The returned list is never empty.
+   * NOTE: There is no guarantee that these impacts actually appear in postings,
+   * only that they trigger scores that are greater than or equal to the impacts
+   * that actually appear in postings.
+   */
+  public abstract List<Impact> getImpacts(int level);
+
+}
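To make this contract concrete, here is an illustrative two-level Impacts, not taken from the commit: level 0 describes the current block of documents precisely, while level 1 covers a wider doc ID range with a single pair that dominates level 0 (higher freq, lower norm), so any score it triggers upper-bounds the scores below it. The doc ID bounds are made-up values.

// Illustrative sketch only: a two-level Impacts honoring the contract above.
Impacts example = new Impacts() {
  @Override
  public int numLevels() {
    return 2;
  }

  @Override
  public int getDocIdUpTo(int level) {
    return level == 0 ? 127 : 4095; // non-decreasing in level, as required
  }

  @Override
  public List<Impact> getImpacts(int level) {
    if (level == 0) {
      // tight bounds for docs up to 127, sorted by increasing freq and norm
      return Arrays.asList(new Impact(2, 16L), new Impact(5, 32L));
    }
    // a single pair with freq >= 5 and norm <= 16 dominates every level-0
    // pair, so scores it triggers bound anything seen up to doc 4095
    return Collections.singletonList(new Impact(9, 7L));
  }
};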
@@ -18,11 +18,9 @@ package org.apache.lucene.index;

 import java.io.IOException;

-import org.apache.lucene.search.DocIdSetIterator;

 /**
- * Extension of {@link PostingsEnum} which also provides information about the
- * produced scores.
+ * Extension of {@link PostingsEnum} which also provides information about
+ * upcoming impacts.
  * @lucene.experimental
  */
 public abstract class ImpactsEnum extends PostingsEnum {
@@ -31,23 +29,28 @@ public abstract class ImpactsEnum extends PostingsEnum {
   protected ImpactsEnum() {}

   /**
-   * Advance to the block of documents that contains {@code target} in order to
-   * get scoring information about this block. This method is implicitly called
-   * by {@link DocIdSetIterator#advance(int)} and
-   * {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
-   * current {@link DocIdSetIterator#docID()}.
-   * It returns a number that is greater than or equal to all documents
-   * contained in the current block, but less than any doc IDS of the next block.
-   * {@code target} must be >= {@link #docID()} as well as all targets that
-   * have been passed to {@link #advanceShallow(int)} so far.
+   * Shallow-advance to {@code target}. This is cheaper than calling
+   * {@link #advance(int)} and allows further calls to {@link #getImpacts()}
+   * to ignore doc IDs that are less than {@code target} in order to get more
+   * precise information about impacts.
+   * This method may not be called on targets that are less than the current
+   * {@link #docID()}.
+   * After this method has been called, {@link #nextDoc()} may not be called
+   * if the current doc ID is less than {@code target - 1} and
+   * {@link #advance(int)} may not be called on targets that are less than
+   * {@code target}.
    */
-  public abstract int advanceShallow(int target) throws IOException;
+  public abstract void advanceShallow(int target) throws IOException;

   /**
-   * Return the maximum score that documents between the last {@code target}
-   * that this iterator was {@link #advanceShallow(int) shallow-advanced} to
-   * included and {@code upTo} included.
+   * Get information about upcoming impacts for doc ids that are greater than
+   * or equal to the maximum of {@link #docID()} and the last target that was
+   * passed to {@link #advanceShallow(int)}.
+   * This method may not be called on an unpositioned iterator on which
+   * {@link #advanceShallow(int)} has never been called.
+   * NOTE: advancing this iterator may invalidate the returned impacts, so they
+   * should not be used after the iterator has been advanced.
    */
-  public abstract float getMaxScore(int upTo) throws IOException;
+  public abstract Impacts getImpacts() throws IOException;

 }
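The new protocol thus splits what used to be a single advanceShallow-returning-an-upper-bound call into two steps: a cheap shallow advance, then a getImpacts call from which callers derive whatever bound they need. A hedged sketch of a caller, using a hypothetical maxFreqUpTo helper that mirrors what CheckIndex does above:

// Illustrative helper (not from this commit): shallow-advance, then read the
// level-0 impacts to bound the term frequency up to getDocIdUpTo(0).
static int maxFreqUpTo(ImpactsEnum impactsEnum, int target) throws IOException {
  impactsEnum.advanceShallow(target);           // cheap: does not move docID()
  Impacts impacts = impactsEnum.getImpacts();
  List<Impact> level0 = impacts.getImpacts(0);  // valid up to impacts.getDocIdUpTo(0)
  return level0.get(level0.size() - 1).freq;    // impacts are sorted by increasing freq
}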
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;

-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -369,9 +368,9 @@ public final class MultiTermsEnum extends TermsEnum {
   }

   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     // implemented to not fail CheckIndex, but you shouldn't be using impacts on a slow reader
-    return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    return new SlowImpactsEnum(postings(null, flags));
   }

   final static class TermsEnumWithSlice {
@@ -17,23 +17,45 @@
 package org.apache.lucene.index;

 import java.io.IOException;
+import java.util.Collections;
+import java.util.List;

+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BytesRef;

 /**
  * {@link ImpactsEnum} that doesn't index impacts but implements the API in a
- * legal way. This should typically be used for short postings that do not need
+ * legal way. This is typically used for short postings that do not need
  * skipping.
  */
 public final class SlowImpactsEnum extends ImpactsEnum {

+  private static final Impacts DUMMY_IMPACTS = new Impacts() {
+
+    private final List<Impact> impacts = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
+
+    @Override
+    public int numLevels() {
+      return 1;
+    }
+
+    @Override
+    public int getDocIdUpTo(int level) {
+      return DocIdSetIterator.NO_MORE_DOCS;
+    }
+
+    @Override
+    public List<Impact> getImpacts(int level) {
+      return impacts;
+    }
+
+  };
+
   private final PostingsEnum delegate;
-  private final float maxScore;

   /** Wrap the given {@link PostingsEnum}. */
-  public SlowImpactsEnum(PostingsEnum delegate, float maxScore) {
+  public SlowImpactsEnum(PostingsEnum delegate) {
     this.delegate = delegate;
-    this.maxScore = maxScore;
   }

   @Override
@@ -82,13 +104,10 @@ public final class SlowImpactsEnum extends ImpactsEnum {
   }

   @Override
-  public int advanceShallow(int target) {
-    return NO_MORE_DOCS;
-  }
+  public void advanceShallow(int target) {}

   @Override
-  public float getMaxScore(int maxDoc) {
-    return maxScore;
+  public Impacts getImpacts() {
+    return DUMMY_IMPACTS;
   }

 }
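The DUMMY_IMPACTS above is the most pessimistic legal answer: one level covering all doc IDs with the single pair (Integer.MAX_VALUE, 1L). Assuming the usual similarity contract that scores do not decrease with freq and do not increase with norm, that pair dominates any real (freq, norm) pair, as this illustrative fragment spells out:

// Illustrative only: why SlowImpactsEnum's dummy impact is a legal upper
// bound. For any SimScorer whose scores are non-decreasing in freq and
// non-increasing in norm, (Integer.MAX_VALUE, 1L) dominates every real pair.
static float globalUpperBound(SimScorer simScorer) {
  return simScorer.score(Integer.MAX_VALUE, 1L);
}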
@@ -19,7 +19,6 @@ package org.apache.lucene.index;

 import java.io.IOException;

-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;

@@ -111,7 +110,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
   }

   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     throw new UnsupportedOperationException();
   }

@@ -17,7 +17,6 @@
 package org.apache.lucene.index;

-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;

@@ -111,7 +110,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
   }

   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     throw new UnsupportedOperationException();
   }

@@ -19,7 +19,6 @@ package org.apache.lucene.index;

 import java.io.IOException;

-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
@@ -171,10 +170,10 @@ public abstract class TermsEnum implements BytesRefIterator {
   public abstract PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException;

   /**
-   * Return a {@link ImpactsEnum} that computes impacts with {@code scorer}.
+   * Return a {@link ImpactsEnum}.
    * @see #postings(PostingsEnum, int)
    */
-  public abstract ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException;
+  public abstract ImpactsEnum impacts(int flags) throws IOException;

   /**
    * Expert: Returns the TermsEnums internal state to position the TermsEnum
@@ -236,7 +235,7 @@ public abstract class TermsEnum implements BytesRefIterator {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       throw new IllegalStateException("this method should never be called");
     }

@@ -38,6 +38,9 @@ final class BlockMaxConjunctionScorer extends Scorer {
   BlockMaxConjunctionScorer(Weight weight, Collection<Scorer> scorersList) throws IOException {
     super(weight);
     this.scorers = scorersList.toArray(new Scorer[scorersList.size()]);
+    for (Scorer scorer : scorers) {
+      scorer.advanceShallow(0);
+    }
     this.maxScorePropagator = new MaxScoreSumPropagator(scorersList);

     // Put scorers with the higher max scores first
@@ -23,7 +23,6 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
@@ -275,8 +274,8 @@ public final class FuzzyTermsEnum extends TermsEnum {
   }

   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-    return actualEnum.impacts(scorer, flags);
+  public ImpactsEnum impacts(int flags) throws IOException {
+    return actualEnum.impacts(flags);
   }

   @Override
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.index.Impact;
+import org.apache.lucene.index.Impacts;
+import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.util.ArrayUtil;
+
+/**
+ * Compute maximum scores based on {@link Impacts} and keep them in a cache in
+ * order not to run expensive similarity score computations multiple times on
+ * the same data.
+ */
+public final class MaxScoreCache {
+
+  private final ImpactsEnum impactsEnum;
+  private final SimScorer scorer;
+  private final float globalMaxScore;
+  private float[] maxScoreCache;
+  private int[] maxScoreCacheUpTo;
+
+  /**
+   * Sole constructor.
+   */
+  public MaxScoreCache(ImpactsEnum impactsEnum, SimScorer scorer) {
+    this.impactsEnum = impactsEnum;
+    this.scorer = scorer;
+    globalMaxScore = scorer.score(Integer.MAX_VALUE, 1L);
+    maxScoreCache = new float[0];
+    maxScoreCacheUpTo = new int[0];
+  }
+
+  private void ensureCacheSize(int size) {
+    if (maxScoreCache.length < size) {
+      int oldLength = maxScoreCache.length;
+      maxScoreCache = ArrayUtil.grow(maxScoreCache, size);
+      maxScoreCacheUpTo = Arrays.copyOf(maxScoreCacheUpTo, maxScoreCache.length);
+      Arrays.fill(maxScoreCacheUpTo, oldLength, maxScoreCacheUpTo.length, -1);
+    }
+  }
+
+  private float computeMaxScore(List<Impact> impacts) {
+    float maxScore = 0;
+    for (Impact impact : impacts) {
+      maxScore = Math.max(scorer.score(impact.freq, impact.norm), maxScore);
+    }
+    return maxScore;
+  }
+
+  /**
+   * Return the first level that includes all doc IDs up to {@code upTo},
+   * or -1 if there is no such level.
+   */
+  private int getLevel(int upTo) throws IOException {
+    final Impacts impacts = impactsEnum.getImpacts();
+    for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
+      final int impactsUpTo = impacts.getDocIdUpTo(level);
+      if (upTo <= impactsUpTo) {
+        return level;
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * Return the maximum score for the given {@code level}.
+   */
+  float getMaxScoreForLevel(int level) throws IOException {
+    final Impacts impacts = impactsEnum.getImpacts();
+    ensureCacheSize(level + 1);
+    final int levelUpTo = impacts.getDocIdUpTo(level);
+    if (maxScoreCacheUpTo[level] < levelUpTo) {
+      maxScoreCache[level] = computeMaxScore(impacts.getImpacts(level));
+      maxScoreCacheUpTo[level] = levelUpTo;
+    }
+    return maxScoreCache[level];
+  }
+
+  /**
+   * Return the maximum level at which scores are all less than {@code minScore},
+   * or -1 if none.
+   */
+  int getSkipLevel(float minScore) throws IOException {
+    final Impacts impacts = impactsEnum.getImpacts();
+    final int numLevels = impacts.numLevels();
+    for (int level = 0; level < numLevels; ++level) {
+      if (getMaxScoreForLevel(level) >= minScore) {
+        return level - 1;
+      }
+    }
+    return numLevels - 1;
+  }
+
+  /**
+   * Implement the contract of {@link Scorer#advanceShallow(int)} based on the
+   * wrapped {@link ImpactsEnum}.
+   * @see Scorer#advanceShallow(int)
+   */
+  public int advanceShallow(int target) throws IOException {
+    impactsEnum.advanceShallow(target);
+    Impacts impacts = impactsEnum.getImpacts();
+    return impacts.getDocIdUpTo(0);
+  }
+
+  /**
+   * Implement the contract of {@link Scorer#getMaxScore(int)} based on the
+   * wrapped {@link ImpactsEnum} and {@link Scorer}.
+   * @see Scorer#getMaxScore(int)
+   */
+  public float getMaxScore(int upTo) throws IOException {
+    final int level = getLevel(upTo);
+    if (level == -1) {
+      return globalMaxScore;
+    } else {
+      return getMaxScoreForLevel(level);
+    }
+  }
+}
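MaxScoreCache is the piece that reintroduces scores on top of the now score-free postings APIs. A hedged usage sketch; identifiers such as termsEnum, simScorer and minCompetitiveScore are assumed to exist in the caller, as they do in TermScorer below:

// Illustrative wiring of MaxScoreCache between an ImpactsEnum and a SimScorer.
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
MaxScoreCache cache = new MaxScoreCache(impactsEnum, simScorer);
int blockEnd = cache.advanceShallow(0);        // inclusive upper bound of the first block
float blockMax = cache.getMaxScore(blockEnd);  // cached max score up to that doc ID
if (blockMax < minCompetitiveScore) {
  // the whole block can be skipped without scoring any document in it
}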
@@ -161,8 +161,8 @@ public abstract class Scorer {
    * Advance to the block of documents that contains {@code target} in order to
    * get scoring information about this block. This method is implicitly called
    * by {@link DocIdSetIterator#advance(int)} and
-   * {@link DocIdSetIterator#nextDoc()}. Calling this method doesn't modify the
-   * current {@link DocIdSetIterator#docID()}.
+   * {@link DocIdSetIterator#nextDoc()} on the returned doc ID. Calling this
+   * method doesn't modify the current {@link DocIdSetIterator#docID()}.
    * It returns a number that is greater than or equal to all documents
    * contained in the current block, but less than any doc IDS of the next block.
    * {@code target} must be >= {@link #docID()} as well as all targets that
@@ -19,6 +19,7 @@ package org.apache.lucene.search;

 import java.io.IOException;

+import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
@@ -31,6 +32,7 @@ final class TermScorer extends Scorer {
   private final ImpactsEnum impactsEnum;
   private final DocIdSetIterator iterator;
   private final LeafSimScorer docScorer;
+  private final MaxScoreCache maxScoreCache;
   private float minCompetitiveScore;

   /**
@@ -47,7 +49,8 @@ final class TermScorer extends Scorer {
     super(weight);
     this.docScorer = docScorer;
     if (scoreMode == ScoreMode.TOP_SCORES) {
-      impactsEnum = te.impacts(docScorer.getSimScorer(), PostingsEnum.FREQS);
+      impactsEnum = te.impacts(PostingsEnum.FREQS);
+      maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
       postingsEnum = impactsEnum;
       iterator = new DocIdSetIterator() {

@@ -61,8 +64,10 @@ final class TermScorer extends Scorer {
         }

         if (target > upTo) {
-          upTo = impactsEnum.advanceShallow(target);
-          maxScore = impactsEnum.getMaxScore(upTo);
+          impactsEnum.advanceShallow(target);
+          Impacts impacts = impactsEnum.getImpacts();
+          upTo = impacts.getDocIdUpTo(0);
+          maxScore = maxScoreCache.getMaxScoreForLevel(0);
         }

         while (true) {
@@ -76,10 +81,23 @@ final class TermScorer extends Scorer {
             return NO_MORE_DOCS;
           }

-          target = upTo + 1;
-          upTo = impactsEnum.advanceShallow(target);
-          maxScore = impactsEnum.getMaxScore(upTo);
+          impactsEnum.advanceShallow(upTo + 1);
+          Impacts impacts = impactsEnum.getImpacts();
+          final int level = maxScoreCache.getSkipLevel(minCompetitiveScore);
+          if (level >= 0) {
+            // we can skip more docs
+            int newUpTo = impacts.getDocIdUpTo(level);
+            if (newUpTo == NO_MORE_DOCS) {
+              return NO_MORE_DOCS;
+            }
+            target = newUpTo + 1;
+            impactsEnum.advanceShallow(target);
+            impacts = impactsEnum.getImpacts();
+          } else {
+            target = upTo + 1;
+          }
+          upTo = impacts.getDocIdUpTo(0);
+          maxScore = maxScoreCache.getMaxScoreForLevel(0);
         }
       }

@@ -105,7 +123,8 @@ final class TermScorer extends Scorer {
       };
     } else {
       postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
-      impactsEnum = new SlowImpactsEnum(postingsEnum, docScorer.getSimScorer().score(Float.MAX_VALUE, 1));
+      impactsEnum = new SlowImpactsEnum(postingsEnum);
+      maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
       iterator = postingsEnum;
     }
   }
@@ -132,12 +151,12 @@ final class TermScorer extends Scorer {

   @Override
   public int advanceShallow(int target) throws IOException {
-    return impactsEnum.advanceShallow(target);
+    return maxScoreCache.advanceShallow(target);
   }

   @Override
   public float getMaxScore(int upTo) throws IOException {
-    return impactsEnum.getMaxScore(upTo);
+    return maxScoreCache.getMaxScore(upTo);
   }

   @Override
@@ -148,4 +167,5 @@ final class TermScorer extends Scorer {
   /** Returns a string representation of this <code>TermScorer</code>. */
   @Override
   public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; }
+
 }
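The rewritten loop above decides how far it may jump by asking MaxScoreCache for the highest level whose cached max score is still below the minimum competitive score; everything up to that level's getDocIdUpTo can be skipped in one shallow advance. A condensed, illustrative sketch of that decision (getSkipLevel is a package-private helper of MaxScoreCache, so this would live in org.apache.lucene.search; the method name is hypothetical):

// Condensed sketch of the skipping decision in TermScorer's iterator above.
static int chooseNextTarget(Impacts impacts, MaxScoreCache cache,
    float minCompetitiveScore, int upTo) throws IOException {
  int level = cache.getSkipLevel(minCompetitiveScore);
  if (level >= 0) {
    // every doc up to getDocIdUpTo(level) scores below the threshold:
    // skip the whole range in one jump
    return impacts.getDocIdUpTo(level) + 1;
  }
  // level 0 may contain competitive docs, so only move past the current block
  return upTo + 1;
}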
@@ -139,6 +139,7 @@ final class WANDScorer extends Scorer {

     OptionalInt scalingFactor = OptionalInt.empty();
     for (Scorer scorer : scorers) {
+      scorer.advanceShallow(0);
       float maxScore = scorer.getMaxScore(DocIdSetIterator.NO_MORE_DOCS);
       if (maxScore != 0 && Float.isFinite(maxScore)) {
         // 0 and +Infty should not impact the scale
@@ -20,85 +20,85 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;

-import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator.FreqAndNorm;
+import org.apache.lucene.index.Impact;
 import org.apache.lucene.util.LuceneTestCase;

 public class TestCompetitiveFreqNormAccumulator extends LuceneTestCase {

   public void testBasics() {
-    CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
-    Set<FreqAndNorm> expected = new HashSet<>();
+    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
+    Set<Impact> expected = new HashSet<>();

     acc.add(3, 5);
-    expected.add(new FreqAndNorm(3, 5));
+    expected.add(new Impact(3, 5));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(6, 11);
-    expected.add(new FreqAndNorm(6, 11));
+    expected.add(new Impact(6, 11));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(10, 13);
-    expected.add(new FreqAndNorm(10, 13));
+    expected.add(new Impact(10, 13));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(1, 2);
-    expected.add(new FreqAndNorm(1, 2));
+    expected.add(new Impact(1, 2));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(7, 9);
-    expected.remove(new FreqAndNorm(6, 11));
-    expected.add(new FreqAndNorm(7, 9));
+    expected.remove(new Impact(6, 11));
+    expected.add(new Impact(7, 9));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(8, 2);
     expected.clear();
-    expected.add(new FreqAndNorm(10, 13));
-    expected.add(new FreqAndNorm(8, 2));
+    expected.add(new Impact(10, 13));
+    expected.add(new Impact(8, 2));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());
   }

   public void testExtremeNorms() {
-    CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
-    Set<FreqAndNorm> expected = new HashSet<>();
+    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
+    Set<Impact> expected = new HashSet<>();

     acc.add(3, 5);
-    expected.add(new FreqAndNorm(3, 5));
+    expected.add(new Impact(3, 5));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(10, 10000);
-    expected.add(new FreqAndNorm(10, 10000));
+    expected.add(new Impact(10, 10000));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(5, 200);
-    expected.add(new FreqAndNorm(5, 200));
+    expected.add(new Impact(5, 200));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(20, -100);
-    expected.add(new FreqAndNorm(20, -100));
+    expected.add(new Impact(20, -100));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());

     acc.add(30, -3);
-    expected.add(new FreqAndNorm(30, -3));
+    expected.add(new Impact(30, -3));
     assertEquals(expected, acc.getCompetitiveFreqNormPairs());
   }

   public void testOmitFreqs() {
-    CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();

     acc.add(1, 5);
     acc.add(1, 7);
     acc.add(1, 4);

-    assertEquals(Collections.singleton(new FreqAndNorm(1, 4)), acc.getCompetitiveFreqNormPairs());
+    assertEquals(Collections.singleton(new Impact(1, 4)), acc.getCompetitiveFreqNormPairs());
   }

   public void testOmitNorms() {
-    CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
+    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();

     acc.add(5, 1);
     acc.add(7, 1);
     acc.add(4, 1);

-    assertEquals(Collections.singleton(new FreqAndNorm(7, 1)), acc.getCompetitiveFreqNormPairs());
+    assertEquals(Collections.singleton(new Impact(7, 1)), acc.getCompetitiveFreqNormPairs());
   }
 }
@@ -18,19 +18,23 @@ package org.apache.lucene.codecs.lucene50;

 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;

 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.CompetitiveFreqNormAccumulator;
+import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
 import org.apache.lucene.codecs.blocktree.FieldReader;
 import org.apache.lucene.codecs.blocktree.Stats;
+import org.apache.lucene.codecs.lucene50.Lucene50ScoreSkipReader.MutableImpactList;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.BasePostingsFormatTestCase;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Impact;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
@@ -89,33 +93,43 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {

   public void testImpactSerialization() throws IOException {
     // omit norms and omit freqs
-    doTestImpactSerialization(new int[] { 1 }, new long[] { 1L });
+    doTestImpactSerialization(Collections.singletonList(new Impact(1, 1L)));

     // omit freqs
-    doTestImpactSerialization(new int[] { 1 }, new long[] { 42L });
+    doTestImpactSerialization(Collections.singletonList(new Impact(1, 42L)));
     // omit freqs with very large norms
-    doTestImpactSerialization(new int[] { 1 }, new long[] { -100L });
+    doTestImpactSerialization(Collections.singletonList(new Impact(1, -100L)));

     // omit norms
-    doTestImpactSerialization(new int[] { 30 }, new long[] { 1L });
+    doTestImpactSerialization(Collections.singletonList(new Impact(30, 1L)));
     // omit norms with large freq
-    doTestImpactSerialization(new int[] { 500 }, new long[] { 1L });
+    doTestImpactSerialization(Collections.singletonList(new Impact(500, 1L)));

     // freqs and norms, basic
     doTestImpactSerialization(
-        new int[] { 1, 3, 7, 15, 20, 28 },
-        new long[] { 7L, 9L, 10L, 11L, 13L, 14L });
+        Arrays.asList(
+            new Impact(1, 7L),
+            new Impact(3, 9L),
+            new Impact(7, 10L),
+            new Impact(15, 11L),
+            new Impact(20, 13L),
+            new Impact(28, 14L)));

     // freqs and norms, high values
     doTestImpactSerialization(
-        new int[] { 2, 10, 12, 50, 1000, 1005 },
-        new long[] { 2L, 10L, 50L, -100L, -80L, -3L });
+        Arrays.asList(
+            new Impact(2, 2L),
+            new Impact(10, 10L),
+            new Impact(12, 50L),
+            new Impact(50, -100L),
+            new Impact(1000, -80L),
+            new Impact(1005, -3L)));
   }

-  private void doTestImpactSerialization(int[] freqs, long[] norms) throws IOException {
-    CompetitiveFreqNormAccumulator acc = new CompetitiveFreqNormAccumulator();
-    for (int i = 0; i < freqs.length; ++i) {
-      acc.add(freqs[i], norms[i]);
+  private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
+    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
+    for (Impact impact : impacts) {
+      acc.add(impact.freq, impact.norm);
     }
     try(Directory dir = newDirectory()) {
       try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
@@ -124,17 +138,8 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
       try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
         byte[] b = new byte[Math.toIntExact(in.length())];
         in.readBytes(b, 0, b.length);
-        Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new SimScorer("") {
-          int i = 0;
-
-          @Override
-          public float score(float freq, long norm) {
-            assert freq == freqs[i];
-            assert norm == norms[i];
-            i++;
-            return 0;
-          }
-        });
+        List<Impact> impacts2 = Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
+        assertEquals(impacts, impacts2);
       }
     }
   }
@@ -34,7 +34,6 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;

@@ -680,7 +679,7 @@ public class TestCodecs extends LuceneTestCase {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       throw new UnsupportedOperationException();
     }
   }
@@ -42,7 +42,6 @@ import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.SimpleCollector;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;

@@ -1429,8 +1428,8 @@ public class MemoryIndex {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1L));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, flags));
     }

     @Override
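The MemoryIndex change above, like the IDVersion and RAMOnly changes below, shows the fallback path: with no SimScorer to consult, SlowImpactsEnum wraps a plain PostingsEnum and can only advertise the trivially pessimistic impact. A sketch of what such a single-level Impacts amounts to, using the Integer.MAX_VALUE/1L worst case that RandomPostingsTester also assumes later in this diff; the shipped SlowImpactsEnum may differ in details:

    import java.util.Collections;
    import java.util.List;

    import org.apache.lucene.index.Impact;
    import org.apache.lucene.index.Impacts;
    import org.apache.lucene.search.DocIdSetIterator;

    // Sketch only: a maximally pessimistic, single-level Impacts.
    class TrivialImpacts extends Impacts {
      @Override
      public int numLevels() {
        return 1; // one coarse level covering everything
      }

      @Override
      public int getDocIdUpTo(int level) {
        return DocIdSetIterator.NO_MORE_DOCS; // valid for all remaining docs
      }

      @Override
      public List<Impact> getImpacts(int level) {
        // Maximal freq with minimal norm can never underestimate a score.
        return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
      }
    }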
@@ -21,11 +21,10 @@ import java.io.IOException;
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PostingsReaderBase;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;

@@ -90,7 +89,7 @@ final class IDVersionPostingsReader extends PostingsReaderBase {
   }

   @Override
-  public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
     throw new UnsupportedOperationException("Should never be called, IDVersionSegmentTermsEnum implements impacts directly");
   }
@@ -25,7 +25,6 @@ import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;

@@ -1009,10 +1008,10 @@ public final class IDVersionSegmentTermsEnum extends TermsEnum {
   }

   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     // Only one posting, the slow impl is fine
     // We could make this throw UOE but then CheckIndex is angry
-    return new SlowImpactsEnum(postings(null, flags), scorer.score(Float.MAX_VALUE, 1));
+    return new SlowImpactsEnum(postings(null, flags));
   }

   @Override
@@ -45,7 +45,6 @@ import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Accountable;

@@ -477,8 +476,8 @@ public final class RAMOnlyPostingsFormat extends PostingsFormat {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS), scorer.score(Float.MAX_VALUE, 1));
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return new SlowImpactsEnum(postings(null, PostingsEnum.FREQS));
     }
   }
@@ -18,12 +18,12 @@ package org.apache.lucene.index;

 import java.io.IOException;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Objects;

 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;

@@ -211,12 +211,12 @@ public class AssertingLeafReader extends FilterLeafReader {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+    public ImpactsEnum impacts(int flags) throws IOException {
       assertThread("Terms enums", creationThread);
       assert state == State.POSITIONED: "docs(...) called on unpositioned TermsEnum";
       assert (flags & PostingsEnum.FREQS) != 0 : "Freqs should be requested on impacts";

-      return new AssertingImpactsEnum(super.impacts(scorer, flags));
+      return new AssertingImpactsEnum(super.impacts(flags));
     }

     // TODO: we should separately track if we are 'at the end' ?

@@ -454,7 +454,7 @@

     private final AssertingPostingsEnum assertingPostings;
     private final ImpactsEnum in;
-    private int lastShallowTarget;
+    private int lastShallowTarget = -1;

     AssertingImpactsEnum(ImpactsEnum impacts) {
       in = impacts;

@@ -463,20 +463,19 @@
     }

     @Override
-    public int advanceShallow(int target) throws IOException {
+    public void advanceShallow(int target) throws IOException {
       assert target >= lastShallowTarget : "called on decreasing targets: target = " + target + " < last target = " + lastShallowTarget;
       assert target >= docID() : "target = " + target + " < docID = " + docID();
-      int upTo = in.advanceShallow(target);
-      assert upTo >= target : "upTo = " + upTo + " < target = " + target;
       lastShallowTarget = target;
-      return upTo;
+      in.advanceShallow(target);
     }

     @Override
-    public float getMaxScore(int upTo) throws IOException {
-      assert upTo >= lastShallowTarget : "uTo = " + upTo + " < last shallow target = " + lastShallowTarget;
-      float maxScore = in.getMaxScore(upTo);
-      return maxScore;
+    public Impacts getImpacts() throws IOException {
+      assert docID() >= 0 || lastShallowTarget >= 0 : "Cannot get impacts until the iterator is positioned or advanceShallow has been called";
+      Impacts impacts = in.getImpacts();
+      CheckIndex.checkImpacts(impacts, Math.max(docID(), lastShallowTarget));
+      return new AssertingImpacts(impacts, this);
     }

     @Override

@@ -527,6 +526,38 @@
     }
   }

+  static class AssertingImpacts extends Impacts {
+
+    private final Impacts in;
+    private final AssertingImpactsEnum impactsEnum;
+    private final int validFor;
+
+    AssertingImpacts(Impacts in, AssertingImpactsEnum impactsEnum) {
+      this.in = in;
+      this.impactsEnum = impactsEnum;
+      validFor = Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget);
+    }
+
+    @Override
+    public int numLevels() {
+      assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget) : "Cannot reuse impacts after advancing the iterator";
+      return in.numLevels();
+    }
+
+    @Override
+    public int getDocIdUpTo(int level) {
+      assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget) : "Cannot reuse impacts after advancing the iterator";
+      return in.getDocIdUpTo(level);
+    }
+
+    @Override
+    public List<Impact> getImpacts(int level) {
+      assert validFor == Math.max(impactsEnum.docID(), impactsEnum.lastShallowTarget) : "Cannot reuse impacts after advancing the iterator";
+      return in.getImpacts(level);
+    }
+
+  }
+
   /** Wraps a NumericDocValues but with additional asserts */
   public static class AssertingNumericDocValues extends NumericDocValues {
     private final Thread creationThread = Thread.currentThread();
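AssertingImpacts freezes the snapshot that the new API hands out: after advanceShallow, getImpacts returns per-level lists of raw (freq, norm) pairs, and the caller derives any score bound itself. A minimal consumer sketch of that contract, mirroring how RandomPostingsTester uses it below; FreqNormScorer is a hypothetical stand-in for whatever per-field scoring the caller owns, which is exactly what this change moves out of the postings API:

    import org.apache.lucene.index.Impact;
    import org.apache.lucene.index.Impacts;

    // Sketch: the caller-side replacement for the old getMaxScore(upTo).
    class ImpactsBounds {
      interface FreqNormScorer {
        float score(int freq, long norm); // placeholder for caller-owned scoring
      }

      static float maxScoreUpTo(Impacts impacts, int upTo, FreqNormScorer scorer) {
        for (int level = 0; level < impacts.numLevels(); ++level) {
          if (impacts.getDocIdUpTo(level) >= upTo) { // shallowest level covering upTo
            float max = 0;
            for (Impact impact : impacts.getImpacts(level)) {
              max = Math.max(max, scorer.score(impact.freq, impact.norm));
            }
            return max;
          }
        }
        return Float.POSITIVE_INFINITY; // no level covers upTo: no usable bound
      }
    }

A caller would first position the enum, e.g. impactsEnum.advanceShallow(target), then pass impactsEnum.getImpacts() to such a helper.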
@@ -16,11 +16,18 @@
  */
 package org.apache.lucene.index;

+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;

@@ -33,13 +40,13 @@ import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.function.IntToLongFunction;
+import java.util.stream.Collectors;

 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;

@@ -55,12 +62,6 @@ import org.apache.lucene.util.automaton.AutomatonTestUtil;
 import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
 import org.apache.lucene.util.automaton.CompiledAutomaton;

-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
 /** Helper class extracted from BasePostingsFormatTestCase to exercise a postings format. */
 public class RandomPostingsTester {

@@ -608,7 +609,7 @@
   }

   @Override
-  public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
+  public ImpactsEnum impacts(int flags) throws IOException {
     throw new UnsupportedOperationException();
   }
 }
@@ -1055,126 +1056,146 @@
     } else {
       docToNorm = doc -> 1L;
     }
-    for (int s = 0; s < 3; ++s) {
-      final int scoreMode = s;
-      SimScorer scorer = new SimScorer(field) {
-        @Override
-        public float score(float freq, long norm) {
-          switch (scoreMode) {
-            case 0:
-              return freq; // make sure the postings record the best freq
-            case 1:
-              return 1f / norm; // make sure the postings record the best norm
-            default:
-              return freq - norm + MAX_NORM; // now a combination that could make intermediate pairs more competitive
-          }
-        }
-      };
-
-      // First check max scores and block uptos
-      int max = -1;
-      float maxScore = 0;
-      int flags = PostingsEnum.FREQS;
-      if (doCheckPositions) {
-        flags |= PostingsEnum.POSITIONS;
-        if (doCheckOffsets) {
-          flags |= PostingsEnum.OFFSETS;
-        }
-        if (doCheckPayloads) {
-          flags |= PostingsEnum.PAYLOADS;
-        }
-      }
-
-      ImpactsEnum impacts = termsEnum.impacts(scorer, flags);
-      PostingsEnum postings = termsEnum.postings(null, flags);
-      for (int doc = impacts.nextDoc(); ; doc = impacts.nextDoc()) {
-        assertEquals(postings.nextDoc(), doc);
-        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-          break;
-        }
-        int freq = postings.freq();
-        assertEquals("freq is wrong", freq, impacts.freq());
-        for (int i = 0; i < freq; ++i) {
-          int pos = postings.nextPosition();
-          assertEquals("position is wrong", pos, impacts.nextPosition());
-          if (doCheckOffsets) {
-            assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
-            assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
-          }
-          if (doCheckPayloads) {
-            assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
-          }
-        }
-        if (doc > max) {
-          max = impacts.advanceShallow(doc);
-          assertTrue(max >= doc);
-          maxScore = impacts.getMaxScore(max);
-        }
-        assertEquals(max, impacts.advanceShallow(doc));
-        assertTrue(scorer.score(impacts.freq(), docToNorm.applyAsLong(doc)) <= maxScore);
-      }
-
-      // Now check advancing
-      impacts = termsEnum.impacts(scorer, flags);
-      postings = termsEnum.postings(postings, flags);
-
-      max = -1;
-      while (true) {
-        int doc = impacts.docID();
-        boolean advance;
-        int target;
-        if (random.nextBoolean()) {
-          advance = false;
-          target = doc + 1;
-        } else {
-          advance = true;
-          int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
-          target = impacts.docID() + delta;
-        }
-
-        if (target > max && random.nextBoolean()) {
-          int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target);
-          max = target + delta;
-          int m = impacts.advanceShallow(target);
-          assertTrue(m >= target);
-          maxScore = impacts.getMaxScore(max);
-        }
-
-        if (advance) {
-          doc = impacts.advance(target);
-        } else {
-          doc = impacts.nextDoc();
-        }
-
-        assertEquals(postings.advance(target), doc);
-        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-          break;
-        }
-        int freq = postings.freq();
-        assertEquals("freq is wrong", freq, impacts.freq());
-        for (int i = 0; i < postings.freq(); ++i) {
-          int pos = postings.nextPosition();
-          assertEquals("position is wrong", pos, impacts.nextPosition());
-          if (doCheckOffsets) {
-            assertEquals("startOffset is wrong", postings.startOffset(), impacts.startOffset());
-            assertEquals("endOffset is wrong", postings.endOffset(), impacts.endOffset());
-          }
-          if (doCheckPayloads) {
-            assertEquals("payload is wrong", postings.getPayload(), impacts.getPayload());
-          }
-        }
-
-        if (doc > max) {
-          int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
-          max = doc + delta;
-          int m = impacts.advanceShallow(doc);
-          assertTrue(m >= doc);
-          maxScore = impacts.getMaxScore(max);
-        }
-
-        float score = scorer.score(impacts.freq(), docToNorm.applyAsLong(doc));
-        assertTrue(score <= maxScore);
-      }
-    }
+
+    // First check impacts and block uptos
+    int max = -1;
+    List<Impact> impactsCopy = null;
+    int flags = PostingsEnum.FREQS;
+    if (doCheckPositions) {
+      flags |= PostingsEnum.POSITIONS;
+      if (doCheckOffsets) {
+        flags |= PostingsEnum.OFFSETS;
+      }
+      if (doCheckPayloads) {
+        flags |= PostingsEnum.PAYLOADS;
+      }
+    }
+
+    ImpactsEnum impactsEnum = termsEnum.impacts(flags);
+    PostingsEnum postings = termsEnum.postings(null, flags);
+    for (int doc = impactsEnum.nextDoc(); ; doc = impactsEnum.nextDoc()) {
+      assertEquals(postings.nextDoc(), doc);
+      if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+        break;
+      }
+      int freq = postings.freq();
+      assertEquals("freq is wrong", freq, impactsEnum.freq());
+      for (int i = 0; i < freq; ++i) {
+        int pos = postings.nextPosition();
+        assertEquals("position is wrong", pos, impactsEnum.nextPosition());
+        if (doCheckOffsets) {
+          assertEquals("startOffset is wrong", postings.startOffset(), impactsEnum.startOffset());
+          assertEquals("endOffset is wrong", postings.endOffset(), impactsEnum.endOffset());
+        }
+        if (doCheckPayloads) {
+          assertEquals("payload is wrong", postings.getPayload(), impactsEnum.getPayload());
+        }
+      }
+      if (doc > max) {
+        impactsEnum.advanceShallow(doc);
+        Impacts impacts = impactsEnum.getImpacts();
+        CheckIndex.checkImpacts(impacts, doc);
+        impactsCopy = impacts.getImpacts(0)
+            .stream()
+            .map(i -> new Impact(i.freq, i.norm))
+            .collect(Collectors.toList());
+      }
+      freq = impactsEnum.freq();
+      long norm = docToNorm.applyAsLong(doc);
+      int idx = Collections.binarySearch(impactsCopy, new Impact(freq, norm), Comparator.comparing(i -> i.freq));
+      if (idx < 0) {
+        idx = -1 - idx;
+      }
+      assertTrue("Got " + new Impact(freq, norm) + " in postings, but no impact triggers equal or better scores in " + impactsCopy,
+          idx <= impactsCopy.size() && impactsCopy.get(idx).norm <= norm);
+    }
+
+    // Now check advancing
+    impactsEnum = termsEnum.impacts(flags);
+    postings = termsEnum.postings(postings, flags);
+
+    max = -1;
+    while (true) {
+      int doc = impactsEnum.docID();
+      boolean advance;
+      int target;
+      if (random.nextBoolean()) {
+        advance = false;
+        target = doc + 1;
+      } else {
+        advance = true;
+        int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
+        target = impactsEnum.docID() + delta;
+      }
+
+      if (target > max && random.nextBoolean()) {
+        int delta = Math.min(random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - target);
+        max = target + delta;
+
+        impactsEnum.advanceShallow(target);
+        Impacts impacts = impactsEnum.getImpacts();
+        CheckIndex.checkImpacts(impacts, target);
+        impactsCopy = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
+        for (int level = 0; level < impacts.numLevels(); ++level) {
+          if (impacts.getDocIdUpTo(level) >= max) {
+            impactsCopy = impacts.getImpacts(level)
+                .stream()
+                .map(i -> new Impact(i.freq, i.norm))
+                .collect(Collectors.toList());
+            break;
+          }
+        }
+      }
+
+      if (advance) {
+        doc = impactsEnum.advance(target);
+      } else {
+        doc = impactsEnum.nextDoc();
+      }
+
+      assertEquals(postings.advance(target), doc);
+      if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+        break;
+      }
+      int freq = postings.freq();
+      assertEquals("freq is wrong", freq, impactsEnum.freq());
+      for (int i = 0; i < postings.freq(); ++i) {
+        int pos = postings.nextPosition();
+        assertEquals("position is wrong", pos, impactsEnum.nextPosition());
+        if (doCheckOffsets) {
+          assertEquals("startOffset is wrong", postings.startOffset(), impactsEnum.startOffset());
+          assertEquals("endOffset is wrong", postings.endOffset(), impactsEnum.endOffset());
+        }
+        if (doCheckPayloads) {
+          assertEquals("payload is wrong", postings.getPayload(), impactsEnum.getPayload());
+        }
+      }
+
+      if (doc > max) {
+        int delta = Math.min(1 + random.nextInt(512), DocIdSetIterator.NO_MORE_DOCS - doc);
+        max = doc + delta;
+        Impacts impacts = impactsEnum.getImpacts();
+        CheckIndex.checkImpacts(impacts, doc);
+        impactsCopy = Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
+        for (int level = 0; level < impacts.numLevels(); ++level) {
+          if (impacts.getDocIdUpTo(level) >= max) {
+            impactsCopy = impacts.getImpacts(level)
+                .stream()
+                .map(i -> new Impact(i.freq, i.norm))
+                .collect(Collectors.toList());
+            break;
+          }
+        }
+      }
+
+      freq = impactsEnum.freq();
+      long norm = docToNorm.applyAsLong(doc);
+      int idx = Collections.binarySearch(impactsCopy, new Impact(freq, norm), Comparator.comparing(i -> i.freq));
+      if (idx < 0) {
+        idx = -1 - idx;
+      }
+      assertTrue("Got " + new Impact(freq, norm) + " in postings, but no impact triggers equal or better scores in " + impactsCopy,
+          idx <= impactsCopy.size() && impactsCopy.get(idx).norm <= norm);
+    }
      }
   }
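The dominance check at the heart of the rewritten loop above is subtle enough to deserve a worked example: impactsCopy is sorted by freq, so the binary search finds the first recorded impact whose freq is at least the observed freq, and the assertion then requires its norm to be no larger than the observed norm, i.e. some recorded impact must score at least as well. A standalone illustration with hypothetical values:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;

    import org.apache.lucene.index.Impact;

    class DominanceCheckExample {
      public static void main(String[] args) {
        // Hypothetical: recorded impacts sorted by freq; observed posting (5, 9).
        List<Impact> impactsCopy = Arrays.asList(new Impact(1, 4L), new Impact(7, 9L));
        int idx = Collections.binarySearch(impactsCopy, new Impact(5, 9L),
            Comparator.comparing(i -> i.freq));
        if (idx < 0) {
          idx = -1 - idx; // insertion point: first impact with freq >= 5, here (7, 9)
        }
        // (7, 9) has freq >= 5 and norm <= 9, so any scorer that rewards freq
        // and penalizes norm scores it at least as well as the observed pair.
        System.out.println(impactsCopy.get(idx).norm <= 9L); // prints true
      }
    }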
@@ -88,6 +88,7 @@ public class AssertingScorer extends Scorer {
   @Override
   public float getMaxScore(int upTo) throws IOException {
     assert upTo >= lastShallowTarget : "uTo = " + upTo + " < last target = " + lastShallowTarget;
+    assert docID() >= 0 || lastShallowTarget >= 0 : "Cannot get max scores until the iterator is positioned or advanceShallow has been called";
     float maxScore = in.getMaxScore(upTo);
     return maxScore;
   }
@@ -632,7 +632,7 @@ public class CheckHits {
       Assert.assertTrue(twoPhase1 == null || twoPhase1.matches());
       float score = s2.score();
       Assert.assertEquals(s1.score(), score);
-      Assert.assertTrue(score <= maxScore);
+      Assert.assertTrue(score + " > " + maxScore + " up to " + upTo, score <= maxScore);

       if (score >= minScore && random.nextInt(10) == 0) {
         // On some scorers, changing the min score changes the way that docs are iterated
@@ -43,7 +43,6 @@ import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.DocIdSetBuilder;

@@ -241,8 +240,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return te.impacts(scorer, flags);
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return te.impacts(flags);
     }

     @Override
@@ -35,7 +35,6 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;

@@ -609,8 +608,8 @@ public class DocTermOrds implements Accountable {
     }

     @Override
-    public ImpactsEnum impacts(SimScorer scorer, int flags) throws IOException {
-      return termsEnum.impacts(scorer, flags);
+    public ImpactsEnum impacts(int flags) throws IOException {
+      return termsEnum.impacts(flags);
     }

     @Override