mirror of https://github.com/apache/lucene.git
LUCENE-3555: add support for distributed stats
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1197455 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5f8d4fc8af
commit
b19a207c86
|
@ -0,0 +1,72 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.IndexReader; // javadocs
|
||||
import org.apache.lucene.index.Terms; // javadocs
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * Contains statistics for a collection (field).
 * <p>
 * This is an immutable holder: every value is supplied to the constructor
 * and handed back unchanged by the matching accessor.
 *
 * @lucene.experimental
 */
public class CollectionStatistics {
  private final String field;
  private final int maxDoc;
  private final int docCount;
  private final long sumTotalTermFreq;
  private final long sumDocFreq;

  /**
   * Creates statistics for one field. No validation is performed on the
   * arguments; they are stored as given.
   *
   * @param field            the field name
   * @param maxDoc           total number of documents, regardless of whether
   *                         they contain values for this field
   * @param docCount         number of documents that have at least one term
   *                         for this field
   * @param sumTotalTermFreq total number of tokens for this field
   * @param sumDocFreq       total number of postings for this field
   */
  public CollectionStatistics(String field, int maxDoc, int docCount, long sumTotalTermFreq, long sumDocFreq) {
    this.field = field;
    this.maxDoc = maxDoc;
    this.docCount = docCount;
    this.sumTotalTermFreq = sumTotalTermFreq;
    this.sumDocFreq = sumDocFreq;
  }

  /**
   * Returns the field name.
   *
   * @return the field these statistics describe
   */
  public String field() {
    return field;
  }

  /**
   * Returns the total number of documents, regardless of
   * whether they all contain values for this field.
   *
   * @return the document count of the whole collection
   * @see IndexReader#maxDoc()
   */
  public int maxDoc() {
    return maxDoc;
  }

  /**
   * Returns the total number of documents that
   * have at least one term for this field.
   *
   * @return the number of documents containing this field
   * @see Terms#getDocCount()
   */
  public int docCount() {
    return docCount;
  }

  /**
   * Returns the total number of tokens for this field.
   * <p>
   * Consumers should be prepared for a non-positive value: the BM25 and
   * SimilarityBase implementations treat {@code <= 0} as "field does not
   * exist, or stat is unsupported" and fall back to a default.
   *
   * @return the sum of total term frequencies across the field
   * @see Terms#getSumTotalTermFreq()
   */
  public long sumTotalTermFreq() {
    return sumTotalTermFreq;
  }

  /**
   * Returns the total number of postings for this field.
   *
   * @return the sum of document frequencies across the field
   * @see Terms#getSumDocFreq()
   */
  public long sumDocFreq() {
    return sumDocFreq;
  }
}
|
|
@ -35,14 +35,18 @@ import org.apache.lucene.index.CorruptIndexException;
|
|||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.NIOFSDirectory; // javadoc
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
/** Implements search over a single IndexReader.
|
||||
|
@ -860,4 +864,34 @@ public class IndexSearcher implements Closeable {
|
|||
public String toString() {
|
||||
return "IndexSearcher(" + reader + "; executor=" + executor + ")";
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns {@link TermStatistics} for a term
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
|
||||
return new TermStatistics(term.bytes(), context.docFreq(), context.totalTermFreq());
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns {@link CollectionStatistics} for a field
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public CollectionStatistics collectionStatistics(String field) throws IOException {
|
||||
final int docCount;
|
||||
final long sumTotalTermFreq;
|
||||
final long sumDocFreq;
|
||||
|
||||
Terms terms = MultiFields.getTerms(reader, field);
|
||||
if (terms == null) {
|
||||
docCount = 0;
|
||||
sumTotalTermFreq = 0;
|
||||
sumDocFreq = 0;
|
||||
} else {
|
||||
docCount = terms.getDocCount();
|
||||
sumTotalTermFreq = terms.getSumTotalTermFreq();
|
||||
sumDocFreq = terms.getSumDocFreq();
|
||||
}
|
||||
return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,13 +141,15 @@ public class MultiPhraseQuery extends Query {
|
|||
final ReaderContext context = searcher.getTopReaderContext();
|
||||
|
||||
// compute idf
|
||||
ArrayList<TermContext> allTerms = new ArrayList<TermContext>();
|
||||
ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>();
|
||||
for(final Term[] terms: termArrays) {
|
||||
for (Term term: terms) {
|
||||
allTerms.add(TermContext.build(context, term, true));
|
||||
TermContext termContext = TermContext.build(context, term, true);
|
||||
allTermStats.add(searcher.termStatistics(term, termContext));
|
||||
}
|
||||
}
|
||||
stats = similarity.computeStats(searcher, field, getBoost(), allTerms.toArray(new TermContext[allTerms.size()]));
|
||||
stats = similarity.computeStats(searcher.collectionStatistics(field),
|
||||
getBoost(), allTermStats.toArray(new TermStatistics[allTermStats.size()]));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -190,9 +190,13 @@ public class PhraseQuery extends Query {
|
|||
this.similarity = searcher.getSimilarityProvider().get(field);
|
||||
final ReaderContext context = searcher.getTopReaderContext();
|
||||
states = new TermContext[terms.size()];
|
||||
for (int i = 0; i < terms.size(); i++)
|
||||
states[i] = TermContext.build(context, terms.get(i), true);
|
||||
stats = similarity.computeStats(searcher, field, getBoost(), states);
|
||||
TermStatistics termStats[] = new TermStatistics[terms.size()];
|
||||
for (int i = 0; i < terms.size(); i++) {
|
||||
final Term term = terms.get(i);
|
||||
states[i] = TermContext.build(context, term, true);
|
||||
termStats[i] = searcher.termStatistics(term, states[i]);
|
||||
}
|
||||
stats = similarity.computeStats(searcher.collectionStatistics(field), getBoost(), termStats);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -54,7 +54,10 @@ public class TermQuery extends Query {
|
|||
assert termStates != null : "TermContext must not be null";
|
||||
this.termStates = termStates;
|
||||
this.similarity = searcher.getSimilarityProvider().get(term.field());
|
||||
this.stats = similarity.computeStats(searcher, term.field(), getBoost(), termStates);
|
||||
this.stats = similarity.computeStats(
|
||||
searcher.collectionStatistics(term.field()),
|
||||
getBoost(),
|
||||
searcher.termStatistics(term, termStates));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader; // javadocs
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
/**
|
||||
* Contains statistics for a specific term
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class TermStatistics {
|
||||
private final BytesRef term;
|
||||
private final int docFreq;
|
||||
private final long totalTermFreq;
|
||||
|
||||
public TermStatistics(BytesRef term, int docFreq, long totalTermFreq) {
|
||||
this.term = term;
|
||||
this.docFreq = docFreq;
|
||||
this.totalTermFreq = totalTermFreq;
|
||||
}
|
||||
|
||||
/** returns the term text */
|
||||
public BytesRef term() {
|
||||
return term;
|
||||
}
|
||||
|
||||
/** returns the number of documents this term occurs in
|
||||
* @see IndexReader#docFreq(String, BytesRef) */
|
||||
public int docFreq() {
|
||||
return docFreq;
|
||||
}
|
||||
|
||||
/** returns the total number of occurrences of this term
|
||||
* @see IndexReader#totalTermFreq(String, BytesRef) */
|
||||
public long totalTermFreq() {
|
||||
return totalTermFreq;
|
||||
}
|
||||
}
|
|
@ -20,14 +20,12 @@ package org.apache.lucene.search.similarities;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
|
||||
/**
|
||||
* BM25 Similarity. Introduced in Stephen E. Robertson, Steve Walker,
|
||||
|
@ -75,15 +73,13 @@ public class BM25Similarity extends Similarity {
|
|||
/** The default implementation computes the average as <code>sumTotalTermFreq / maxDoc</code>,
|
||||
* or returns <code>1</code> if the index does not store sumTotalTermFreq (Lucene 3.x indexes
|
||||
* or any field that omits frequency information). */
|
||||
protected float avgFieldLength(IndexSearcher searcher, String field) throws IOException {
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), field);
|
||||
if (terms == null) {
|
||||
// field does not exist;
|
||||
return 1f;
|
||||
protected float avgFieldLength(CollectionStatistics collectionStats) {
|
||||
final long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
|
||||
if (sumTotalTermFreq <= 0) {
|
||||
return 1f; // field does not exist, or stat is unsupported
|
||||
} else {
|
||||
return (float) (sumTotalTermFreq / (double) collectionStats.maxDoc());
|
||||
}
|
||||
long sumTotalTermFreq = terms.getSumTotalTermFreq();
|
||||
long maxdoc = searcher.maxDoc();
|
||||
return sumTotalTermFreq == -1 ? 1f : (float) (sumTotalTermFreq / (double) maxdoc);
|
||||
}
|
||||
|
||||
/** The default implementation encodes <code>boost / sqrt(length)</code>
|
||||
|
@ -131,19 +127,19 @@ public class BM25Similarity extends Similarity {
|
|||
return encodeNormValue(state.getBoost(), numTerms);
|
||||
}
|
||||
|
||||
public Explanation idfExplain(TermContext stats, final IndexSearcher searcher) throws IOException {
|
||||
final int df = stats.docFreq();
|
||||
final int max = searcher.maxDoc();
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
final int df = termStats.docFreq();
|
||||
final int max = collectionStats.maxDoc();
|
||||
final float idf = idf(df, max);
|
||||
return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
|
||||
}
|
||||
|
||||
public Explanation idfExplain(final TermContext stats[], IndexSearcher searcher) throws IOException {
|
||||
final int max = searcher.maxDoc();
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
|
||||
final int max = collectionStats.maxDoc();
|
||||
float idf = 0.0f;
|
||||
final Explanation exp = new Explanation();
|
||||
exp.setDescription("idf(), sum of:");
|
||||
for (final TermContext stat : stats ) {
|
||||
for (final TermStatistics stat : termStats ) {
|
||||
final int df = stat.docFreq();
|
||||
final float termIdf = idf(df, max);
|
||||
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
|
||||
|
@ -154,10 +150,10 @@ public class BM25Similarity extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termStats) throws IOException {
|
||||
Explanation idf = termStats.length == 1 ? idfExplain(termStats[0], searcher) : idfExplain(termStats, searcher);
|
||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);
|
||||
|
||||
float avgdl = avgFieldLength(searcher, fieldName);
|
||||
float avgdl = avgFieldLength(collectionStats);
|
||||
|
||||
// compute freq-independent part of bm25 equation across all norm values
|
||||
float cache[] = new float[256];
|
||||
|
|
|
@ -17,11 +17,9 @@ package org.apache.lucene.search.similarities;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
|
||||
/**
|
||||
* Abstract superclass for language modeling Similarities. The following inner
|
||||
|
@ -62,8 +60,8 @@ public abstract class LMSimilarity extends SimilarityBase {
|
|||
* usual statistics.
|
||||
*/
|
||||
@Override
|
||||
protected void fillBasicStats(BasicStats stats, IndexSearcher searcher, String fieldName, TermContext termContext) throws IOException {
|
||||
super.fillBasicStats(stats, searcher, fieldName, termContext);
|
||||
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
super.fillBasicStats(stats, collectionStats, termStats);
|
||||
LMStats lmStats = (LMStats) stats;
|
||||
lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
|
||||
}
|
||||
|
|
|
@ -21,10 +21,10 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
|
||||
/**
|
||||
* Implements the CombSUM method for combining evidence from multiple
|
||||
|
@ -45,10 +45,10 @@ public class MultiSimilarity extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termContexts) throws IOException {
|
||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
Stats subStats[] = new Stats[sims.length];
|
||||
for (int i = 0; i < subStats.length; i++) {
|
||||
subStats[i] = sims[i].computeStats(searcher, fieldName, queryBoost, termContexts);
|
||||
subStats[i] = sims[i].computeStats(collectionStats, queryBoost, termStats);
|
||||
}
|
||||
return new MultiStats(subStats);
|
||||
}
|
||||
|
|
|
@ -26,11 +26,13 @@ import org.apache.lucene.index.IndexReader; // javadoc
|
|||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Terms; // javadoc
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.spans.SpanQuery; // javadoc
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat; // javadoc
|
||||
|
@ -81,10 +83,10 @@ import org.apache.lucene.util.TermContext;
|
|||
* <a name="querytime"/>
|
||||
* At query-time, Queries interact with the Similarity via these steps:
|
||||
* <ol>
|
||||
* <li>The {@link #computeStats(IndexSearcher, String, float, TermContext...)} method is called a single time,
|
||||
* <li>The {@link #computeStats(CollectionStatistics, float, TermStatistics...)} method is called a single time,
|
||||
* allowing the implementation to compute any statistics (such as IDF, average document length, etc)
|
||||
* across <i>the entire collection</i>. The {@link TermContext}s passed in are already positioned
|
||||
* to the terms involved with the raw statistics involved, so a Similarity can freely use any combination
|
||||
* across <i>the entire collection</i>. The {@link TermStatistics} passed in already contain
|
||||
* the raw statistics involved, so a Similarity can freely use any combination
|
||||
* of term statistics without causing any additional I/O. Lucene makes no assumption about what is
|
||||
* stored in the returned {@link Similarity.Stats} object.
|
||||
* <li>The query normalization process occurs a single time: {@link Similarity.Stats#getValueForNormalization()}
|
||||
|
@ -128,7 +130,7 @@ public abstract class Similarity {
|
|||
/**
|
||||
* Compute any collection-level stats (e.g. IDF, average document length, etc) needed for scoring a query.
|
||||
*/
|
||||
public abstract Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termContexts) throws IOException;
|
||||
public abstract Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats);
|
||||
|
||||
/**
|
||||
* returns a new {@link Similarity.ExactDocScorer}.
|
||||
|
|
|
@ -20,15 +20,12 @@ package org.apache.lucene.search.similarities;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
|
||||
/**
|
||||
* A subclass of {@code Similarity} that provides a simplified API for its
|
||||
|
@ -71,12 +68,11 @@ public abstract class SimilarityBase extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Stats computeStats(IndexSearcher searcher, String fieldName,
|
||||
float queryBoost, TermContext... termContexts) throws IOException {
|
||||
BasicStats stats[] = new BasicStats[termContexts.length];
|
||||
for (int i = 0; i < termContexts.length; i++) {
|
||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
BasicStats stats[] = new BasicStats[termStats.length];
|
||||
for (int i = 0; i < termStats.length; i++) {
|
||||
stats[i] = newStats(queryBoost);
|
||||
fillBasicStats(stats[i], searcher, fieldName, termContexts[i]);
|
||||
fillBasicStats(stats[i], collectionStats, termStats[i]);
|
||||
}
|
||||
return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
|
||||
}
|
||||
|
@ -88,13 +84,11 @@ public abstract class SimilarityBase extends Similarity {
|
|||
|
||||
/** Fills all member fields defined in {@code BasicStats} in {@code stats}.
|
||||
* Subclasses can override this method to fill additional stats. */
|
||||
protected void fillBasicStats(BasicStats stats, IndexSearcher searcher,
|
||||
String fieldName, TermContext termContext) throws IOException {
|
||||
IndexReader reader = searcher.getIndexReader();
|
||||
int numberOfDocuments = reader.maxDoc();
|
||||
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
int numberOfDocuments = collectionStats.maxDoc();
|
||||
|
||||
int docFreq = termContext.docFreq();
|
||||
long totalTermFreq = termContext.totalTermFreq();
|
||||
int docFreq = termStats.docFreq();
|
||||
long totalTermFreq = termStats.totalTermFreq();
|
||||
|
||||
// codec does not supply totalTermFreq: substitute docFreq
|
||||
if (totalTermFreq == -1) {
|
||||
|
@ -103,25 +97,19 @@ public abstract class SimilarityBase extends Similarity {
|
|||
|
||||
final long numberOfFieldTokens;
|
||||
final float avgFieldLength;
|
||||
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), fieldName);
|
||||
if (terms == null) {
|
||||
// field does not exist;
|
||||
numberOfFieldTokens = 0;
|
||||
avgFieldLength = 1;
|
||||
} else {
|
||||
long sumTotalTermFreq = terms.getSumTotalTermFreq();
|
||||
|
||||
long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
|
||||
|
||||
if (sumTotalTermFreq <= 0) {
|
||||
// field does not exist;
|
||||
// We have to provide something if codec doesn't supply these measures,
|
||||
// or if someone omitted frequencies for the field... negative values cause
|
||||
// NaN/Inf for some scorers.
|
||||
if (sumTotalTermFreq == -1) {
|
||||
numberOfFieldTokens = docFreq;
|
||||
avgFieldLength = 1;
|
||||
} else {
|
||||
numberOfFieldTokens = sumTotalTermFreq;
|
||||
avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
|
||||
}
|
||||
numberOfFieldTokens = docFreq;
|
||||
avgFieldLength = 1;
|
||||
} else {
|
||||
numberOfFieldTokens = sumTotalTermFreq;
|
||||
avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
|
||||
}
|
||||
|
||||
// TODO: add sumDocFreq for field (numberOfFieldPostings)
|
||||
|
|
|
@ -22,9 +22,11 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
|
@ -575,15 +577,15 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
* is inaccurate, so is {@link IndexSearcher#maxDoc()}, and in the same direction.
|
||||
* In addition, {@link IndexSearcher#maxDoc()} is more efficient to compute
|
||||
*
|
||||
* @param stats statistics of the term in question
|
||||
* @param searcher the document collection being searched
|
||||
* @param collectionStats collection-level statistics
|
||||
* @param termStats term-level statistics for the term
|
||||
* @return an Explain object that includes both an idf score factor
|
||||
and an explanation for the term.
|
||||
* @throws IOException
|
||||
*/
|
||||
public Explanation idfExplain(TermContext stats, final IndexSearcher searcher) throws IOException {
|
||||
final int df = stats.docFreq();
|
||||
final int max = searcher.maxDoc();
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
final int df = termStats.docFreq();
|
||||
final int max = collectionStats.maxDoc();
|
||||
final float idf = idf(df, max);
|
||||
return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
|
||||
}
|
||||
|
@ -595,19 +597,19 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
* The default implementation sums the idf factor for
|
||||
* each term in the phrase.
|
||||
*
|
||||
* @param stats statistics of the terms in the phrase
|
||||
* @param searcher the document collection being searched
|
||||
* @param collectionStats collection-level statistics
|
||||
* @param termStats term-level statistics for the terms in the phrase
|
||||
* @return an Explain object that includes both an idf
|
||||
* score factor for the phrase and an explanation
|
||||
* for each term.
|
||||
* @throws IOException
|
||||
*/
|
||||
public Explanation idfExplain(final TermContext stats[], IndexSearcher searcher) throws IOException {
|
||||
final int max = searcher.maxDoc();
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
|
||||
final int max = collectionStats.maxDoc();
|
||||
float idf = 0.0f;
|
||||
final Explanation exp = new Explanation();
|
||||
exp.setDescription("idf(), sum of:");
|
||||
for (final TermContext stat : stats ) {
|
||||
for (final TermStatistics stat : termStats ) {
|
||||
final int df = stat.docFreq();
|
||||
final float termIdf = idf(df, max);
|
||||
exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
|
||||
|
@ -693,11 +695,10 @@ public abstract class TFIDFSimilarity extends Similarity {
|
|||
public abstract float scorePayload(int doc, int start, int end, BytesRef payload);
|
||||
|
||||
@Override
|
||||
public final Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost,
|
||||
TermContext... termContexts) throws IOException {
|
||||
final Explanation idf = termContexts.length == 1
|
||||
? idfExplain(termContexts[0], searcher)
|
||||
: idfExplain(termContexts, searcher);
|
||||
public final Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
final Explanation idf = termStats.length == 1
|
||||
? idfExplain(collectionStats, termStats[0])
|
||||
: idfExplain(collectionStats, termStats);
|
||||
return new IDFStats(idf, queryBoost);
|
||||
}
|
||||
|
||||
|
|
|
@ -48,10 +48,17 @@ public class SpanWeight extends Weight {
|
|||
query.extractTerms(terms);
|
||||
final ReaderContext context = searcher.getTopReaderContext();
|
||||
final TermContext states[] = new TermContext[terms.size()];
|
||||
final TermStatistics termStats[] = new TermStatistics[terms.size()];
|
||||
int i = 0;
|
||||
for (Term term : terms)
|
||||
states[i++] = TermContext.build(context, term, true);
|
||||
stats = similarity.computeStats(searcher, query.getField(), query.getBoost(), states);
|
||||
for (Term term : terms) {
|
||||
states[i] = TermContext.build(context, term, true);
|
||||
termStats[i] = searcher.termStatistics(term, states[i]);
|
||||
i++;
|
||||
}
|
||||
stats = similarity.computeStats(
|
||||
searcher.collectionStatistics(query.getField()),
|
||||
query.getBoost(),
|
||||
termStats);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -50,7 +49,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
@Override public Explanation idfExplain(TermContext[] terms, IndexSearcher searcher) throws IOException {
|
||||
@Override public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) {
|
||||
return new Explanation(1.0f, "Inexplicable");
|
||||
}
|
||||
@Override public float scorePayload(int doc, int start, int end, BytesRef payload) { return 1.0f; }
|
||||
|
|
|
@ -22,12 +22,8 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.Stats;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
|
@ -250,7 +246,7 @@ final class JustCompileSearch {
|
|||
static final class JustCompileSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termContexts) throws IOException {
|
||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
|
|
|
@ -161,8 +161,8 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Stats computeStats(IndexSearcher searcher, String fieldName, float queryBoost, TermContext... termContexts) throws IOException {
|
||||
return sim.computeStats(searcher, fieldName, queryBoost, termContexts);
|
||||
public Stats computeStats(CollectionStatistics collectionStats, float queryBoost, TermStatistics... termStats) {
|
||||
return sim.computeStats(collectionStats, queryBoost, termStats);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -316,8 +316,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
|||
return new DefaultSimilarity() {
|
||||
|
||||
@Override
|
||||
public Explanation idfExplain(TermContext stats[],
|
||||
IndexSearcher searcher) throws IOException {
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
|
||||
return new Explanation(10f, "just a test");
|
||||
}
|
||||
};
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -50,7 +49,7 @@ public class TestSimilarity extends LuceneTestCase {
|
|||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
@Override public Explanation idfExplain(TermContext[] stats, IndexSearcher searcher) throws IOException {
|
||||
@Override public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] stats) {
|
||||
return new Explanation(1.0f, "Inexplicable");
|
||||
}
|
||||
};
|
||||
|
|
|
@ -27,10 +27,12 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
@ -42,7 +44,6 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.English;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
|
@ -346,7 +347,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
|
||||
// idf used for phrase queries
|
||||
@Override
|
||||
public Explanation idfExplain(TermContext states[], IndexSearcher searcher) throws IOException {
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics[] termStats) {
|
||||
return new Explanation(1.0f, "Inexplicable");
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1,222 +0,0 @@
|
|||
package org.apache.lucene.search.similarities;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.TermFreqVector;
|
||||
import org.apache.lucene.index.TermVectorMapper;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.codecs.PerDocValues;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Index searcher implementation that takes an {@link BasicStats} instance and
|
||||
* returns statistics accordingly. Most of the methods are not implemented, so
|
||||
* it can only be used for Similarity unit testing.
|
||||
*/
|
||||
public class SpoofIndexSearcher extends IndexSearcher {
|
||||
public SpoofIndexSearcher(BasicStats stats) {
|
||||
super(new SpoofIndexReader(stats));
|
||||
}
|
||||
|
||||
public static class SpoofIndexReader extends IndexReader {
|
||||
/** The stats the reader has to return. */
|
||||
protected BasicStats stats;
|
||||
/** The fields the reader has to return. */
|
||||
protected SpoofFields fields;
|
||||
|
||||
public SpoofIndexReader(BasicStats stats) {
|
||||
this.stats = stats;
|
||||
this.fields = new SpoofFields(stats);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numDocs() {
|
||||
return stats.getNumberOfDocuments();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maxDoc() {
|
||||
return stats.getNumberOfDocuments();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return fields;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getFieldNames(FieldOption fldOption) {
|
||||
return Arrays.asList(new String[]{"spoof"});
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReaderContext getTopReaderContext() {
|
||||
return new AtomicReaderContext(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasDeletions() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ------------------------ Not implemented methods ------------------------
|
||||
|
||||
@Override
|
||||
public TermFreqVector[] getTermFreqVectors(int docNumber)
|
||||
throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermFreqVector getTermFreqVector(int docNumber, String field)
|
||||
throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getTermFreqVector(int docNumber, String field,
|
||||
TermVectorMapper mapper) throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getTermFreqVector(int docNumber, TermVectorMapper mapper)
|
||||
throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] norms(String field) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doSetNorm(int doc, String field, byte value)
|
||||
throws CorruptIndexException, IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocValues perDocValues() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doDelete(int docNum) throws CorruptIndexException,
|
||||
IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doUndeleteAll() throws CorruptIndexException, IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doCommit(Map<String,String> commitUserData)
|
||||
throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doClose() throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getLiveDocs() {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Spoof Fields class for Similarity testing. */
|
||||
public static class SpoofFields extends Fields {
|
||||
/** The stats the object has to return. */
|
||||
protected SpoofTerms terms;
|
||||
|
||||
public SpoofFields(BasicStats stats) {
|
||||
this.terms = new SpoofTerms(stats);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return terms;
|
||||
}
|
||||
|
||||
// ------------------------ Not implemented methods ------------------------
|
||||
|
||||
@Override
|
||||
public FieldsEnum iterator() throws IOException {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Spoof Terms class for Similarity testing. */
|
||||
public static class SpoofTerms extends Terms {
|
||||
/** The stats the object has to return. */
|
||||
protected BasicStats stats;
|
||||
|
||||
public SpoofTerms(BasicStats stats) {
|
||||
this.stats = stats;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumTotalTermFreq() throws IOException {
|
||||
return stats.getNumberOfFieldTokens();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSumDocFreq() throws IOException {
|
||||
return stats.getDocFreq();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDocCount() throws IOException {
|
||||
return stats.getDocFreq();
|
||||
}
|
||||
|
||||
// ------------------------ Not implemented methods ------------------------
|
||||
|
||||
|
||||
@Override
|
||||
public TermsEnum iterator() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getUniqueTermCount() throws IOException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -30,12 +30,15 @@ import org.apache.lucene.index.OrdTermState;
|
|||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.codecs.CodecProvider;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.junit.Ignore;
|
||||
|
@ -172,7 +175,14 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
stats.setTotalTermFreq(TOTAL_TERM_FREQ);
|
||||
return stats;
|
||||
}
|
||||
|
||||
|
||||
private CollectionStatistics toCollectionStats(BasicStats stats) {
|
||||
return new CollectionStatistics("spoof", stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1);
|
||||
}
|
||||
|
||||
private TermStatistics toTermStats(BasicStats stats) {
|
||||
return new TermStatistics(new BytesRef("spoofyText"), stats.getDocFreq(), stats.getTotalTermFreq());
|
||||
}
|
||||
/**
|
||||
* The generic test core called by all unit test methods. It calls the
|
||||
* {@link SimilarityBase#score(BasicStats, float, int)} method of all
|
||||
|
@ -180,17 +190,11 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
* is a finite positive real number.
|
||||
*/
|
||||
private void unitTestCore(BasicStats stats, float freq, int docLen)
|
||||
throws IOException {
|
||||
// We have to fake everything, because computeStats() can be overridden and
|
||||
// there is no way to inject false data after fillBasicStats().
|
||||
SpoofIndexSearcher searcher = new SpoofIndexSearcher(stats);
|
||||
TermContext tc = new TermContext(
|
||||
searcher.getIndexReader().getTopReaderContext(),
|
||||
new OrdTermState(), 0, stats.getDocFreq(), stats.getTotalTermFreq());
|
||||
|
||||
throws IOException {
|
||||
for (SimilarityBase sim : sims) {
|
||||
BasicStats realStats = (BasicStats) sim.computeStats(new SpoofIndexSearcher(stats),
|
||||
"spoof", stats.getTotalBoost(), tc);
|
||||
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats),
|
||||
stats.getTotalBoost(),
|
||||
toTermStats(stats));
|
||||
float score = sim.score(realStats, freq, docLen);
|
||||
float explScore = sim.explain(
|
||||
realStats, 1, new Explanation(freq, "freq"), docLen).getValue();
|
||||
|
@ -520,16 +524,10 @@ public class TestSimilarityBase extends LuceneTestCase {
|
|||
*/
|
||||
private void correctnessTestCore(SimilarityBase sim, float gold)
|
||||
throws IOException {
|
||||
// We have to fake everything, because computeStats() can be overridden and
|
||||
// there is no way to inject false data after fillBasicStats().
|
||||
BasicStats stats = createStats();
|
||||
SpoofIndexSearcher searcher = new SpoofIndexSearcher(stats);
|
||||
TermContext tc = new TermContext(
|
||||
searcher.getIndexReader().getTopReaderContext(),
|
||||
new OrdTermState(), 0, stats.getDocFreq(), stats.getTotalTermFreq());
|
||||
|
||||
BasicStats realStats = (BasicStats) sim.computeStats(
|
||||
searcher, "spoof", stats.getTotalBoost(), tc);
|
||||
BasicStats realStats = (BasicStats) sim.computeStats(toCollectionStats(stats),
|
||||
stats.getTotalBoost(),
|
||||
toTermStats(stats));
|
||||
float score = sim.score(realStats, FREQ, DOC_LEN);
|
||||
assertEquals(
|
||||
sim.toString() + " score not correct.", gold, score, FLOAT_EPSILON);
|
||||
|
|
Loading…
Reference in New Issue