subs, BasicStats stats, int doc,
- float freq, float docLen) {
- if (stats.getBoost() != 1.0f) {
- subs.add(Explanation.match(stats.getBoost(), "boost"));
+ double freq, double docLen) {
+ if (stats.getBoost() != 1.0d) {
+ subs.add(Explanation.match((float) stats.getBoost(), "boost"));
}
subs.add(Explanation.match(mu, "mu"));
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
index 3788b5ca6b9..2799e3a0849 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
@@ -31,7 +31,9 @@ import org.apache.lucene.search.Explanation;
* The model has a single parameter, λ. According to said paper, the
* optimal value depends on both the collection and the query. The optimal value
* is around {@code 0.1} for title queries and {@code 0.7} for long queries.
- *
+ * Values should be between 0 (exclusive) and 1 (inclusive). Values near zero act more
+ * like a conjunction (coordinate level matching), whereas values near 1 behave
+ * the opposite (more like pure disjunction).
* @lucene.experimental
*/
public class LMJelinekMercerSimilarity extends LMSimilarity {
@@ -42,27 +44,33 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {
public LMJelinekMercerSimilarity(
CollectionModel collectionModel, float lambda) {
super(collectionModel);
+ if (Float.isNaN(lambda) || lambda <= 0 || lambda > 1) {
+ throw new IllegalArgumentException("lambda must be in the range (0 .. 1]");
+ }
this.lambda = lambda;
}
/** Instantiates with the specified λ parameter. */
public LMJelinekMercerSimilarity(float lambda) {
+ if (Float.isNaN(lambda) || lambda <= 0 || lambda > 1) {
+ throw new IllegalArgumentException("lambda must be in the range (0 .. 1]");
+ }
this.lambda = lambda;
}
@Override
- protected float score(BasicStats stats, float freq, float docLen) {
+ protected double score(BasicStats stats, double freq, double docLen) {
return stats.getBoost() *
- (float)Math.log(1 +
+ Math.log(1 +
((1 - lambda) * freq / docLen) /
(lambda * ((LMStats)stats).getCollectionProbability()));
}
@Override
protected void explain(List subs, BasicStats stats, int doc,
- float freq, float docLen) {
- if (stats.getBoost() != 1.0f) {
- subs.add(Explanation.match(stats.getBoost(), "boost"));
+ double freq, double docLen) {
+ if (stats.getBoost() != 1.0d) {
+ subs.add(Explanation.match((float) stats.getBoost(), "boost"));
}
subs.add(Explanation.match(lambda, "lambda"));
super.explain(subs, stats, doc, freq, docLen);
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
index 2e484eb641b..81548061e5c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
@@ -54,7 +54,7 @@ public abstract class LMSimilarity extends SimilarityBase {
}
@Override
- protected BasicStats newStats(String field, float boost) {
+ protected BasicStats newStats(String field, double boost) {
return new LMStats(field, boost);
}
@@ -71,8 +71,8 @@ public abstract class LMSimilarity extends SimilarityBase {
@Override
protected void explain(List subExpls, BasicStats stats, int doc,
- float freq, float docLen) {
- subExpls.add(Explanation.match(collectionModel.computeProbability(stats),
+ double freq, double docLen) {
+ subExpls.add(Explanation.match((float) collectionModel.computeProbability(stats),
"collection probability"));
}
@@ -103,12 +103,12 @@ public abstract class LMSimilarity extends SimilarityBase {
/** Stores the collection distribution of the current term. */
public static class LMStats extends BasicStats {
/** The probability that the current term is generated by the collection. */
- private float collectionProbability;
+ private double collectionProbability;
/**
* Creates LMStats for the provided field and query-time boost
*/
- public LMStats(String field, float boost) {
+ public LMStats(String field, double boost) {
super(field, boost);
}
@@ -116,7 +116,7 @@ public abstract class LMSimilarity extends SimilarityBase {
* Returns the probability that the current term is generated by the
* collection.
*/
- public final float getCollectionProbability() {
+ public final double getCollectionProbability() {
return collectionProbability;
}
@@ -124,7 +124,7 @@ public abstract class LMSimilarity extends SimilarityBase {
* Sets the probability that the current term is generated by the
* collection.
*/
- public final void setCollectionProbability(float collectionProbability) {
+ public final void setCollectionProbability(double collectionProbability) {
this.collectionProbability = collectionProbability;
}
}
@@ -135,7 +135,7 @@ public abstract class LMSimilarity extends SimilarityBase {
* Computes the probability {@code p(w|C)} according to the language model
* strategy for the current term.
*/
- public float computeProbability(BasicStats stats);
+ public double computeProbability(BasicStats stats);
/** The name of the collection model strategy. */
public String getName();
@@ -151,8 +151,8 @@ public abstract class LMSimilarity extends SimilarityBase {
public DefaultCollectionModel() {}
@Override
- public float computeProbability(BasicStats stats) {
- return (stats.getTotalTermFreq()+1F) / (stats.getNumberOfFieldTokens()+1F);
+ public double computeProbability(BasicStats stats) {
+ return (stats.getTotalTermFreq()+1D) / (stats.getNumberOfFieldTokens()+1D);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Normalization.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Normalization.java
index 0ab70f6b000..e20ca020da0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/Normalization.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Normalization.java
@@ -36,7 +36,7 @@ public abstract class Normalization {
/** Returns the normalized term frequency.
* @param len the field length. */
- public abstract float tfn(BasicStats stats, float tf, float len);
+ public abstract double tfn(BasicStats stats, double tf, double len);
/** Returns an explanation for the normalized term frequency.
* The default normalization methods use the field length of the document
@@ -44,13 +44,13 @@ public abstract class Normalization {
* This method provides a generic explanation for such methods.
* Subclasses that use other statistics must override this method.
*/
- public Explanation explain(BasicStats stats, float tf, float len) {
+ public Explanation explain(BasicStats stats, double tf, double len) {
return Explanation.match(
- tfn(stats, tf, len),
+ (float) tfn(stats, tf, len),
getClass().getSimpleName() + ", computed from: ",
- Explanation.match(tf, "tf"),
- Explanation.match(stats.getAvgFieldLength(), "avgFieldLength"),
- Explanation.match(len, "len"));
+ Explanation.match((float) tf, "tf"),
+ Explanation.match((float) stats.getAvgFieldLength(), "avgFieldLength"),
+ Explanation.match((float) len, "len"));
}
/** Implementation used when there is no normalization. */
@@ -60,12 +60,12 @@ public abstract class Normalization {
public NoNormalization() {}
@Override
- public final float tfn(BasicStats stats, float tf, float len) {
+ public double tfn(BasicStats stats, double tf, double len) {
return tf;
}
@Override
- public final Explanation explain(BasicStats stats, float tf, float len) {
+ public Explanation explain(BasicStats stats, double tf, double len) {
return Explanation.match(1, "no normalization");
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH1.java b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
index e7f47cafd3e..8e5a28fcaf9 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH1.java
@@ -36,6 +36,10 @@ public class NormalizationH1 extends Normalization {
* normalization with respect to the document length.
*/
public NormalizationH1(float c) {
+ // unbounded but typical range 0..10 or so
+ if (Float.isFinite(c) == false || c < 0) {
+ throw new IllegalArgumentException("illegal c value: " + c + ", must be a non-negative finite value");
+ }
this.c = c;
}
@@ -47,8 +51,8 @@ public class NormalizationH1 extends Normalization {
}
@Override
- public final float tfn(BasicStats stats, float tf, float len) {
- return tf * c * stats.getAvgFieldLength() / len;
+ public final double tfn(BasicStats stats, double tf, double len) {
+ return tf * c * (stats.getAvgFieldLength() / len);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH2.java b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
index 4bc50045a09..24fb74ea2aa 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH2.java
@@ -38,6 +38,10 @@ public class NormalizationH2 extends Normalization {
* normalization with respect to the document length.
*/
public NormalizationH2(float c) {
+ // unbounded but typical range 0..10 or so
+ if (Float.isFinite(c) == false || c < 0) {
+ throw new IllegalArgumentException("illegal c value: " + c + ", must be a non-negative finite value");
+ }
this.c = c;
}
@@ -49,8 +53,8 @@ public class NormalizationH2 extends Normalization {
}
@Override
- public final float tfn(BasicStats stats, float tf, float len) {
- return (float)(tf * log2(1 + c * stats.getAvgFieldLength() / len));
+ public final double tfn(BasicStats stats, double tf, double len) {
+ return tf * log2(1 + c * stats.getAvgFieldLength() / len);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH3.java b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH3.java
index 579cdb5094a..0bbea496b70 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH3.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationH3.java
@@ -36,11 +36,14 @@ public class NormalizationH3 extends Normalization {
* @param mu smoothing parameter μ
*/
public NormalizationH3(float mu) {
+ if (Float.isFinite(mu) == false || mu < 0) {
+ throw new IllegalArgumentException("illegal mu value: " + mu + ", must be a non-negative finite value");
+ }
this.mu = mu;
}
@Override
- public float tfn(BasicStats stats, float tf, float len) {
+ public double tfn(BasicStats stats, double tf, double len) {
return (tf + mu * ((stats.getTotalTermFreq()+1F) / (stats.getNumberOfFieldTokens()+1F))) / (len + mu) * mu;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationZ.java b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationZ.java
index 97b92a213b5..dabf9c906bc 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationZ.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/NormalizationZ.java
@@ -34,15 +34,18 @@ public class NormalizationZ extends Normalization {
/**
* Creates NormalizationZ with the supplied parameter <code>z</code>.
* @param z represents <code>A/(A+1)</code> where <code>A</code>
- * measures the specificity of the language.
+ * measures the specificity of the language. {@code z} must be in the range (0 .. 0.5), exclusive.
*/
public NormalizationZ(float z) {
+ if (Float.isNaN(z) || z <= 0f || z >= 0.5f) {
+ throw new IllegalArgumentException("illegal z value: " + z + ", must be in the range (0 .. 0.5)");
+ }
this.z = z;
}
@Override
- public float tfn(BasicStats stats, float tf, float len) {
- return (float)(tf * Math.pow(stats.avgFieldLength / len, z));
+ public double tfn(BasicStats stats, double tf, double len) {
+ return tf * Math.pow(stats.avgFieldLength / len, z);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
index 46899a3378f..d8ec244a6a7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
@@ -34,7 +34,7 @@ import org.apache.lucene.util.SmallFloat;
* A subclass of {@code Similarity} that provides a simplified API for its
* descendants. Subclasses are only required to implement the {@link #score}
* and {@link #toString()} methods. Implementing
- * {@link #explain(List, BasicStats, int, float, float)} is optional,
+ * {@link #explain(List, BasicStats, int, double, double)} is optional,
* inasmuch as SimilarityBase already provides a basic explanation of the score
* and the term frequency. However, implementers of a subclass are encouraged to
* include as much detail about the scoring method as possible.
@@ -93,7 +93,7 @@ public abstract class SimilarityBase extends Similarity {
}
/** Factory method to return a custom stats object */
- protected BasicStats newStats(String field, float boost) {
+ protected BasicStats newStats(String field, double boost) {
return new BasicStats(field, boost);
}
@@ -113,7 +113,7 @@ public abstract class SimilarityBase extends Similarity {
}
final long numberOfFieldTokens;
- final float avgFieldLength;
+ final double avgFieldLength;
long sumTotalTermFreq = collectionStats.sumTotalTermFreq();
@@ -145,7 +145,7 @@ public abstract class SimilarityBase extends Similarity {
* @param docLen the document length.
* @return the score.
*/
- protected abstract float score(BasicStats stats, float freq, float docLen);
+ protected abstract double score(BasicStats stats, double freq, double docLen);
/**
* Subclasses should implement this method to explain the score. {@code expl}
@@ -161,16 +161,16 @@ public abstract class SimilarityBase extends Similarity {
* @param docLen the document length.
*/
protected void explain(
- List subExpls, BasicStats stats, int doc, float freq, float docLen) {}
+ List subExpls, BasicStats stats, int doc, double freq, double docLen) {}
/**
* Explains the score. The implementation here provides a basic explanation
* in the format score(name-of-similarity, doc=doc-id,
* freq=term-frequency), computed from:, and
- * attaches the score (computed via the {@link #score(BasicStats, float, float)}
+ * attaches the score (computed via the {@link #score(BasicStats, double, double)}
* method) and the explanation for the term frequency. Subclasses content with
* this format may add additional details in
- * {@link #explain(List, BasicStats, int, float, float)}.
+ * {@link #explain(List, BasicStats, int, double, double)}.
*
* @param stats the corpus level statistics.
* @param doc the document id.
@@ -179,12 +179,12 @@ public abstract class SimilarityBase extends Similarity {
* @return the explanation.
*/
protected Explanation explain(
- BasicStats stats, int doc, Explanation freq, float docLen) {
+ BasicStats stats, int doc, Explanation freq, double docLen) {
List subs = new ArrayList<>();
explain(subs, stats, doc, freq.getValue(), docLen);
return Explanation.match(
- score(stats, freq.getValue(), docLen),
+ (float) score(stats, freq.getValue(), docLen),
"score(" + getClass().getSimpleName() + ", doc=" + doc + ", freq=" + freq.getValue() +"), computed from:",
subs);
}
@@ -248,8 +248,8 @@ public abstract class SimilarityBase extends Similarity {
/** Delegates the {@link #score(int, float)} and
* {@link #explain(int, Explanation)} methods to
- * {@link SimilarityBase#score(BasicStats, float, float)} and
- * {@link SimilarityBase#explain(BasicStats, int, Explanation, float)},
+ * {@link SimilarityBase#score(BasicStats, double, double)} and
+ * {@link SimilarityBase#explain(BasicStats, int, Explanation, double)},
* respectively.
*/
final class BasicSimScorer extends SimScorer {
@@ -261,9 +261,9 @@ public abstract class SimilarityBase extends Similarity {
this.norms = norms;
}
- float getLengthValue(int doc) throws IOException {
+ double getLengthValue(int doc) throws IOException {
if (norms == null) {
- return 1F;
+ return 1D;
}
if (norms.advanceExact(doc)) {
return LENGTH_TABLE[Byte.toUnsignedInt((byte) norms.longValue())];
@@ -275,7 +275,7 @@ public abstract class SimilarityBase extends Similarity {
@Override
public float score(int doc, float freq) throws IOException {
// We have to supply something in case norms are omitted
- return SimilarityBase.this.score(stats, freq, getLengthValue(doc));
+ return (float) SimilarityBase.this.score(stats, freq, getLengthValue(doc));
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
index dba1c61b090..97e522697dd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
@@ -450,7 +450,9 @@ public abstract class TFIDFSimilarity extends Similarity {
final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
final float idf = idf(df, docCount);
- return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
+ return Explanation.match(idf, "idf(docFreq, docCount)",
+ Explanation.match(df, "docFreq, number of documents containing term"),
+ Explanation.match(docCount, "docCount, total number of documents with field"));
}
/**
@@ -643,20 +645,37 @@ public abstract class TFIDFSimilarity extends Similarity {
"fieldNorm(doc=" + doc + ")");
return Explanation.match(
- tfExplanation.getValue() * stats.idf.getValue() * fieldNormExpl.getValue(),
+ tfExplanation.getValue() * fieldNormExpl.getValue(),
"fieldWeight in " + doc + ", product of:",
- tfExplanation, stats.idf, fieldNormExpl);
+ tfExplanation, fieldNormExpl);
}
private Explanation explainScore(int doc, Explanation freq, IDFStats stats, NumericDocValues norms, float[] normTable) throws IOException {
- Explanation queryExpl = Explanation.match(stats.boost, "boost");
- Explanation fieldExpl = explainField(doc, freq, stats, norms, normTable);
- if (stats.boost == 1f) {
- return fieldExpl;
+ List subs = new ArrayList();
+ if (stats.boost != 1F) {
+ subs.add(Explanation.match(stats.boost, "boost"));
}
+ subs.add(stats.idf);
+ Explanation tf = Explanation.match(tf(freq.getValue()), "tf(freq="+freq.getValue()+"), with freq of:", freq);
+ subs.add(tf);
+
+ float norm;
+ if (norms == null) {
+ norm = 1f;
+ } else if (norms.advanceExact(doc) == false) {
+ norm = 0f;
+ } else {
+ norm = normTable[(int) (norms.longValue() & 0xFF)];
+ }
+
+ Explanation fieldNorm = Explanation.match(
+ norm,
+ "fieldNorm(doc=" + doc + ")");
+ subs.add(fieldNorm);
+
return Explanation.match(
- queryExpl.getValue() * fieldExpl.getValue(),
+ stats.queryWeight * tf.getValue() * norm,
"score(doc="+doc+",freq="+freq.getValue()+"), product of:",
- queryExpl, fieldExpl);
+ subs);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java b/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java
index a3544d71442..1ed9669147c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java
@@ -97,7 +97,7 @@
* The easiest way to quickly implement a new ranking method is to extend
* {@link org.apache.lucene.search.similarities.SimilarityBase}, which provides
* basic implementations for the low level . Subclasses are only required to
- * implement the {@link org.apache.lucene.search.similarities.SimilarityBase#score(BasicStats, float, float)}
+ * implement the {@link org.apache.lucene.search.similarities.SimilarityBase#score(BasicStats, double, double)}
* and {@link org.apache.lucene.search.similarities.SimilarityBase#toString()}
* methods.
*
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/AxiomaticTestCase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/AxiomaticTestCase.java
new file mode 100644
index 00000000000..c2f614c8a0d
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/AxiomaticTestCase.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public abstract class AxiomaticTestCase extends BaseSimilarityTestCase {
+
+ @Override
+ protected final Similarity getSimilarity(Random random) {
+ // axiomatic parameter s
+ final float s;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ s = 0;
+ break;
+ case 1:
+ // tiny value
+ s = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ s = 1;
+ break;
+ default:
+ // random value
+ s = random.nextFloat();
+ break;
+ }
+ // axiomatic query length
+ final int queryLen;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ queryLen = 0;
+ break;
+ case 1:
+ // tiny value
+ queryLen = 1;
+ break;
+ case 2:
+ // maximum value
+ queryLen = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ queryLen = random.nextInt(Integer.MAX_VALUE);
+ break;
+ }
+ // axiomatic parameter k
+ final float k;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ k = 0;
+ break;
+ case 1:
+ // tiny value
+ k = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ k = 1;
+ break;
+ default:
+ // random value
+ k = random.nextFloat();
+ break;
+ }
+
+ return getAxiomaticModel(s, queryLen, k);
+ }
+
+ protected abstract Similarity getAxiomaticModel(float s, int queryLen, float k);
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java
new file mode 100644
index 00000000000..66236669704
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/BasicModelTestCase.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public abstract class BasicModelTestCase extends BaseSimilarityTestCase {
+
+ @Override
+ protected final Similarity getSimilarity(Random random) {
+ final AfterEffect afterEffect;
+ switch(random.nextInt(3)) {
+ case 0:
+ afterEffect = new AfterEffect.NoAfterEffect();
+ break;
+ case 1:
+ afterEffect = new AfterEffectL();
+ break;
+ default:
+ afterEffect = new AfterEffectB();
+ break;
+ }
+ // normalization hyper-parameter c
+ final float c;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ c = 0;
+ break;
+ case 1:
+ // tiny value
+ c = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ // we just limit the test to "reasonable" c values but don't enforce this anywhere.
+ c = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ c = Integer.MAX_VALUE * random.nextFloat();
+ break;
+ }
+ // normalization hyper-parameter z
+ final float z;
+ switch (random.nextInt(3)) {
+ case 0:
+ // minimum value
+ z = Float.MIN_VALUE;
+ break;
+ case 1:
+ // maximum value
+ z = Math.nextDown(0.5f);
+ break;
+ default:
+ // random value
+ float zcand = random.nextFloat() / 2;
+ if (zcand == 0f) {
+ // nextFloat returns 0 inclusive, we have to avoid it.
+ z = Math.nextUp(zcand);
+ } else {
+ z = zcand;
+ }
+ }
+ // dirichlet parameter mu
+ final float mu;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ mu = 0;
+ break;
+ case 1:
+ // tiny value
+ mu = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ // we just limit the test to "reasonable" mu values but don't enforce this anywhere.
+ mu = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ mu = Integer.MAX_VALUE * random.nextFloat();
+ break;
+ }
+ final Normalization normalization;
+ switch(random.nextInt(5)) {
+ case 0:
+ normalization = new Normalization.NoNormalization();
+ break;
+ case 1:
+ normalization = new NormalizationH1(c);
+ break;
+ case 2:
+ normalization = new NormalizationH2(c);
+ break;
+ case 3:
+ normalization = new NormalizationH3(mu);
+ break;
+ default:
+ normalization = new NormalizationZ(z);
+ break;
+ }
+ return new DFRSimilarity(getBasicModel(), afterEffect, normalization);
+ }
+
+ /** return BasicModel under test */
+ protected abstract BasicModel getBasicModel();
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/DistributionTestCase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/DistributionTestCase.java
new file mode 100644
index 00000000000..6d425d29c0c
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/DistributionTestCase.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public abstract class DistributionTestCase extends BaseSimilarityTestCase {
+
+ @Override
+ protected final Similarity getSimilarity(Random random) {
+ final Lambda lambda;
+ if (random.nextBoolean()) {
+ lambda = new LambdaDF();
+ } else {
+ lambda = new LambdaTTF();
+ }
+
+ // normalization hyper-parameter c
+ final float c;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ c = 0;
+ break;
+ case 1:
+ // tiny value
+ c = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ // we just limit the test to "reasonable" c values but don't enforce this anywhere.
+ c = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ c = Integer.MAX_VALUE * random.nextFloat();
+ break;
+ }
+ // normalization hyper-parameter z
+ final float z;
+ switch (random.nextInt(3)) {
+ case 0:
+ // minimum value
+ z = Float.MIN_VALUE;
+ break;
+ case 1:
+ // maximum value
+ z = Math.nextDown(0.5f);
+ break;
+ default:
+ // random value
+ float zcand = random.nextFloat() / 2;
+ if (zcand == 0f) {
+ // nextFloat returns 0 inclusive, we have to avoid it.
+ z = Math.nextUp(zcand);
+ } else {
+ z = zcand;
+ }
+ }
+ // dirichlet parameter mu
+ final float mu;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ mu = 0;
+ break;
+ case 1:
+ // tiny value
+ mu = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ // we just limit the test to "reasonable" mu values but don't enforce this anywhere.
+ mu = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ mu = Integer.MAX_VALUE * random.nextFloat();
+ break;
+ }
+ final Normalization normalization;
+ switch(random.nextInt(5)) {
+ case 0:
+ normalization = new Normalization.NoNormalization();
+ break;
+ case 1:
+ normalization = new NormalizationH1(c);
+ break;
+ case 2:
+ normalization = new NormalizationH2(c);
+ break;
+ case 3:
+ normalization = new NormalizationH3(mu);
+ break;
+ default:
+ normalization = new NormalizationZ(z);
+ break;
+ }
+ return new IBSimilarity(getDistribution(), lambda, normalization);
+ }
+
+ /** return Distribution under test */
+ protected abstract Distribution getDistribution();
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
new file mode 100644
index 00000000000..16da903e1dc
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1EXP.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// returns NaN scores for sloppy freqs < 1 (due to log without floor)
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestAxiomaticF1EXP extends AxiomaticTestCase {
+
+ @Override
+ protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
+ return new AxiomaticF1EXP(s, k);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
new file mode 100644
index 00000000000..88ad18ee2ef
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF1LOG.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// returns NaN scores for sloppy freqs < 1 (due to log without floor)
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestAxiomaticF1LOG extends AxiomaticTestCase {
+
+ @Override
+ protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
+ return new AxiomaticF1LOG(s);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF2EXP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF2EXP.java
new file mode 100644
index 00000000000..e9ab9b6ff60
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF2EXP.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+public class TestAxiomaticF2EXP extends AxiomaticTestCase {
+
+ @Override
+ protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
+ return new AxiomaticF2EXP(s, k);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF2LOG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF2LOG.java
new file mode 100644
index 00000000000..f9c9420cc72
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF2LOG.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+public class TestAxiomaticF2LOG extends AxiomaticTestCase {
+
+ @Override
+ protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
+ return new AxiomaticF2LOG(s);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
new file mode 100644
index 00000000000..69ab7193e56
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3EXP.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// returns negative scores at least, but it (now) warns it has problems
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestAxiomaticF3EXP extends AxiomaticTestCase {
+
+ @Override
+ protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
+ // TODO: use the randomized parameters and not these hardcoded ones
+ return new AxiomaticF3EXP(0.25f, 1);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
new file mode 100644
index 00000000000..686327731f0
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestAxiomaticF3LOG.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// returns negative scores at least, but it (now) warns it has problems
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestAxiomaticF3LOG extends AxiomaticTestCase {
+
+ @Override
+ protected final Similarity getAxiomaticModel(float s, int queryLen, float k) {
+ // TODO: use the randomized parameters and not these hardcoded ones
+ return new AxiomaticF3LOG(0.25f, 1);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBM25Similarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBM25Similarity.java
index 4c6382baf62..9dcf7e64889 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBM25Similarity.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBM25Similarity.java
@@ -17,10 +17,9 @@
package org.apache.lucene.search.similarities;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.util.LuceneTestCase;
+import java.util.Random;
-public class TestBM25Similarity extends LuceneTestCase {
+public class TestBM25Similarity extends BaseSimilarityTestCase {
public void testIllegalK1() {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
@@ -61,17 +60,51 @@ public class TestBM25Similarity extends LuceneTestCase {
assertTrue(expected.getMessage().contains("illegal b value"));
}
- private static Explanation findExplanation(Explanation expl, String text) {
- if (expl.getDescription().equals(text)) {
- return expl;
- } else {
- for (Explanation sub : expl.getDetails()) {
- Explanation match = findExplanation(sub, text);
- if (match != null) {
- return match;
- }
- }
+ @Override
+ protected Similarity getSimilarity(Random random) {
+ // term frequency normalization parameter k1
+ final float k1;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ k1 = 0;
+ break;
+ case 1:
+ // tiny value
+ k1 = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ // upper bound on an individual term's score is 43.262806 * (k1 + 1) * boost
+ // we just limit the test to "reasonable" k1 values but don't enforce this anywhere.
+ k1 = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ k1 = Integer.MAX_VALUE * random.nextFloat();
+ break;
}
- return null;
+
+ // length normalization parameter b [0 .. 1]
+ final float b;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ b = 0;
+ break;
+ case 1:
+ // tiny value
+ b = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ b = 1;
+ break;
+ default:
+ // random value
+ b = random.nextFloat();
+ break;
+ }
+ return new BM25Similarity(k1, b);
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelBE.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelBE.java
new file mode 100644
index 00000000000..2dc956f7da0
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelBE.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// returns negative scores at least, but it warns it has problems
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestBasicModelBE extends BasicModelTestCase {
+
+ @Override
+ protected BasicModel getBasicModel() {
+ return new BasicModelBE();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelD.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelD.java
new file mode 100644
index 00000000000..7eee359b3d4
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelD.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// scores go backwards with respect to TF, but it warns it has problems
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestBasicModelD extends BasicModelTestCase {
+
+ @Override
+ protected BasicModel getBasicModel() {
+ return new BasicModelD();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelG.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelG.java
new file mode 100644
index 00000000000..280affb89a0
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelG.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+public class TestBasicModelG extends BasicModelTestCase {
+
+ @Override
+ protected BasicModel getBasicModel() {
+ return new BasicModelG();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIF.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIF.java
new file mode 100644
index 00000000000..0b7c9fc1e3f
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIF.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+public class TestBasicModelIF extends BasicModelTestCase {
+
+ @Override
+ protected BasicModel getBasicModel() {
+ return new BasicModelIF();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIn.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIn.java
new file mode 100644
index 00000000000..c474982d0dd
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIn.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+public class TestBasicModelIn extends BasicModelTestCase {
+
+ @Override
+ protected BasicModel getBasicModel() {
+ return new BasicModelIn();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIne.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIne.java
new file mode 100644
index 00000000000..c9a8a5f7102
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelIne.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+public class TestBasicModelIne extends BasicModelTestCase {
+
+ @Override
+ protected BasicModel getBasicModel() {
+ return new BasicModelIne();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelP.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelP.java
new file mode 100644
index 00000000000..2788ff8edb8
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBasicModelP.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// scores go backwards with respect to TF, but it warns it has problems
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestBasicModelP extends BasicModelTestCase {
+
+ @Override
+ protected BasicModel getBasicModel() {
+ return new BasicModelP();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java
index c3885143547..c4dec7c13f5 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java
@@ -17,6 +17,7 @@
package org.apache.lucene.search.similarities;
import java.io.IOException;
+import java.util.Random;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
@@ -32,11 +33,10 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
-public class TestBooleanSimilarity extends LuceneTestCase {
+public class TestBooleanSimilarity extends BaseSimilarityTestCase {
public void testTermScoreIsEqualToBoost() throws IOException {
Directory dir = newDirectory();
@@ -114,4 +114,9 @@ public class TestBooleanSimilarity extends LuceneTestCase {
0f);
}
}
+
+ @Override
+ protected Similarity getSimilarity(Random random) {
+ return new BooleanSimilarity();
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java
index e7a56067c55..4a5a10fcbaf 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestClassicSimilarity.java
@@ -19,6 +19,7 @@ package org.apache.lucene.search.similarities;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Random;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
@@ -39,11 +40,10 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.TFIDFSimilarity.IDFStats;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
-public class TestClassicSimilarity extends LuceneTestCase {
+public class TestClassicSimilarity extends BaseSimilarityTestCase {
private Directory directory;
private IndexReader indexReader;
private IndexSearcher indexSearcher;
@@ -185,4 +185,9 @@ public class TestClassicSimilarity extends LuceneTestCase {
0f);
}
}
+
+ @Override
+ protected Similarity getSimilarity(Random random) {
+ return new ClassicSimilarity();
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionLL.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionLL.java
new file mode 100644
index 00000000000..de28d6f1b97
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionLL.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+public class TestDistributionLL extends DistributionTestCase {
+
+ @Override
+ protected Distribution getDistribution() {
+ return new DistributionLL();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
new file mode 100644
index 00000000000..984915a23da
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestDistributionSPL.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
+
+// scores go infinite, but it warns it has problems
+@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-8010")
+public class TestDistributionSPL extends DistributionTestCase {
+
+ @Override
+ protected Distribution getDistribution() {
+ return new DistributionSPL();
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceChiSquared.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceChiSquared.java
new file mode 100644
index 00000000000..c2fa06c4f6f
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceChiSquared.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public class TestIndependenceChiSquared extends BaseSimilarityTestCase {
+
+ @Override
+ protected final Similarity getSimilarity(Random random) {
+ return new DFISimilarity(new IndependenceChiSquared());
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceSaturated.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceSaturated.java
new file mode 100644
index 00000000000..38be8b699df
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceSaturated.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public class TestIndependenceSaturated extends BaseSimilarityTestCase {
+
+ @Override
+ protected final Similarity getSimilarity(Random random) {
+ return new DFISimilarity(new IndependenceSaturated());
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceStandardized.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceStandardized.java
new file mode 100644
index 00000000000..959912a2553
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestIndependenceStandardized.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public class TestIndependenceStandardized extends BaseSimilarityTestCase {
+
+ @Override
+ protected final Similarity getSimilarity(Random random) {
+ return new DFISimilarity(new IndependenceStandardized());
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestLMDirichletSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestLMDirichletSimilarity.java
new file mode 100644
index 00000000000..d6043e5cfec
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestLMDirichletSimilarity.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public class TestLMDirichletSimilarity extends BaseSimilarityTestCase {
+
+ @Override
+ protected final Similarity getSimilarity(Random random) {
+ // smoothing parameter mu, unbounded
+ final float mu;
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ mu = 0;
+ break;
+ case 1:
+ // tiny value
+ mu = Float.MIN_VALUE;
+ break;
+ case 2:
+ // maximum value
+ // we just limit the test to "reasonable" mu values but don't enforce this anywhere.
+ mu = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ mu = Integer.MAX_VALUE * random.nextFloat();
+ break;
+ }
+ return new LMDirichletSimilarity(mu);
+ }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestLMJelinekMercerSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestLMJelinekMercerSimilarity.java
new file mode 100644
index 00000000000..0fa8db8c058
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestLMJelinekMercerSimilarity.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.util.Random;
+
+public class TestLMJelinekMercerSimilarity extends BaseSimilarityTestCase {
+
+  @Override
+  protected final Similarity getSimilarity(Random random) {
+    // smoothing parameter lambda: valid range is (0..1], anything else is rejected by the constructor
+    final float lambda;
+    switch (random.nextInt(3)) {
+      case 0:
+        // tiny value
+        lambda = Float.MIN_VALUE;
+        break;
+      case 1:
+        // maximum value
+        lambda = 1;
+        break;
+      default:
+        // random value in (0..1]: nextFloat() is in [0..1) and may return 0, which is not a legal lambda
+        lambda = 1 - random.nextFloat();
+        break;
+    }
+    return new LMJelinekMercerSimilarity(lambda);
+  }
+
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
index 8fbd69d9190..e52c9742f65 100644
--- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java
@@ -51,7 +51,7 @@ import org.apache.lucene.util.Version;
* items in the list. If a test case fails, the name of the Similarity that
* caused the failure is returned as part of the assertion error message.
* Unit testing is performed by constructing statistics manually and calling
- * the {@link SimilarityBase#score(BasicStats, float, float)} method of the
+ * the {@link SimilarityBase#score(BasicStats, double, double)} method of the
* Similarities. The statistics represent corner cases of corpus distributions.
*
* For the integration tests, a small (8-document) collection is indexed. The
@@ -191,17 +191,17 @@ public class TestSimilarityBase extends LuceneTestCase {
}
/**
* The generic test core called by all unit test methods. It calls the
- * {@link SimilarityBase#score(BasicStats, float, float)} method of all
+ * {@link SimilarityBase#score(BasicStats, double, double)} method of all
* Similarities in {@link #sims} and checks if the score is valid; i.e. it
* is a finite positive real number.
*/
private void unitTestCore(BasicStats stats, float freq, int docLen) {
for (SimilarityBase sim : sims) {
BasicStats realStats = (BasicStats) sim.computeWeight(
- stats.getBoost(),
+ (float)stats.getBoost(),
toCollectionStats(stats),
toTermStats(stats));
- float score = sim.score(realStats, freq, docLen);
+ float score = (float)sim.score(realStats, freq, docLen);
float explScore = sim.explain(
realStats, 1, Explanation.match(freq, "freq"), docLen).getValue();
assertFalse("Score infinite: " + sim.toString(), Float.isInfinite(score));
@@ -524,17 +524,17 @@ public class TestSimilarityBase extends LuceneTestCase {
/**
* The generic test core called by all correctness test methods. It calls the
- * {@link SimilarityBase#score(BasicStats, float, float)} method of all
+ * {@link SimilarityBase#score(BasicStats, double, double)} method of all
* Similarities in {@link #sims} and compares the score against the manually
* computed {@code gold}.
*/
private void correctnessTestCore(SimilarityBase sim, float gold) {
BasicStats stats = createStats();
BasicStats realStats = (BasicStats) sim.computeWeight(
- stats.getBoost(),
+ (float)stats.getBoost(),
toCollectionStats(stats),
toTermStats(stats));
- float score = sim.score(realStats, FREQ, DOC_LEN);
+ float score = (float) sim.score(realStats, FREQ, DOC_LEN);
assertEquals(
sim.toString() + " score not correct.", gold, score, FLOAT_EPSILON);
}
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index c87fdbb6ce5..7830648368b 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -1484,7 +1484,7 @@ public class TestBlockJoin extends LuceneTestCase {
}
@Override
- protected float score(BasicStats stats, float freq, float docLen) {
+ protected double score(BasicStats stats, double freq, double docLen) {
return freq;
}
};
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
index dee7d8405c0..7696a63f83e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
+import java.util.regex.Pattern;
import java.util.Random;
import junit.framework.Assert;
@@ -318,6 +319,8 @@ public class CheckHits {
public static float explainToleranceDelta(float f1, float f2) {
return Math.max(EXPLAIN_SCORE_TOLERANCE_MINIMUM, Math.max(Math.abs(f1), Math.abs(f2)) * EXPLAIN_SCORE_TOLERANCE_DELTA);
}
+
+ private static final Pattern COMPUTED_FROM_PATTERN = Pattern.compile(".*, computed as .* from:");
/**
* Assert that an explanation has the expected score, and optionally that its
@@ -335,9 +338,12 @@ public class CheckHits {
boolean deep,
Explanation expl) {
float value = expl.getValue();
- Assert.assertEquals(q+": score(doc="+doc+")="+score+
- " != explanationScore="+value+" Explanation: "+expl,
- score,value,explainToleranceDelta(score, value));
+    // TODO: clean this up if we use junit 5 (the assert message is costly)
+    try {
+      Assert.assertEquals(score, value, explainToleranceDelta(score, value));
+    } catch (AssertionError e) { // junit reports failures as AssertionError (an Error), never as an Exception
+      Assert.fail(q+": score(doc="+doc+")="+score+" != explanationScore="+value+" Explanation: "+expl);
+    }
if (!deep) return;
@@ -368,7 +374,7 @@ public class CheckHits {
boolean productOf = descr.endsWith("product of:");
boolean sumOf = descr.endsWith("sum of:");
boolean maxOf = descr.endsWith("max of:");
- boolean computedOf = descr.matches(".*, computed as .* from:");
+ boolean computedOf = descr.indexOf("computed as") > 0 && COMPUTED_FROM_PATTERN.matcher(descr).matches();
boolean maxTimesOthers = false;
if (!(productOf || sumOf || maxOf || computedOf)) {
// maybe 'max plus x times others'
@@ -386,11 +392,12 @@ public class CheckHits {
}
}
// TODO: this is a TERRIBLE assertion!!!!
- Assert.assertTrue(
- q+": multi valued explanation description=\""+descr
- +"\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
- +" or 'sum of:' or 'max of:' - "+expl,
- productOf || sumOf || maxOf || computedOf || maxTimesOthers);
+ if (false == (productOf || sumOf || maxOf || computedOf || maxTimesOthers)) {
+ Assert.fail(
+ q+": multi valued explanation description=\""+descr
+ +"\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
+ +" or 'sum of:' or 'max of:' - "+expl);
+ }
float sum = 0;
float product = 1;
float max = 0;
@@ -414,9 +421,13 @@ public class CheckHits {
Assert.assertTrue("should never get here!", computedOf);
combined = value;
}
- Assert.assertEquals(q+": actual subDetails combined=="+combined+
- " != value="+value+" Explanation: "+expl,
- combined,value,explainToleranceDelta(combined, value));
+        // TODO: clean this up if we use junit 5 (the assert message is costly)
+        try {
+          Assert.assertEquals(combined, value, explainToleranceDelta(combined, value));
+        } catch (AssertionError e) { // junit reports failures as AssertionError (an Error), never as an Exception
+          Assert.fail(q+": actual subDetails combined=="+combined+
+              " != value="+value+" Explanation: "+expl);
+        }
}
}
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
new file mode 100644
index 00000000000..d93594d884c
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
@@ -0,0 +1,473 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.CheckHits;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.search.similarities.Similarity.SimWeight;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.SmallFloat;
+import org.apache.lucene.util.TestUtil;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/**
+ * Abstract class to do basic tests for a similarity.
+ * NOTE: This test focuses on the similarity impl, nothing else.
+ * The [stretch] goal is for this test to be
+ * so thorough in testing a new Similarity that if this
+ * test passes, then all Lucene/Solr tests should also pass. Ie,
+ * if there is some bug in a given Similarity that this
+ * test fails to catch then this test needs to be improved! */
+public abstract class BaseSimilarityTestCase extends LuceneTestCase {
+
+  static LeafReader WITHOUT_NORM; // single-document reader, stored as entry 0 of NORM_VALUES
+  static Directory WITHOUT_NORM_DIR; // directory backing WITHOUT_NORM
+
+  static LeafReader WITH_NORM_BASE; // single-document reader wrapped by the canned-norm readers
+  static Directory WITH_NORM_DIR; // directory backing WITH_NORM_BASE
+  static List<LeafReader> NORM_VALUES; // one reader per norm value, indexed by that value (0..255)
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ // "without norms" reader: a one-document index whose "field" holds the text "value"
+ WITHOUT_NORM_DIR = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), WITHOUT_NORM_DIR);
+ Document doc = new Document();
+ doc.add(newTextField("field", "value", Field.Store.NO));
+ writer.addDocument(doc);
+ WITHOUT_NORM = getOnlyLeafReader(writer.getReader());
+ writer.close();
+
+ // base reader for the "with norms" cases. NOTE(review): this field type sets omitNorms(true) while the "without norms" field above does not -- confirm that is intended; norms for these cases are supplied by the getNormValues override below
+ WITH_NORM_DIR = newDirectory();
+ writer = new RandomIndexWriter(random(), WITH_NORM_DIR);
+ doc = new Document();
+ FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
+ fieldType.setOmitNorms(true);
+ doc.add(newField("field", "value", fieldType));
+ writer.addDocument(doc);
+ WITH_NORM_BASE = getOnlyLeafReader(writer.getReader());
+ writer.close();
+
+ // one reader per possible norm value: slot 0 is WITHOUT_NORM, slots 1..255 wrap WITH_NORM_BASE and serve a canned norm equal to the slot index
+ NORM_VALUES = new ArrayList<>();
+ NORM_VALUES.add(WITHOUT_NORM);
+ for (int i = 1; i < 256; i++) {
+ final long value = i; // captured by the anonymous reader below
+ NORM_VALUES.add(new FilterLeafReader(WITH_NORM_BASE) {
+ @Override
+ public CacheHelper getCoreCacheHelper() {
+ return null;
+ }
+
+ @Override
+ public CacheHelper getReaderCacheHelper() {
+ return null;
+ }
+
+ @Override
+ public NumericDocValues getNormValues(String field) throws IOException {
+ if (field.equals("field")) {
+ return new CannedNorm(value); // fresh iterator on every call
+ } else {
+ return super.getNormValues(field);
+ }
+ }
+ });
+ }
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ IOUtils.close(WITH_NORM_BASE, WITH_NORM_DIR, WITHOUT_NORM, WITHOUT_NORM_DIR); // readers and directories opened in beforeClass
+ WITH_NORM_BASE = WITHOUT_NORM = null; // clear the static references after closing
+ WITH_NORM_DIR = WITHOUT_NORM_DIR = null;
+ NORM_VALUES = null;
+ }
+
+  /**
+   * Norms for a one-document index: doc 0 always reports the value given to the constructor.
+   */
+  static class CannedNorm extends NumericDocValues {
+    /** norm reported for the only document */
+    final long value;
+    /** current position, -1 while unpositioned */
+    int docID = -1;
+
+    CannedNorm(long value) {
+      this.value = value;
+    }
+
+    @Override
+    public long cost() {
+      return 0;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public long longValue() throws IOException {
+      return value;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      // an unpositioned iterator moves to doc 0, anything else is exhausted
+      docID = (docID == -1) ? 0 : NO_MORE_DOCS;
+      return docID;
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      // doc 0 is the only target that can be reached
+      docID = (target == 0) ? 0 : NO_MORE_DOCS;
+      return docID;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      assert target == 0;
+      docID = target;
+      return true;
+    }
+  }
+
+ /**
+ * Return a new similarity with all parameters randomized within valid ranges, drawing only from the given random source; testRandomScoring calls this once per outer iteration.
+ */
+ protected abstract Similarity getSimilarity(Random random);
+
+ static final long MAXDOC_FORTESTING = 1L << 48; // largest maxDoc that newCorpus will draw
+ // must be at least MAXDOC_FORTESTING + Integer.MAX_VALUE
+ static final long MAXTOKENS_FORTESTING = 1L << 49; // cap on the total token count used by newCorpus
+
+ /**
+ * Returns random collection statistics that are at least possible given the norm value
+ * of a single document; norm 0 is treated as "norms omitted", any other value is decoded with SmallFloat.byte4ToInt.
+ */
+ static CollectionStatistics newCorpus(Random random, int norm) {
+ // lower bound of tokens in the collection (you produced this norm somehow)
+ final int lowerBound;
+ if (norm == 0) {
+ // norms are omitted, but there must have been at least one token to produce that norm
+ lowerBound = 1;
+ } else {
+ // minimum value that would decode to such a norm
+ lowerBound = SmallFloat.byte4ToInt((byte) norm);
+ }
+ final long maxDoc;
+ if (random.nextBoolean()) {
+ // small collection
+ maxDoc = TestUtil.nextLong(random, 1, 100000);
+ } else {
+ // huge collection
+ maxDoc = TestUtil.nextLong(random, 1, MAXDOC_FORTESTING);
+ }
+ // TODO: make this a mandatory statistic, or test it with -1
+ final long docCount;
+ if (random.nextBoolean()) {
+ // sparse field
+ docCount = TestUtil.nextLong(random, 1, maxDoc);
+ } else {
+ // fully populated
+ docCount = maxDoc;
+ }
+ // upper bound on total tokens: can't require docs to have > 2B tokens, and never more than MAXTOKENS_FORTESTING
+ long upperBound;
+ try {
+ upperBound = Math.min(MAXTOKENS_FORTESTING, Math.multiplyExact(docCount, Integer.MAX_VALUE));
+ } catch (ArithmeticException overflow) {
+ upperBound = MAXTOKENS_FORTESTING; // the product overflowed a long, so only the cap applies
+ }
+ // TODO: make this a mandatory statistic, or test it with -1
+ final long sumDocFreq;
+ if (random.nextBoolean()) {
+ // shortest possible docs
+ sumDocFreq = docCount;
+ } else {
+ // random docsize, leaving room so that sumDocFreq - 1 + lowerBound still fits under upperBound
+ sumDocFreq = TestUtil.nextLong(random, docCount, upperBound + 1 - lowerBound);
+ }
+ final long sumTotalTermFreq;
+ switch (random.nextInt(3)) {
+ case 0:
+ // unsupported (e.g. omitTF)
+ sumTotalTermFreq = -1;
+ break;
+ case 1:
+ // no repetition of terms (except to satisfy this norm)
+ sumTotalTermFreq = sumDocFreq - 1 + lowerBound;
+ break;
+ default:
+ // random repetition
+ assert sumDocFreq - 1 + lowerBound <= upperBound;
+ sumTotalTermFreq = TestUtil.nextLong(random, sumDocFreq - 1 + lowerBound, upperBound);
+ break;
+ }
+ return new CollectionStatistics("field", maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
+ }
+
+ private static final BytesRef TERM = new BytesRef("term");
+
+ /**
+ * Returns random statistics for a term that fits within the bounds of the given corpus (docFreq at most docCount; totalTermFreq is -1 when the corpus reports no sumTotalTermFreq).
+ */
+ static TermStatistics newTerm(Random random, CollectionStatistics corpus) {
+ final long docFreq;
+ if (random.nextBoolean()) {
+ // rare term
+ docFreq = 1;
+ } else {
+ // random specificity
+ docFreq = TestUtil.nextLong(random, 1, corpus.docCount());
+ }
+ final long totalTermFreq;
+ if (corpus.sumTotalTermFreq() == -1) {
+ // omitTF
+ totalTermFreq = -1;
+ } else if (random.nextBoolean()) {
+ // no repetition
+ totalTermFreq = docFreq;
+ } else {
+ // random repetition: but can't require docs to have > 2B tokens
+ long upperBound;
+ try {
+ upperBound = Math.min(corpus.sumTotalTermFreq(), Math.multiplyExact(docFreq, Integer.MAX_VALUE));
+ } catch (ArithmeticException overflow) {
+ upperBound = corpus.sumTotalTermFreq(); // the product overflowed a long, so only the corpus total limits it
+ }
+ totalTermFreq = TestUtil.nextLong(random, docFreq, upperBound);
+ }
+ return new TermStatistics(TERM, docFreq, totalTermFreq);
+ }
+
+ /**
+ * Tests scoring across a bunch of random terms/corpora/frequencies for each possible document length.
+ * It does the following checks:
+ * <ul>
+ * <li>scores are non-negative and finite.
+ * <li>score matches the explanation exactly.
+ * <li>internal explanations calculations are sane (e.g. sum of: and so on actually compute sums)
+ * <li>scores don't decrease as term frequencies increase: e.g. score(freq=N + 1) &gt;= score(freq=N)
+ * <li>scores don't decrease as documents get shorter, e.g. score(len=M) &gt;= score(len=M+1)
+ * <li>scores don't decrease as terms get rarer, e.g. score(term=N) &gt;= score(term=N+1)
+ * <li>scoring works for floating point frequencies (e.g. sloppy phrase and span queries will work)
+ * <li>scoring works for reasonably large 64-bit statistic values (e.g. distributed search will work)
+ * <li>scoring works for reasonably large boost values (0 .. Integer.MAX_VALUE, e.g. query boosts will work)
+ * <li>scoring works for parameters randomized within valid ranges (see {@link #getSimilarity(Random)})
+ * </ul>
+ */
+ public void testRandomScoring() throws Exception {
+ Random random = random();
+ final int iterations = atLeast(10);
+ for (int i = 0; i < iterations; i++) {
+ // pull a new similarity to switch up parameters
+ Similarity similarity = getSimilarity(random);
+ for (int j = 0; j < 10; j++) {
+ // for each norm value...
+ for (int k = 0; k < NORM_VALUES.size(); k++) {
+ CollectionStatistics corpus = newCorpus(random, k);
+ for (int l = 0; l < 10; l++) {
+ TermStatistics term = newTerm(random, corpus);
+ final float freq;
+ if (term.totalTermFreq() == -1) {
+ // omit TF
+ freq = 1;
+ } else if (term.docFreq() == 1) {
+ // only one document, all the instances must be here.
+ freq = Math.toIntExact(term.totalTermFreq());
+ } else {
+ // there is at least one other document, and those must have at least 1 instance each.
+ int upperBound = Math.toIntExact(Math.min(term.totalTermFreq() - term.docFreq() + 1, Integer.MAX_VALUE));
+ if (random.nextBoolean()) {
+ freq = TestUtil.nextInt(random, 1, upperBound);
+ } else {
+ float freqCandidate = upperBound * random.nextFloat();
+ // we need to be 2nd float value at a minimum, the pairwise test will check MIN_VALUE in this case.
+ // this avoids testing frequencies of 0 which seem wrong to allow (we should enforce computeSlopFactor etc)
+ if (freqCandidate <= Float.MIN_VALUE) {
+ freqCandidate = Math.nextUp(Float.MIN_VALUE);
+ }
+ freq = freqCandidate;
+ }
+ }
+ // we just limit the test to "reasonable" boost values but don't enforce this anywhere.
+ // too big, and you are asking for overflow. that's hard for a sim to enforce (but definitely possible)
+ // for now, we just want to detect overflow where it's a real bug/hazard in the computation with reasonable inputs.
+ final float boost;
+ switch (random.nextInt(5)) {
+ case 0:
+ // minimum value (not enforced)
+ boost = 0F;
+ break;
+ case 1:
+ // tiny value
+ boost = Float.MIN_VALUE;
+ break;
+ case 2:
+ // no-op value (sometimes treated special in explanations)
+ boost = 1F;
+ break;
+ case 3:
+ // maximum value (not enforced)
+ boost = Integer.MAX_VALUE;
+ break;
+ default:
+ // random value
+ boost = random.nextFloat() * Integer.MAX_VALUE;
+ break;
+ }
+ doTestScoring(similarity, corpus, term, boost, freq, k); // k doubles as the index into NORM_VALUES
+ }
+ }
+ }
+ }
+ }
+
+  /** Runs every check for one scenario (similarity, corpus, term, boost, freq, index into NORM_VALUES), so that if you hit a test failure you can write a reproducer just for that scenario; the scenario is printed when any check fails. */
+  private static void doTestScoring(Similarity similarity, CollectionStatistics corpus, TermStatistics term, float boost, float freq, int norm) throws IOException {
+    boolean success = false;
+    SimWeight weight = similarity.computeWeight(boost, corpus, term);
+    SimScorer scorer = similarity.simScorer(weight, NORM_VALUES.get(norm).getContext());
+    try {
+      float score = scorer.score(0, freq);
+      // check that score isn't infinite or negative
+      assertTrue("infinite/NaN score: " + score, Float.isFinite(score));
+      assertTrue("negative score: " + score, score >= 0);
+      // check explanation matches
+      Explanation explanation = scorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document"));
+      if (score != explanation.getValue()) {
+        fail("expected: " + score + ", got: " + explanation);
+      }
+      CheckHits.verifyExplanation("", 0, score, true, explanation);
+
+      // check score(freq-1), given the same norm it should be <= score(freq) [scores non-decreasing for more term occurrences]
+      final float prevFreq;
+      if (random().nextBoolean() && freq == (int)freq && freq > 1 && term.docFreq() > 1) {
+        // previous in integer space
+        prevFreq = freq - 1;
+      } else {
+        // previous in float space (e.g. for sloppyPhrase)
+        prevFreq = Math.nextDown(freq);
+      }
+
+      float prevScore = scorer.score(0, prevFreq);
+      // check that score isn't infinite or negative
+      assertTrue(Float.isFinite(prevScore));
+      assertTrue(prevScore >= 0);
+      // check explanation matches
+      Explanation prevExplanation = scorer.explain(0, Explanation.match(prevFreq, "freq, occurrences of term within document"));
+      if (prevScore != prevExplanation.getValue()) {
+        fail("expected: " + prevScore + ", got: " + prevExplanation);
+      }
+      CheckHits.verifyExplanation("test query (prevFreq)", 0, prevScore, true, prevExplanation);
+
+      if (prevScore > score) {
+        System.out.println(prevExplanation);
+        System.out.println(explanation);
+        fail("score(" + prevFreq + ")=" + prevScore + " > score(" + freq + ")=" + score);
+      }
+
+      // check score(norm-1), given the same freq it should be >= score(norm) [scores non-decreasing as docs get shorter]
+      if (norm > 1) {
+        SimScorer prevNormScorer = similarity.simScorer(weight, NORM_VALUES.get(norm - 1).getContext());
+        float prevNormScore = prevNormScorer.score(0, freq);
+        // check that score isn't infinite or negative
+        assertTrue(Float.isFinite(prevNormScore));
+        assertTrue(prevNormScore >= 0);
+        // check explanation matches
+        Explanation prevNormExplanation = prevNormScorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document"));
+        if (prevNormScore != prevNormExplanation.getValue()) {
+          fail("expected: " + prevNormScore + ", got: " + prevNormExplanation);
+        }
+        CheckHits.verifyExplanation("test query (prevNorm)", 0, prevNormScore, true, prevNormExplanation);
+
+        if (prevNormScore < score) {
+          System.out.println(prevNormExplanation);
+          System.out.println(explanation);
+          fail("score(" + freq + "," + (norm-1) + ")=" + prevNormScore + " < score(" + freq + "," + norm + ")=" + score);
+        }
+      }
+
+      // check score(term-1), given the same freq/norm it should be >= score(term) [scores non-decreasing as terms get rarer]; the weight must be built from prevTerm, not term
+      if (term.docFreq() > 1 && (term.totalTermFreq() == -1 || freq < term.totalTermFreq())) {
+        final long prevTotalTermFreq;
+        if (term.totalTermFreq() == -1) {
+          prevTotalTermFreq = -1;
+        } else {
+          prevTotalTermFreq = term.totalTermFreq() - 1;
+        }
+        TermStatistics prevTerm = new TermStatistics(term.term(), term.docFreq() - 1, prevTotalTermFreq);
+        SimWeight prevWeight = similarity.computeWeight(boost, corpus, prevTerm);
+        SimScorer prevTermScorer = similarity.simScorer(prevWeight, NORM_VALUES.get(norm).getContext());
+        float prevTermScore = prevTermScorer.score(0, freq);
+        // check that score isn't infinite or negative
+        assertTrue(Float.isFinite(prevTermScore));
+        assertTrue(prevTermScore >= 0);
+        // check explanation matches
+        Explanation prevTermExplanation = prevTermScorer.explain(0, Explanation.match(freq, "freq, occurrences of term within document"));
+        if (prevTermScore != prevTermExplanation.getValue()) {
+          fail("expected: " + prevTermScore + ", got: " + prevTermExplanation);
+        }
+        CheckHits.verifyExplanation("test query (prevTerm)", 0, prevTermScore, true, prevTermExplanation);
+
+        if (prevTermScore < score) {
+          System.out.println(prevTermExplanation);
+          System.out.println(explanation);
+          fail("score(" + freq + "," + (prevTerm) + ")=" + prevTermScore + " < score(" + freq + "," + term + ")=" + score);
+        }
+      }
+
+      success = true;
+    } finally {
+      if (!success) {
+        System.out.println(similarity);
+        System.out.println(corpus);
+        System.out.println(term);
+        if (norm == 0) {
+          System.out.println("norms=omitted");
+        } else {
+          System.out.println("norm=" + norm + " (doc length ~ " + SmallFloat.byte4ToInt((byte) norm) + ")");
+        }
+        System.out.println("freq=" + freq);
+      }
+    }
+  }
+}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
index d3351ab9f63..7f530177b9d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
@@ -434,7 +434,7 @@ public final class TestUtil {
/** start and end are BOTH inclusive */
public static long nextLong(Random r, long start, long end) {
- assert end >= start;
+ assert end >= start : "start=" + start + ",end=" + end;
final BigInteger range = BigInteger.valueOf(end).add(BigInteger.valueOf(1)).subtract(BigInteger.valueOf(start));
if (range.compareTo(BigInteger.valueOf(Integer.MAX_VALUE)) <= 0) {
return start + r.nextInt(range.intValue());