From cf016f8987e804bcd858a2a414eacdf1b3c54cf5 Mon Sep 17 00:00:00 2001
From: javanna
* Parameters:
*
+ *
+ */
+ public LegacyBM25Similarity() {
+ this.bm25Similarity = new BM25Similarity();
+ }
+
+ /**
+ * BM25 with the supplied parameter values.
+ * @param k1 Controls non-linear term frequency normalization (saturation).
+ * @param b Controls to what degree document length normalizes tf values.
+ * @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is
+ * not within the range {@code [0..1]}
+ */
+ public LegacyBM25Similarity(float k1, float b) {
+ this.bm25Similarity = new BM25Similarity(k1, b);
+ }
+
+ @Override
+ public long computeNorm(FieldInvertState state) {
+ return bm25Similarity.computeNorm(state);
+ }
+
+ @Override
+ public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+ return bm25Similarity.scorer(boost * (1 + bm25Similarity.getK1()), collectionStats, termStats);
+ }
+
+ /**
+ * Returns the k1
parameter
+ * @see #LegacyBM25Similarity(float, float)
+ */
+ public final float getK1() {
+ return bm25Similarity.getK1();
+ }
+
+ /**
+ * Returns the b
parameter
+ * @see #LegacyBM25Similarity(float, float)
+ */
+ public final float getB() {
+ return bm25Similarity.getB();
+ }
+
+ /** Sets whether overlap tokens (Tokens with 0 position increment) are
+ * ignored when computing norm. By default this is true, meaning overlap
+ * tokens do not count when computing norms. */
+ public void setDiscountOverlaps(boolean v) {
+ bm25Similarity.setDiscountOverlaps(v);
+ }
+
+ @Override
+ public String toString() {
+ return bm25Similarity.toString();
+ }
+}
diff --git a/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html b/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html
new file mode 100644
index 00000000000..7f624d41d54
--- /dev/null
+++ b/lucene/misc/src/java/org/apache/lucene/search/similarity/package.html
@@ -0,0 +1,22 @@
+
+
+
+
+Misc similarity implementations.
+
+
\ No newline at end of file
diff --git a/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java b/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java
new file mode 100644
index 00000000000..b3a0cd24ca1
--- /dev/null
+++ b/lucene/misc/src/test/org/apache/lucene/search/similarity/TestLegacyBM25Similarity.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.similarity;
+
+import java.util.Random;
+
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.BaseSimilarityTestCase;
+import org.apache.lucene.search.similarities.Similarity;
+
+public class TestLegacyBM25Similarity extends BaseSimilarityTestCase {
+
+ public void testIllegalK1() {
+ IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+ new LegacyBM25Similarity(Float.POSITIVE_INFINITY, 0.75f);
+ });
+ assertTrue(expected.getMessage().contains("illegal k1 value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new LegacyBM25Similarity(-1, 0.75f);
+ });
+ assertTrue(expected.getMessage().contains("illegal k1 value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new LegacyBM25Similarity(Float.NaN, 0.75f);
+ });
+ assertTrue(expected.getMessage().contains("illegal k1 value"));
+ }
+
+ public void testIllegalB() {
+ IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+ new LegacyBM25Similarity(1.2f, 2f);
+ });
+ assertTrue(expected.getMessage().contains("illegal b value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new LegacyBM25Similarity(1.2f, -1f);
+ });
+ assertTrue(expected.getMessage().contains("illegal b value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new LegacyBM25Similarity(1.2f, Float.POSITIVE_INFINITY);
+ });
+ assertTrue(expected.getMessage().contains("illegal b value"));
+
+ expected = expectThrows(IllegalArgumentException.class, () -> {
+ new LegacyBM25Similarity(1.2f, Float.NaN);
+ });
+ assertTrue(expected.getMessage().contains("illegal b value"));
+ }
+
+ public void testDefaults() {
+ LegacyBM25Similarity legacyBM25Similarity = new LegacyBM25Similarity();
+ BM25Similarity bm25Similarity = new BM25Similarity();
+ assertEquals(bm25Similarity.getB(), legacyBM25Similarity.getB(), 0f);
+ assertEquals(bm25Similarity.getK1(), legacyBM25Similarity.getK1(), 0f);
+ }
+
+ public void testToString() {
+ LegacyBM25Similarity legacyBM25Similarity = new LegacyBM25Similarity();
+ BM25Similarity bm25Similarity = new BM25Similarity();
+ assertEquals(bm25Similarity.toString(), legacyBM25Similarity.toString());
+ }
+
+ @Override
+ protected Similarity getSimilarity(Random random) {
+ return new LegacyBM25Similarity(randomK1(random), randomB(random));
+ }
+
+ private static float randomK1(Random random) {
+ // term frequency normalization parameter k1
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ return 0;
+ case 1:
+ // tiny value
+ return Float.MIN_VALUE;
+ case 2:
+ // maximum value
+ // upper bounds on individual term's score is 43.262806 * (k1 + 1) * boost
+ // we just limit the test to "reasonable" k1 values but don't enforce this anywhere.
+ return Integer.MAX_VALUE;
+ default:
+ // random value
+ return Integer.MAX_VALUE * random.nextFloat();
+ }
+ }
+
+ private static float randomB(Random random) {
+ // length normalization parameter b [0 .. 1]
+ switch (random.nextInt(4)) {
+ case 0:
+ // minimum value
+ return 0;
+ case 1:
+ // tiny value
+ return Float.MIN_VALUE;
+ case 2:
+ // maximum value
+ return 1;
+ default:
+ // random value
+ return random.nextFloat();
+ }
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java
index ef8ffbd2654..fd8a48cb7d6 100644
--- a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java
+++ b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java
@@ -16,13 +16,13 @@
*/
package org.apache.solr.search.similarities;
-import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
/**
- * Factory for {@link BM25Similarity}
+ * Factory for {@link LegacyBM25Similarity}
*
@@ -35,7 +35,7 @@ import org.apache.solr.schema.SimilarityFactory;
* Optional settings:
*
*
* @lucene.experimental
*/
@@ -54,7 +54,7 @@ public class BM25SimilarityFactory extends SimilarityFactory {
@Override
public Similarity getSimilarity() {
- BM25Similarity sim = new BM25Similarity(k1, b);
+ LegacyBM25Similarity sim = new LegacyBM25Similarity(k1, b);
sim.setDiscountOverlaps(discountOverlaps);
return sim;
}
diff --git a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
index 378197c8e4a..6c3dedfb398 100644
--- a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
+++ b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java
@@ -16,10 +16,10 @@
*/
package org.apache.solr.search.similarities;
-import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
@@ -40,7 +40,7 @@ import org.apache.solr.util.plugin.SolrCoreAware;
*
luceneMatchVersion < 6.0
= {@link ClassicSimilarity}luceneMatchVersion >= 6.0
= {@link BM25Similarity}luceneMatchVersion >= 6.0
= {@link LegacyBM25Similarity}
* The defaultSimFromFieldType
option accepts the name of any fieldtype, and uses
@@ -109,7 +109,7 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
Similarity defaultSim = null;
if (null == defaultSimFromFieldType) {
// nothing configured, choose a sensible implicit default...
- defaultSim = new BM25Similarity();
+ defaultSim = new LegacyBM25Similarity();
} else {
FieldType defSimFT = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
if (null == defSimFT) {
diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
index 5d1dab1982b..9a72043913e 100644
--- a/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/TestBulkSchemaAPI.java
@@ -16,30 +16,6 @@
*/
package org.apache.solr.rest.schema;
-import org.apache.commons.io.FileUtils;
-
-import org.apache.lucene.search.similarities.DFISimilarity;
-import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
-import org.apache.lucene.search.similarities.BM25Similarity;
-import org.apache.lucene.misc.SweetSpotSimilarity;
-import org.apache.lucene.search.similarities.Similarity;
-
-import org.apache.solr.common.SolrDocumentList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.core.CoreContainer;
-import org.apache.solr.schema.SimilarityFactory;
-import org.apache.solr.search.similarities.SchemaSimilarityFactory;
-import org.apache.solr.util.RESTfulServerProvider;
-import org.apache.solr.util.RestTestBase;
-import org.apache.solr.util.RestTestHarness;
-
-import org.junit.After;
-import org.junit.Before;
-import org.noggit.JSONParser;
-import org.noggit.ObjectBuilder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.io.File;
import java.io.StringReader;
import java.lang.invoke.MethodHandles;
@@ -51,6 +27,27 @@ import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
+import org.apache.commons.io.FileUtils;
+import org.apache.lucene.misc.SweetSpotSimilarity;
+import org.apache.lucene.search.similarities.DFISimilarity;
+import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarity.LegacyBM25Similarity;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.SimilarityFactory;
+import org.apache.solr.search.similarities.SchemaSimilarityFactory;
+import org.apache.solr.util.RESTfulServerProvider;
+import org.apache.solr.util.RestTestBase;
+import org.apache.solr.util.RestTestHarness;
+import org.junit.After;
+import org.junit.Before;
+import org.noggit.JSONParser;
+import org.noggit.ObjectBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
public class TestBulkSchemaAPI extends RestTestBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -591,7 +588,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
assertNotNull("field a5 not created", m);
assertEquals("myWhitespaceTxtField", m.get("type"));
assertNull(m.get("uninvertible")); // inherited, but API shouldn't return w/o explicit showDefaults
- assertFieldSimilarity("a5", BM25Similarity.class); // unspecified, expect default
+ assertFieldSimilarity("a5", LegacyBM25Similarity.class); // unspecified, expect default
m = getObj(harness, "wdf_nocase", "fields");
assertNull("field 'wdf_nocase' not deleted", m);
@@ -933,7 +930,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
Map fields = getObj(harness, fieldName, "fields");
assertNotNull("field " + fieldName + " not created", fields);
- assertFieldSimilarity(fieldName, BM25Similarity.class,
+ assertFieldSimilarity(fieldName, LegacyBM25Similarity.class,
sim -> assertEquals("Unexpected k1", k1, sim.getK1(), .001),
sim -> assertEquals("Unexpected b", b, sim.getB(), .001));
diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java
index 3f6deacec42..6445b3469f0 100644
--- a/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java
+++ b/solr/core/src/test/org/apache/solr/search/similarities/TestBM25SimilarityFactory.java
@@ -16,8 +16,8 @@
*/
package org.apache.solr.search.similarities;
-import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.junit.BeforeClass;
/**
@@ -31,14 +31,14 @@ public class TestBM25SimilarityFactory extends BaseSimilarityTestCase {
/** bm25 with default parameters */
public void test() throws Exception {
- assertEquals(BM25Similarity.class, getSimilarity("text").getClass());
+ assertEquals(LegacyBM25Similarity.class, getSimilarity("text").getClass());
}
/** bm25 with parameters */
public void testParameters() throws Exception {
Similarity sim = getSimilarity("text_params");
- assertEquals(BM25Similarity.class, sim.getClass());
- BM25Similarity bm25 = (BM25Similarity) sim;
+ assertEquals(LegacyBM25Similarity.class, sim.getClass());
+ LegacyBM25Similarity bm25 = (LegacyBM25Similarity) sim;
assertEquals(1.2f, bm25.getK1(), 0.01f);
assertEquals(0.76f, bm25.getB(), 0.01f);
}
diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java b/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java
index 7460652366b..9fe33b7638e 100644
--- a/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java
+++ b/solr/core/src/test/org/apache/solr/search/similarities/TestNonDefinedSimilarityFactory.java
@@ -16,7 +16,7 @@
*/
package org.apache.solr.search.similarities;
-import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.junit.After;
/**
@@ -36,7 +36,7 @@ public class TestNonDefinedSimilarityFactory extends BaseSimilarityTestCase {
public void testCurrentBM25() throws Exception {
// no sys prop set, rely on LATEST
initCore("solrconfig-basic.xml","schema-tiny.xml");
- BM25Similarity sim = getSimilarity("text", BM25Similarity.class);
+ LegacyBM25Similarity sim = getSimilarity("text", LegacyBM25Similarity.class);
assertEquals(0.75F, sim.getB(), 0.0F);
}
}
diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java
index 58fe6eff2f0..a27837bb8a3 100644
--- a/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java
+++ b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarity.java
@@ -17,8 +17,8 @@
package org.apache.solr.search.similarities;
import org.apache.lucene.misc.SweetSpotSimilarity;
-import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.junit.BeforeClass;
/**
@@ -58,18 +58,18 @@ public class TestPerFieldSimilarity extends BaseSimilarityTestCase {
/** test a field where no similarity is specified */
public void testDefaults() throws Exception {
Similarity sim = getSimilarity("sim3text");
- assertEquals(BM25Similarity.class, sim.getClass());;
+ assertEquals(LegacyBM25Similarity.class, sim.getClass());;
}
/** ... and for a dynamic field */
public void testDefaultsDynamic() throws Exception {
Similarity sim = getSimilarity("text_sim3");
- assertEquals(BM25Similarity.class, sim.getClass());
+ assertEquals(LegacyBM25Similarity.class, sim.getClass());
}
/** test a field that does not exist */
public void testNonexistent() throws Exception {
Similarity sim = getSimilarity("sdfdsfdsfdswr5fsdfdsfdsfs");
- assertEquals(BM25Similarity.class, sim.getClass());
+ assertEquals(LegacyBM25Similarity.class, sim.getClass());
}
}