mirror of https://github.com/apache/lucene.git
SOLR-13025: SchemaSimilarityFactory fallback to LegacyBM25Similarity for luceneMatchVersion < 8.0
This commit is contained in:
parent
bcdc6dadbb
commit
5affe7421f
|
@ -59,6 +59,13 @@ Upgrade Notes
|
|||
* SOLR-12754: The UnifiedHighlighter hl.weightMatches now defaults to true. If there are unforeseen highlight problems,
|
||||
this may be the culprit.
|
||||
|
||||
* If you explicitly use BM25SimilarityFactory in your schema, the absolute scoring will be lower due to SOLR-13025.
|
||||
But ordering of documents will not change in the normal case. Use LegacyBM25SimilarityFactory if you need to force
|
||||
the old 6.x/7.x scoring. Note that if you have not specified any similarity in schema or use the default
|
||||
SchemaSimilarityFactory, then LegacyBM25Similarity is automatically selected for 'luceneMatchVersion' < 8.0.0.
|
||||
See also explanation in Reference Guide chapter "Other Schema Elements".
|
||||
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
|
||||
|
@ -94,6 +101,10 @@ Optimizations
|
|||
|
||||
* SOLR-12725: ParseDateFieldUpdateProcessorFactory should reuse ParsePosition. (ab)
|
||||
|
||||
* SOLR-13025: Due to LUCENE-8563, the BM25Similarity formula no longer includes the (k1+1) factor in the numerator.
|
||||
This gives a lower absolute score but doesn't affect ordering, as this is a constant factor which is the same
|
||||
for every document. Use LegacyBM25SimilarityFactory if you need the old 6.x/7.x scoring. See also upgrade notes (janhoy)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -70,7 +70,7 @@ public class TestExternalFeatures extends TestRerankBase {
|
|||
query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5}");
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
||||
|
||||
|
@ -80,7 +80,7 @@ public class TestExternalFeatures extends TestRerankBase {
|
|||
query.add("fl", "*,score,[fv efi.user_query=w2 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5]");
|
||||
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0");
|
||||
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0");
|
||||
}
|
||||
|
|
|
@ -16,13 +16,15 @@
|
|||
*/
|
||||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.schema.SimilarityFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link LegacyBM25Similarity}
|
||||
* Factory for BM25Similarity. This is the default similarity since 8.x.
|
||||
* If you need the exact same formula as in 6.x and 7.x you should instead look at
|
||||
* {@link LegacyBM25SimilarityFactory}
|
||||
* <p>
|
||||
* Parameters:
|
||||
* <ul>
|
||||
|
@ -35,9 +37,10 @@ import org.apache.solr.schema.SimilarityFactory;
|
|||
* Optional settings:
|
||||
* <ul>
|
||||
* <li>discountOverlaps (bool): Sets
|
||||
* {@link LegacyBM25Similarity#setDiscountOverlaps(boolean)}</li>
|
||||
* {@link BM25Similarity#setDiscountOverlaps(boolean)}</li>
|
||||
* </ul>
|
||||
* @lucene.experimental
|
||||
* @since 8.0.0
|
||||
*/
|
||||
public class BM25SimilarityFactory extends SimilarityFactory {
|
||||
private boolean discountOverlaps;
|
||||
|
@ -54,7 +57,7 @@ public class BM25SimilarityFactory extends SimilarityFactory {
|
|||
|
||||
@Override
|
||||
public Similarity getSimilarity() {
|
||||
LegacyBM25Similarity sim = new LegacyBM25Similarity(k1, b);
|
||||
BM25Similarity sim = new BM25Similarity(k1, b);
|
||||
sim.setDiscountOverlaps(discountOverlaps);
|
||||
return sim;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.schema.SimilarityFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link LegacyBM25Similarity}.
|
||||
* Use this to force explicit creation of the BM25 formula that was used by BM25Similarity before Solr/Lucene 8.0.0.
|
||||
* Note that {@link SchemaSimilarityFactory} will automatically create an instance of LegacyBM25Similarity if luceneMatchVersion is < 8.0.0
|
||||
* <p>
|
||||
* Parameters:
|
||||
* <ul>
|
||||
* <li>k1 (float): Controls non-linear term frequency normalization (saturation).
|
||||
* The default is <code>1.2</code>
|
||||
* <li>b (float): Controls to what degree document length normalizes tf values.
|
||||
* The default is <code>0.75</code>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Optional settings:
|
||||
* <ul>
|
||||
* <li>discountOverlaps (bool): Sets
|
||||
* {@link LegacyBM25Similarity#setDiscountOverlaps(boolean)}</li>
|
||||
* </ul>
|
||||
* @lucene.experimental
|
||||
* @since 8.0.0
|
||||
*/
|
||||
public class LegacyBM25SimilarityFactory extends SimilarityFactory {
|
||||
private boolean discountOverlaps;
|
||||
private float k1;
|
||||
private float b;
|
||||
|
||||
@Override
|
||||
public void init(SolrParams params) {
|
||||
super.init(params);
|
||||
discountOverlaps = params.getBool("discountOverlaps", true);
|
||||
k1 = params.getFloat("k1", 1.2f);
|
||||
b = params.getFloat("b", 0.75f);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity getSimilarity() {
|
||||
LegacyBM25Similarity sim = new LegacyBM25Similarity(k1, b);
|
||||
sim.setDiscountOverlaps(discountOverlaps);
|
||||
return sim;
|
||||
}
|
||||
}
|
|
@ -16,7 +16,7 @@
|
|||
*/
|
||||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
|
@ -39,8 +39,8 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
* matching configured:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li><code>luceneMatchVersion &lt; 6.0</code> = {@link ClassicSimilarity}</li>
|
||||
* <li><code>luceneMatchVersion >= 6.0</code> = {@link LegacyBM25Similarity}</li>
|
||||
* <li><code>luceneMatchVersion &lt; 8.0</code> = {@link LegacyBM25Similarity}</li>
|
||||
* <li><code>luceneMatchVersion >= 8.0</code> = {@link BM25Similarity}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The <code>defaultSimFromFieldType</code> option accepts the name of any fieldtype, and uses
|
||||
|
@ -85,10 +85,12 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
|
|||
|
||||
private volatile SolrCore core; // set by inform(SolrCore)
|
||||
private volatile Similarity similarity; // lazy instantiated
|
||||
private Version coreVersion = Version.LATEST;
|
||||
|
||||
@Override
|
||||
public void inform(SolrCore core) {
|
||||
this.core = core;
|
||||
this.coreVersion = this.core.getSolrConfig().luceneMatchVersion;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -109,7 +111,9 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
|
|||
Similarity defaultSim = null;
|
||||
if (null == defaultSimFromFieldType) {
|
||||
// nothing configured, choose a sensible implicit default...
|
||||
defaultSim = new LegacyBM25Similarity();
|
||||
defaultSim = coreVersion.onOrAfter(Version.LUCENE_8_0_0) ?
|
||||
new BM25Similarity() :
|
||||
new LegacyBM25Similarity();
|
||||
} else {
|
||||
FieldType defSimFT = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
|
||||
if (null == defSimFT) {
|
||||
|
|
|
@ -36,10 +36,27 @@
|
|||
</similarity>
|
||||
</fieldType>
|
||||
|
||||
<!-- legacybm25 with default parameters -->
|
||||
<fieldType name="legacy_text" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||
<similarity class="solr.LegacyBM25SimilarityFactory"/>
|
||||
</fieldType>
|
||||
|
||||
<!-- legacybm25 with parameters -->
|
||||
<fieldType name="legacy_text_params" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||
<similarity class="solr.LegacyBM25SimilarityFactory">
|
||||
<float name="k1">1.2</float>
|
||||
<float name="b">0.76</float>
|
||||
</similarity>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="text" type="text" indexed="true" stored="false"/>
|
||||
<field name="text_params" type="text_params" indexed="true" stored="false"/>
|
||||
<field name="legacy_text" type="legacy_text" indexed="true" stored="false"/>
|
||||
<field name="legacy_text_params" type="legacy_text_params" indexed="true" stored="false"/>
|
||||
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
|
|
|
@ -29,10 +29,10 @@ import java.util.function.Consumer;
|
|||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.lucene.misc.SweetSpotSimilarity;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.DFISimilarity;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
@ -588,7 +588,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
|
|||
assertNotNull("field a5 not created", m);
|
||||
assertEquals("myWhitespaceTxtField", m.get("type"));
|
||||
assertNull(m.get("uninvertible")); // inherited, but API shouldn't return w/o explicit showDefaults
|
||||
assertFieldSimilarity("a5", LegacyBM25Similarity.class); // unspecified, expect default
|
||||
assertFieldSimilarity("a5", BM25Similarity.class); // unspecified, expect default
|
||||
|
||||
m = getObj(harness, "wdf_nocase", "fields");
|
||||
assertNull("field 'wdf_nocase' not deleted", m);
|
||||
|
@ -930,7 +930,7 @@ public class TestBulkSchemaAPI extends RestTestBase {
|
|||
Map fields = getObj(harness, fieldName, "fields");
|
||||
assertNotNull("field " + fieldName + " not created", fields);
|
||||
|
||||
assertFieldSimilarity(fieldName, LegacyBM25Similarity.class,
|
||||
assertFieldSimilarity(fieldName, BM25Similarity.class,
|
||||
sim -> assertEquals("Unexpected k1", k1, sim.getK1(), .001),
|
||||
sim -> assertEquals("Unexpected b", b, sim.getB(), .001));
|
||||
|
||||
|
|
|
@ -57,6 +57,6 @@ public class TestPayloadScoreQParserPlugin extends SolrTestCaseJ4 {
|
|||
|
||||
// TODO: fix this includeSpanScore test to be less brittle - score result is score of "A" (via BM25) multiplied by 1.0 (payload value)
|
||||
assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min}"), "//float[@name='score']='1.0'");
|
||||
assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.2876821'");
|
||||
assertQ(req("fl","*,score", "q", "{!payload_score f=vals_dpf v=A func=min includeSpanScore=true}"), "//float[@name='score']='0.13076457'");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -389,16 +389,16 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
|
|||
// superman has a higher df (thus lower idf) in one segment, but reversed in the complete index
|
||||
String q ="{!func}query($qq)";
|
||||
String fq="id:120";
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'");
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'");
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'0.6'");
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'0.6'");
|
||||
|
||||
// test weighting through a function range query
|
||||
assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=1 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']");
|
||||
assertQ(req("fl","*,score", "fq",fq, "q", "{!frange l=0.6 u=10}query($qq)", "qq","text:superman"), "//*[@numFound='1']");
|
||||
|
||||
// test weighting through a complex function
|
||||
q ="{!func}sub(div(sum(0.0,product(1,query($qq))),1),0)";
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'1.0'");
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'1.0'");
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:batman", "fq",fq), "//float[@name='score']<'0.6'");
|
||||
assertQ(req("fl","*,score","q", q, "qq","text:superman", "fq",fq), "//float[@name='score']>'0.6'");
|
||||
|
||||
|
||||
// test full param dereferencing
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
*/
|
||||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
|
@ -31,14 +31,14 @@ public class TestBM25SimilarityFactory extends BaseSimilarityTestCase {
|
|||
|
||||
/** bm25 with default parameters */
|
||||
public void test() throws Exception {
|
||||
assertEquals(LegacyBM25Similarity.class, getSimilarity("text").getClass());
|
||||
assertEquals(BM25Similarity.class, getSimilarity("text").getClass());
|
||||
}
|
||||
|
||||
/** bm25 with parameters */
|
||||
public void testParameters() throws Exception {
|
||||
Similarity sim = getSimilarity("text_params");
|
||||
assertEquals(LegacyBM25Similarity.class, sim.getClass());
|
||||
LegacyBM25Similarity bm25 = (LegacyBM25Similarity) sim;
|
||||
assertEquals(BM25Similarity.class, sim.getClass());
|
||||
BM25Similarity bm25 = (BM25Similarity) sim;
|
||||
assertEquals(1.2f, bm25.getK1(), 0.01f);
|
||||
assertEquals(0.76f, bm25.getB(), 0.01f);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
* Tests {@link LegacyBM25SimilarityFactory}
|
||||
*/
|
||||
public class TestLegacyBM25SimilarityFactory extends BaseSimilarityTestCase {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-bm25.xml");
|
||||
}
|
||||
|
||||
/** bm25 with default parameters */
|
||||
public void test() throws Exception {
|
||||
assertEquals(LegacyBM25Similarity.class, getSimilarity("legacy_text").getClass());
|
||||
}
|
||||
|
||||
/** bm25 with parameters */
|
||||
public void testParameters() throws Exception {
|
||||
Similarity sim = getSimilarity("legacy_text_params");
|
||||
assertEquals(LegacyBM25Similarity.class, sim.getClass());
|
||||
LegacyBM25Similarity bm25 = (LegacyBM25Similarity) sim;
|
||||
assertEquals(1.2f, bm25.getK1(), 0.01f);
|
||||
assertEquals(0.76f, bm25.getB(), 0.01f);
|
||||
}
|
||||
}
|
|
@ -16,15 +16,19 @@
|
|||
*/
|
||||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.junit.After;
|
||||
|
||||
/**
|
||||
* Verifies that the default behavior of the implicit {@link ClassicSimilarityFactory}
|
||||
* Verifies that the default behavior of the implicit {@link BM25Similarity}
|
||||
* (ie: no similarity configured in schema.xml at all) is consistent with
|
||||
* expectations based on the luceneMatchVersion
|
||||
* @see <a href="https://issues.apache.org/jira/browse/SOLR-5561">SOLR-5561</a>
|
||||
* @see <a href="https://issues.apache.org/jira/browse/SOLR-8057">SOLR-8057</a>
|
||||
* @see <a href="https://issues.apache.org/jira/browse/SOLR-13025">SOLR-13025</a>
|
||||
* @see <a href="https://issues.apache.org/jira/browse/LUCENE-8563">LUCENE-8563</a>
|
||||
*/
|
||||
public class TestNonDefinedSimilarityFactory extends BaseSimilarityTestCase {
|
||||
|
||||
|
@ -33,10 +37,30 @@ public class TestNonDefinedSimilarityFactory extends BaseSimilarityTestCase {
|
|||
deleteCore();
|
||||
}
|
||||
|
||||
public void testCurrentBM25() throws Exception {
|
||||
public void testCurrentBM25FromV8() throws Exception {
|
||||
// no sys prop set, rely on LATEST
|
||||
initCore("solrconfig-basic.xml","schema-tiny.xml");
|
||||
LegacyBM25Similarity sim = getSimilarity("text", LegacyBM25Similarity.class);
|
||||
BM25Similarity sim = getSimilarity("text", BM25Similarity.class);
|
||||
assertEquals(0.75F, sim.getB(), 0.0F);
|
||||
}
|
||||
|
||||
public void testLegacyBM25BeforeV8() throws Exception {
|
||||
System.setProperty("tests.luceneMatchVersion", Version.LUCENE_7_0_0.toString());
|
||||
initCore("solrconfig-basic.xml","schema-tiny.xml");
|
||||
System.clearProperty("tests.luceneMatchVersion");
|
||||
LegacyBM25Similarity sim = getSimilarity("text", LegacyBM25Similarity.class);
|
||||
assertEquals(0.75F, sim.getB(), 0.0F);
|
||||
deleteCore();
|
||||
|
||||
System.setProperty("tests.luceneMatchVersion", "5.0.0");
|
||||
initCore("solrconfig-basic.xml","schema-tiny.xml");
|
||||
System.clearProperty("tests.luceneMatchVersion");
|
||||
getSimilarity("text", LegacyBM25Similarity.class);
|
||||
deleteCore();
|
||||
|
||||
System.setProperty("tests.luceneMatchVersion", "6.0.0");
|
||||
initCore("solrconfig-basic.xml","schema-tiny.xml");
|
||||
System.clearProperty("tests.luceneMatchVersion");
|
||||
getSimilarity("text", LegacyBM25Similarity.class);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
package org.apache.solr.search.similarities;
|
||||
|
||||
import org.apache.lucene.misc.SweetSpotSimilarity;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
|
@ -58,18 +58,18 @@ public class TestPerFieldSimilarity extends BaseSimilarityTestCase {
|
|||
/** test a field where no similarity is specified */
|
||||
public void testDefaults() throws Exception {
|
||||
Similarity sim = getSimilarity("sim3text");
|
||||
assertEquals(LegacyBM25Similarity.class, sim.getClass());;
|
||||
assertEquals(BM25Similarity.class, sim.getClass());;
|
||||
}
|
||||
|
||||
/** ... and for a dynamic field */
|
||||
public void testDefaultsDynamic() throws Exception {
|
||||
Similarity sim = getSimilarity("text_sim3");
|
||||
assertEquals(LegacyBM25Similarity.class, sim.getClass());
|
||||
assertEquals(BM25Similarity.class, sim.getClass());
|
||||
}
|
||||
|
||||
/** test a field that does not exist */
|
||||
public void testNonexistent() throws Exception {
|
||||
Similarity sim = getSimilarity("sdfdsfdsfdswr5fsdfdsfdsfs");
|
||||
assertEquals(LegacyBM25Similarity.class, sim.getClass());
|
||||
assertEquals(BM25Similarity.class, sim.getClass());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,3 +23,9 @@
|
|||
// *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
|
||||
|
||||
* Due to LIR redesign in SOLR-11702, all users must be on Solr 7.3 or higher to upgrade to Solr 8
|
||||
|
||||
* If you explicitly use BM25SimilarityFactory in your schema the absolute scoring will be lower, see SOLR-13025.
|
||||
But ordering of documents will not change in the normal case. Use LegacyBM25SimilarityFactory if you need to force
|
||||
the old 6.x/7.x scoring. Note that if you have not specified any similarity in schema or use the default
|
||||
SchemaSimilarityFactory, then LegacyBM25Similarity is automatically selected for 'luceneMatchVersion' < 8.0.0.
|
||||
See also explanation in Reference Guide chapter "Other Schema Elements".
|
||||
|
|
|
@ -90,6 +90,6 @@ In most cases, specifying global level similarity like this will cause an error
|
|||
|
||||
In the example above `IBSimilarityFactory` (using the Information-Based model) will be used for any fields of type `text_ib`, while `DFRSimilarityFactory` (divergence from random) will be used for any fields of type `text_dfr`, as well as any fields using a type that does not explicitly specify a `<similarity/>`.
|
||||
|
||||
If `SchemaSimilarityFactory` is explicitly declared without configuring a `defaultSimFromFieldType`, then `BM25Similarity` is implicitly used as the default.
|
||||
If `SchemaSimilarityFactory` is explicitly declared without configuring a `defaultSimFromFieldType`, then `BM25Similarity` is implicitly used as the default for `luceneMatchVersion >= 8.0.0`. For earlier values of `luceneMatchVersion`, `LegacyBM25Similarity` is used instead, to reproduce the BM25 formula that was the default in those versions.
|
||||
|
||||
In addition to the various factories mentioned on this page, there are several other similarity implementations that can be used such as the `SweetSpotSimilarityFactory`, `ClassicSimilarityFactory`, etc. For details, see the Solr Javadocs for the {solr-javadocs}/solr-core/org/apache/solr/schema/SimilarityFactory.html[similarity factories].
|
||||
In addition to the various factories mentioned on this page, there are several other similarity implementations that can be used, such as the `SweetSpotSimilarityFactory`, `ClassicSimilarityFactory`, `LegacyBM25SimilarityFactory`, etc. For details, see the Solr Javadocs for the {solr-javadocs}/solr-core/org/apache/solr/schema/SimilarityFactory.html[similarity factories].
|
||||
|
|
Loading…
Reference in New Issue