From 5affe7421f3f927e1fcfb7c7cb763179ce686997 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B8ydahl?= Date: Wed, 12 Dec 2018 09:40:57 +0100 Subject: [PATCH] SOLR-13025: SchemaSimilarityFactory fallback to LegacyBM25Similarity for luceneMatchVersion < 8.0 --- solr/CHANGES.txt | 11 ++++ .../ltr/feature/TestExternalFeatures.java | 4 +- .../similarities/BM25SimilarityFactory.java | 11 ++-- .../LegacyBM25SimilarityFactory.java | 64 +++++++++++++++++++ .../similarities/SchemaSimilarityFactory.java | 12 ++-- .../solr/collection1/conf/schema-bm25.xml | 17 +++++ .../solr/rest/schema/TestBulkSchemaAPI.java | 6 +- .../search/TestPayloadScoreQParserPlugin.java | 2 +- .../search/function/TestFunctionQuery.java | 10 +-- .../TestBM25SimilarityFactory.java | 8 +-- .../TestLegacyBM25SimilarityFactory.java | 45 +++++++++++++ .../TestNonDefinedSimilarityFactory.java | 30 ++++++++- .../similarities/TestPerFieldSimilarity.java | 8 +-- .../src/major-changes-in-solr-8.adoc | 6 ++ .../src/other-schema-elements.adoc | 4 +- 15 files changed, 206 insertions(+), 32 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/similarities/LegacyBM25SimilarityFactory.java create mode 100644 solr/core/src/test/org/apache/solr/search/similarities/TestLegacyBM25SimilarityFactory.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index c7ad02a3382..9ab80f4cf91 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -59,6 +59,13 @@ Upgrade Notes * SOLR-12754: The UnifiedHighlighter hl.weightMatches now defaults to true. If there are unforseen highlight problems, this may be the culprit. +* If you explicitly use BM25SimilarityFactory in your schema, the absolute scoring will be lower due to SOLR-13025. + But ordering of documents will not change in the normal case. Use LegacyBM25SimilarityFactory if you need to force + the old 6.x/7.x scoring. 
Note that if you have not specified any similarity in schema or use the default + SchemaSimilarityFactory, then LegacyBM25Similarity is automatically selected for 'luceneMatchVersion' < 8.0.0. + See also explanation in Reference Guide chapter "Other Schema Elements". + + New Features ---------------------- @@ -94,6 +101,10 @@ Optimizations * SOLR-12725: ParseDateFieldUpdateProcessorFactory should reuse ParsePosition. (ab) +* SOLR-13025: Due to LUCENE-8563, the BM25Similarity formula no longer includes the (k1+1) factor in the numerator. + This gives a lower absolute score but doesn't affect ordering, as this is a constant factor which is the same + for every document. Use LegacyBM25SimilarityFactory if you need the old 6.x/7.x scoring. See also upgrade notes. (janhoy) + Other Changes ---------------------- diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java index 45e856a08ca..0c97f0f7330 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestExternalFeatures.java @@ -70,7 +70,7 @@ public class TestExternalFeatures extends TestRerankBase { query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5}"); assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); @@ -80,7 +80,7 @@ public class TestExternalFeatures extends TestRerankBase { query.add("fl", "*,score,[fv efi.user_query=w2 efi.userTitlePhrase1=w4 efi.userTitlePhrase2=w5]"); assertJQ("/query" + 
query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.7693934"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.34972426"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/score==0.0"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/score==0.0"); } diff --git a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java index fd8a48cb7d6..fefe8936e21 100644 --- a/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java +++ b/solr/core/src/java/org/apache/solr/search/similarities/BM25SimilarityFactory.java @@ -16,13 +16,15 @@ */ package org.apache.solr.search.similarities; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarity.LegacyBM25Similarity; import org.apache.solr.common.params.SolrParams; import org.apache.solr.schema.SimilarityFactory; /** - * Factory for {@link LegacyBM25Similarity} + * Factory for BM25Similarity. This is the default similarity since 8.x. + * If you need the exact same formula as in 6.x and 7.x you should instead look at + * {@link LegacyBM25SimilarityFactory} *

* Parameters: *