SOLR-8271: Change implicit default Similarity to use SchemaSimilarityFactory when luceneMatchVersion >= 6

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1715393 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2015-11-20 17:00:34 +00:00
parent 3fdbc588a9
commit cae3ddd5c7
18 changed files with 40 additions and 29 deletions

View File

@ -58,9 +58,9 @@ Upgrading from Solr 5.x
referenced in your schema.xml, edit your config to use the functionally identical ClassicSimilarityFactory. referenced in your schema.xml, edit your config to use the functionally identical ClassicSimilarityFactory.
See SOLR-8239 for more details. See SOLR-8239 for more details.
* The implicit default Similarity used when no <similarity/> is configured in schema.xml has * The implicit default Similarity used when no <similarity/> is configured in schema.xml has
been changed to BM25SimilarityFactory. Users who wish to preserve backcompatible behavior should been changed to SchemaSimilarityFactory. Users who wish to preserve backcompatible behavior should
either explicitly configure ClassicSimilarityFactory, or ensure that the luceneMatchVersion either explicitly configure ClassicSimilarityFactory, or ensure that the luceneMatchVersion
for the collection is less than 6.0. See SOLR-8270 for details. for the collection is less than 6.0. See SOLR-8270 + SOLR-8271 for details.
* SchemaSimilarityFactory has been modified to use BM25Similarity as the default for fieldTypes that * SchemaSimilarityFactory has been modified to use BM25Similarity as the default for fieldTypes that
do not explicitly declare a Similarity. The legacy behavior of using ClassicSimilarity as the do not explicitly declare a Similarity. The legacy behavior of using ClassicSimilarity as the
default will occur if the luceneMatchVersion for the collection is less than 6.0. See SOLR-8261 for default will occur if the luceneMatchVersion for the collection is less than 6.0. See SOLR-8261 for
@ -153,6 +153,8 @@ Other Changes
* SOLR-8270: Change implicit default Similarity to use BM25 when luceneMatchVersion >= 6 (hossman) * SOLR-8270: Change implicit default Similarity to use BM25 when luceneMatchVersion >= 6 (hossman)
* SOLR-8271: Change implicit default Similarity to use SchemaSimilarityFactory when luceneMatchVersion >= 6 (hossman)
================== 5.4.0 ================== ================== 5.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

View File

@ -41,6 +41,7 @@ import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config; import org.apache.solr.core.Config;
@ -49,8 +50,8 @@ import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SchemaXmlWriter; import org.apache.solr.response.SchemaXmlWriter;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.similarities.BM25SimilarityFactory;
import org.apache.solr.search.similarities.ClassicSimilarityFactory; import org.apache.solr.search.similarities.ClassicSimilarityFactory;
import org.apache.solr.search.similarities.SchemaSimilarityFactory;
import org.apache.solr.util.DOMUtil; import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -479,14 +480,11 @@ public class IndexSchema {
Node node = (Node) xpath.evaluate(expression, document, XPathConstants.NODE); Node node = (Node) xpath.evaluate(expression, document, XPathConstants.NODE);
similarityFactory = readSimilarity(loader, node); similarityFactory = readSimilarity(loader, node);
if (similarityFactory == null) { if (similarityFactory == null) {
Version luceneVersion = getDefaultLuceneMatchVersion(); final boolean modernSim = getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_6_0_0);
if (getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_6_0_0)) { final Class simClass = modernSim ? SchemaSimilarityFactory.class : ClassicSimilarityFactory.class;
similarityFactory = new BM25SimilarityFactory(); // use the loader to ensure proper SolrCoreAware handling
} else { similarityFactory = loader.newInstance(simClass.getName(), SimilarityFactory.class);
similarityFactory = new ClassicSimilarityFactory(); similarityFactory.init(new ModifiableSolrParams());
}
final NamedList similarityParams = new NamedList();
similarityFactory.init(SolrParams.toSolrParams(similarityParams));
} else { } else {
isExplicitSimilarity = true; isExplicitSimilarity = true;
} }

View File

@ -17,12 +17,15 @@
--> -->
<schema name="bad-schema-sim-global-vs-ft-mismatch" version="1.0"> <schema name="bad-schema-sim-global-vs-ft-mismatch" version="1.0">
<similarity class="solr.BM25SimilarityFactory" /> <!-- global sim -->
<types> <types>
<fieldType name="sim1" class="solr.TextField"> <fieldType name="sim1" class="solr.TextField">
<analyzer> <analyzer>
<tokenizer class="solr.MockTokenizerFactory"/> <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer> </analyzer>
<!-- BAD: similarity here but no global sim that allows it --> <!-- BAD: similarity here but global sim does not allow it -->
<similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/> <similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
</fieldType> </fieldType>
</types> </types>

View File

@ -48,8 +48,4 @@
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<!-- testing with a similarity that is SolrCoreAware -->
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -48,5 +48,4 @@
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -39,5 +39,10 @@
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/> <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<field name="signatureField" type="string" indexed="true" stored="false"/> <field name="signatureField" type="string" indexed="true" stored="false"/>
</fields> </fields>
<!--
Even though SchemaSimilarityFactory is the current implicit default in IndexSchema, we
are explicit about it here in this schema file to verify the short class name is preserved
-->
<similarity class="solr.SchemaSimilarityFactory"/> <similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -66,5 +66,4 @@
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -54,5 +54,4 @@
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -47,5 +47,4 @@
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -47,5 +47,4 @@
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -634,5 +634,4 @@
<solrQueryParser defaultOperator="OR"/> <solrQueryParser defaultOperator="OR"/>
<similarity class="solr.SchemaSimilarityFactory" />
</schema> </schema>

View File

@ -63,6 +63,12 @@
<defaultSearchField>sim1text</defaultSearchField> <defaultSearchField>sim1text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<!-- default similarity, defers to the fieldType --> <!-- Global similarity, defers to the fieldType.
Even though SchemaSimilarityFactory is the current implicit default in IndexSchema, we
are explicit about it here in this schema file because TestPerFieldSimilarityClassic overrides
the luceneMatchVersion which results in a different implicit global default - but we still
need per-fieldtype sims for that test.
-->
<similarity class="solr.SchemaSimilarityFactory"/> <similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -72,5 +72,4 @@
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -42,5 +42,4 @@
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -431,6 +431,4 @@ valued. -->
<!-- field for the QueryParser to use when an explicit fieldname is absent --> <!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>text</defaultSearchField> <defaultSearchField>text</defaultSearchField>
<similarity class="solr.SchemaSimilarityFactory"/>
</schema> </schema>

View File

@ -50,6 +50,10 @@ public class TestClassNameShortening extends RestTestBase {
"/response/lst[@name='fieldType']/lst[@name='similarity']/str[@name='class'] = 'org.apache.lucene.misc.SweetSpotSimilarity'"); "/response/lst[@name='fieldType']/lst[@name='similarity']/str[@name='class'] = 'org.apache.lucene.misc.SweetSpotSimilarity'");
} }
/**
* See {@link TestSchemaSimilarityResource#testGetSchemaSimilarity} for where the long class name
* is verified when the config doesn't specify a sim at all
*/
@Test @Test
public void testShortenedGlobalSimilarityStaysShortened() throws Exception { public void testShortenedGlobalSimilarityStaysShortened() throws Exception {
assertQ("/schema/similarity?indent=on&wt=xml", assertQ("/schema/similarity?indent=on&wt=xml",

View File

@ -20,11 +20,18 @@ import org.apache.solr.rest.SolrRestletTestBase;
import org.junit.Test; import org.junit.Test;
public class TestSchemaSimilarityResource extends SolrRestletTestBase { public class TestSchemaSimilarityResource extends SolrRestletTestBase {
/**
* NOTE: schema used by parent class doesn't define a global sim, so we get the implicit default
* which causes the FQN of the class to be returned
*
* @see TestClassNameShortening#testShortenedGlobalSimilarityStaysShortened
*/
@Test @Test
public void testGetSchemaSimilarity() throws Exception { public void testGetSchemaSimilarity() throws Exception {
assertQ("/schema/similarity?indent=on&wt=xml", assertQ("/schema/similarity?indent=on&wt=xml",
"count(/response/lst[@name='similarity']) = 1", "count(/response/lst[@name='similarity']) = 1",
"/response/lst[@name='similarity']/str[@name='class'][.='solr.SchemaSimilarityFactory']"); "/response/lst[@name='similarity']/str[@name='class'][.='org.apache.solr.search.similarities.SchemaSimilarityFactory']");
} }
} }

View File

@ -240,7 +240,7 @@ public class SchemaTest extends RestTestBase {
new SchemaRequest.GlobalSimilarity(); new SchemaRequest.GlobalSimilarity();
SchemaResponse.GlobalSimilarityResponse globalSimilarityResponse = globalSimilarityRequest.process(getSolrClient()); SchemaResponse.GlobalSimilarityResponse globalSimilarityResponse = globalSimilarityRequest.process(getSolrClient());
assertValidSchemaResponse(globalSimilarityResponse); assertValidSchemaResponse(globalSimilarityResponse);
assertEquals("org.apache.solr.search.similarities.BM25SimilarityFactory", assertEquals("org.apache.solr.search.similarities.SchemaSimilarityFactory",
globalSimilarityResponse.getSimilarity().get("class")); globalSimilarityResponse.getSimilarity().get("class"));
} }