From 5b0b4194a19de40f22625a724e6a1d9eaf49a99d Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Mon, 23 Nov 2015 16:50:49 +0000 Subject: [PATCH] SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using a fieldType name to identify which Similarity to use as a default git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1715881 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 + .../similarities/SchemaSimilarityFactory.java | 116 +++++++++++++----- .../bad-schema-sim-default-does-not-exist.xml | 43 +++++++ ...schema-sim-default-has-no-explicit-sim.xml | 43 +++++++ .../conf/schema-sim-default-override.xml | 66 ++++++++++ .../solr/schema/BadIndexSchemaTest.java | 10 ++ ...PerFieldSimilarityWithDefaultOverride.java | 76 ++++++++++++ 7 files changed, 329 insertions(+), 28 deletions(-) create mode 100644 solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-does-not-exist.xml create mode 100644 solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-has-no-explicit-sim.xml create mode 100644 solr/core/src/test-files/solr/collection1/conf/schema-sim-default-override.xml create mode 100644 solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarityWithDefaultOverride.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 805a7104682..e73099a5046 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -242,6 +242,9 @@ New Features * SOLR-6168: Add a 'sort' local param to the collapse QParser to support using complex sort options to select the representitive doc for each collapsed group. (Umesh Prasad, hossman) +* SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using + a fieldType name to identify which Similarity to use as a default. 
(hossman) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java index 2424bff3e41..27b2c7990ab 100644 --- a/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java +++ b/solr/core/src/java/org/apache/solr/search/similarities/SchemaSimilarityFactory.java @@ -23,6 +23,8 @@ import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Version; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.params.SolrParams; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.FieldType; @@ -34,55 +36,72 @@ import org.slf4j.LoggerFactory; /** *

- * SimilarityFactory that returns a {@link PerFieldSimilarityWrapper} - * that delegates to the field type, if it's configured, otherwise - * returns a sensible default depending on the {@link Version} matching configured. + * SimilarityFactory that returns a global {@link PerFieldSimilarityWrapper} + * that delegates to the field type, if it's configured. For field types that + * do not have a Similarity explicitly configured, the global Similarity + * will use per fieldtype defaults -- either based on an explicitly configured + * defaultSimFromFieldType or a sensible default depending on the {@link Version} + * matching configured: + *

* *

- * NOTE: Users should be aware that in addition to supporting - * Similarity configurations specified on individual - * field types, this factory also differs in behavior from - * {@link ClassicSimilarityFactory} because of other differences in the - * implementations of PerFieldSimilarityWrapper and - * {@link ClassicSimilarity} - notably in methods such as - * {@link Similarity#coord} and {@link Similarity#queryNorm}. + * The defaultSimFromFieldType option accepts the name of any fieldtype, and uses + * whatever Similarity is explicitly configured for that fieldType as the default for + * all other field types. For example: + *

+ *
+ *   <similarity class="solr.SchemaSimilarityFactory" >
+ *     <str name="defaultSimFromFieldType">type-using-custom-dfr</str>
+ *   </similarity>
+ *   ...
+ *   <fieldType name="type-using-custom-dfr" class="solr.TextField">
+ *     ...
+ *     <similarity class="solr.DFRSimilarityFactory">
+ *       <str name="basicModel">I(F)</str>
+ *       <str name="afterEffect">B</str>
+ *       <str name="normalization">H3</str>
+ *       <float name="mu">900</float>
+ *     </similarity>
+ *   </fieldType>
+ * 
+ *

+ * In the example above, any fieldtypes that do not define their own <similarity/> + * will use the Similarity configured for the type-using-custom-dfr field type. + *

+ * + *

+ * NOTE: Users should be aware that even when this factory uses a single default + * Similarity for some or all fields in a Query, the behavior can be inconsistent + * with the behavior of explicitly configuring that same Similarity globally, because + * of differences in how some multi-field / multi-clause behavior is defined in + * PerFieldSimilarityWrapper. In particular please consider carefully the documentation + * & implementation of {@link Similarity#coord} and {@link Similarity#queryNorm} in + * {@link ClassicSimilarity} compared to {@link PerFieldSimilarityWrapper} *

* * @see FieldType#getSimilarity */ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCoreAware { + + private static final String INIT_OPT = "defaultSimFromFieldType"; - private Similarity similarity; // set by init - private Similarity defaultSimilarity; // set by inform(SolrCore) - private volatile SolrCore core; + private String defaultSimFromFieldType; // set by init, if null use sensible implicit default + + private volatile SolrCore core; // set by inform(SolrCore) + private volatile Similarity similarity; // lazily instantiated on first use @Override public void inform(SolrCore core) { this.core = core; - this.defaultSimilarity = this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0) - ? new BM25Similarity() - : new ClassicSimilarity(); } @Override public void init(SolrParams args) { + defaultSimFromFieldType = args.get(INIT_OPT, null); super.init(args); - similarity = new PerFieldSimilarityWrapper() { - @Override - public Similarity get(String name) { - FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name); - if (fieldType == null) { - return defaultSimilarity; - } else { - Similarity similarity = fieldType.getSimilarity(); - return similarity == null ? defaultSimilarity : similarity; - } - } - }; } @Override @@ -90,6 +109,47 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo if (null == core) { throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called"); } + if (null == similarity) { + // Need to instantiate lazily, can't do this in inform(SolrCore) because of chicken/egg + // circular initialization hell with core.getLatestSchema() to lookup defaultSimFromFieldType + + Similarity defaultSim = null; + if (null == defaultSimFromFieldType) { + // nothing configured, choose a sensible implicit default... + defaultSim = this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0) + ? 
new BM25Similarity() + : new ClassicSimilarity(); + } else { + FieldType defSimFT = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType); + if (null == defSimFT) { + throw new SolrException(ErrorCode.SERVER_ERROR, + "SchemaSimilarityFactory configured with " + INIT_OPT + "='" + + defaultSimFromFieldType + "' but that <fieldType> does not exist"); + + } + defaultSim = defSimFT.getSimilarity(); + if (null == defaultSim) { + throw new SolrException(ErrorCode.SERVER_ERROR, + "SchemaSimilarityFactory configured with " + INIT_OPT + "='" + + defaultSimFromFieldType + + "' but that <fieldType> does not define a <similarity/>"); + } + } + assert null != defaultSim; + final Similarity defaultSimilarity = defaultSim; + similarity = new PerFieldSimilarityWrapper() { + @Override + public Similarity get(String name) { + FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name); + if (fieldType == null) { + return defaultSimilarity; + } else { + Similarity similarity = fieldType.getSimilarity(); + return similarity == null ? 
defaultSimilarity : similarity; + } + } + }; + } return similarity; } } diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-does-not-exist.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-does-not-exist.xml new file mode 100644 index 00000000000..828aa578090 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-does-not-exist.xml @@ -0,0 +1,43 @@ + + + + + + + + ft-does-not-exist + + + + + + + + + + + + + + + + + + sim1text + + diff --git a/solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-has-no-explicit-sim.xml b/solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-has-no-explicit-sim.xml new file mode 100644 index 00000000000..b5ce4aa1b3d --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/bad-schema-sim-default-has-no-explicit-sim.xml @@ -0,0 +1,43 @@ + + + + + + + + ft-has-no-sim + + + + + + + + + + + + + + + + + + sim1text + + diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sim-default-override.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sim-default-override.xml new file mode 100644 index 00000000000..100caaf031e --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sim-default-override.xml @@ -0,0 +1,66 @@ + + + + + + + + + sim-used-as-default-override + + + + + + + + + + + + + + + + + + is there an echo?
+ + + + + + + + + + + + + + + + + + + + + + id + + diff --git a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java index 7c782b10db1..874ad2ddc34 100644 --- a/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java +++ b/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java @@ -119,4 +119,14 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase { doTest("bad-schema-bogus-analysis-parameters.xml", "Unknown parameters"); } + public void testSimDefaultFieldTypeHasNoExplicitSim() throws Exception { + doTest("bad-schema-sim-default-has-no-explicit-sim.xml", + "ft-has-no-sim"); + } + + public void testSimDefaultFieldTypeDoesNotExist() throws Exception { + doTest("bad-schema-sim-default-does-not-exist.xml", + "ft-does-not-exist"); + } + } diff --git a/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarityWithDefaultOverride.java b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarityWithDefaultOverride.java new file mode 100644 index 00000000000..84cbef597a8 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/similarities/TestPerFieldSimilarityWithDefaultOverride.java @@ -0,0 +1,76 @@ +package org.apache.solr.search.similarities; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.misc.SweetSpotSimilarity; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.Similarity; +import org.junit.BeforeClass; + +/** + * Tests per-field similarity support in the schema when SchemaSimilarityFactory is explicitly + * configured to use a custom default sim for field types that do not override it. + * @see TestPerFieldSimilarity + */ +public class TestPerFieldSimilarityWithDefaultOverride extends BaseSimilarityTestCase { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-basic.xml","schema-sim-default-override.xml"); + } + + /** test a field where the sim is specified directly */ + public void testDirect() throws Exception { + assertNotNull(getSimilarity("sim1text", SweetSpotSimilarity.class)); + } + + /** ... and for a dynamic field */ + public void testDirectDynamic() throws Exception { + assertNotNull(getSimilarity("text_sim1", SweetSpotSimilarity.class)); + } + + /** test a field where a configurable sim factory is explicitly defined */ + public void testDirectFactory() throws Exception { + MockConfigurableSimilarity sim = getSimilarity("sim2text", MockConfigurableSimilarity.class); + assertEquals("is there an echo?", sim.getPassthrough()); + } + + /** ... 
and for a dynamic field */ + public void testDirectFactoryDynamic() throws Exception { + MockConfigurableSimilarity sim = getSimilarity("text_sim2", MockConfigurableSimilarity.class); + assertEquals("is there an echo?", sim.getPassthrough()); + } + + /** test a field where no similarity is specified */ + public void testDefaults() throws Exception { + MockConfigurableSimilarity sim = getSimilarity("sim3text", MockConfigurableSimilarity.class); + assertEquals("is there an echo?", sim.getPassthrough()); + } + + /** ... and for a dynamic field */ + public void testDefaultsDynamic() throws Exception { + MockConfigurableSimilarity sim = getSimilarity("text_sim3", MockConfigurableSimilarity.class); + assertEquals("is there an echo?", sim.getPassthrough()); + } + + /** test a field that does not exist in the schema: the configured default sim must be used */ + public void testNonexistent() throws Exception { + MockConfigurableSimilarity sim = getSimilarity("sdfdsfdsfdswr2", MockConfigurableSimilarity.class); + assertEquals("is there an echo?", sim.getPassthrough()); + } +}