mirror of https://github.com/apache/lucene.git
SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using a fieldType name to identify which Similarity to use as a default
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1715881 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8090380ecd
commit
5b0b4194a1
|
@ -242,6 +242,9 @@ New Features
|
|||
* SOLR-6168: Add a 'sort' local param to the collapse QParser to support using complex sort options
|
||||
to select the representitive doc for each collapsed group. (Umesh Prasad, hossman)
|
||||
|
||||
* SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using
|
||||
a fieldType name to identify which Similarity to use as a default. (hossman)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
|||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
|
@ -34,43 +36,107 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
/**
|
||||
* <p>
|
||||
* SimilarityFactory that returns a {@link PerFieldSimilarityWrapper}
|
||||
* that delegates to the field type, if it's configured, otherwise
|
||||
* returns a sensible default depending on the {@link Version} matching configured.
|
||||
* <code>SimilarityFactory</code> that returns a global {@link PerFieldSimilarityWrapper}
|
||||
* that delegates to the field type, if it's configured. For field type's that
|
||||
* do not have a <code>Similarity</code> explicitly configured, the global <code>Similarity</code>
|
||||
* will use per fieldtype defaults -- either based on an explicitly configured
|
||||
* <code>defaultSimFromFieldType</code> a sensible default depending on the {@link Version}
|
||||
* matching configured:
|
||||
* </p>
|
||||
* <ul>
|
||||
* <li><code>luceneMatchVersion < 6.0</code> = {@link ClassicSimilarity}</li>
|
||||
* <li><code>luceneMatchVersion >= 6.0</code> = {@link BM25Similarity}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <b>NOTE:</b> Users should be aware that in addition to supporting
|
||||
* <code>Similarity</code> configurations specified on individual
|
||||
* field types, this factory also differs in behavior from
|
||||
* {@link ClassicSimilarityFactory} because of other differences in the
|
||||
* implementations of <code>PerFieldSimilarityWrapper</code> and
|
||||
* {@link ClassicSimilarity} - notably in methods such as
|
||||
* {@link Similarity#coord} and {@link Similarity#queryNorm}.
|
||||
* The <code>defaultSimFromFieldType</code> option accepts the name of any fieldtype, and uses
|
||||
* whatever <code>Similarity</code> is explicitly configured for that fieldType as thedefault for
|
||||
* all other field types. For example:
|
||||
* </p>
|
||||
* <pre class="prettyprint">
|
||||
* <similarity class="solr.SchemaSimilarityFactory" >
|
||||
* <str name="defaultSimFromFieldType">type-using-custom-dfr</str>
|
||||
* </similarity>
|
||||
* ...
|
||||
* <fieldType name="type-using-custom-dfr" class="solr.TextField">
|
||||
* ...
|
||||
* <similarity class="solr.DFRSimilarityFactory">
|
||||
* <str name="basicModel">I(F)</str>
|
||||
* <str name="afterEffect">B</str>
|
||||
* <str name="normalization">H3</str>
|
||||
* <float name="mu">900</float>
|
||||
* </similarity>
|
||||
* </fieldType>
|
||||
* </pre>
|
||||
* <p>
|
||||
* In the example above, any fieldtypes that do not define their own <code></similarity/></code>
|
||||
* will use the <code>Similarity</code> configured for the <code>type-using-custom-dfr</code>.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> Users should be aware that even when this factory uses a single default
|
||||
* <code>Similarity</code> for some or all fields in a Query, the behavior can be inconsistent
|
||||
* with the behavior of explicitly configuring that same <code>Similarity</code> globally, because
|
||||
* of differences in how some multi-field / multi-clause behavior is defined in
|
||||
* <code>PerFieldSimilarityWrapper</code>. In particular please consider carefully the documentation
|
||||
* & implementation of {@link Similarity#coord} and {@link Similarity#queryNorm} in
|
||||
* {@link ClassicSimilarity} compared to {@link PerFieldSimilarityWrapper}
|
||||
* </p>
|
||||
*
|
||||
* @see FieldType#getSimilarity
|
||||
*/
|
||||
public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCoreAware {
|
||||
|
||||
private Similarity similarity; // set by init
|
||||
private Similarity defaultSimilarity; // set by inform(SolrCore)
|
||||
private volatile SolrCore core;
|
||||
private static final String INIT_OPT = "defaultSimFromFieldType";
|
||||
|
||||
private String defaultSimFromFieldType; // set by init, if null use sensible implicit default
|
||||
|
||||
private volatile SolrCore core; // set by inform(SolrCore)
|
||||
private volatile Similarity similarity; // lazy instantiated
|
||||
|
||||
@Override
|
||||
public void inform(SolrCore core) {
|
||||
this.core = core;
|
||||
this.defaultSimilarity = this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0)
|
||||
? new BM25Similarity()
|
||||
: new ClassicSimilarity();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(SolrParams args) {
|
||||
defaultSimFromFieldType = args.get(INIT_OPT, null);
|
||||
super.init(args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity getSimilarity() {
|
||||
if (null == core) {
|
||||
throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called");
|
||||
}
|
||||
if (null == similarity) {
|
||||
// Need to instantiate lazily, can't do this in inform(SolrCore) because of chicken/egg
|
||||
// circular initialization hell with core.getLatestSchema() to lookup defaultSimFromFieldType
|
||||
|
||||
Similarity defaultSim = null;
|
||||
if (null == defaultSimFromFieldType) {
|
||||
// nothing configured, choose a sensible implicit default...
|
||||
defaultSim = this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0)
|
||||
? new BM25Similarity()
|
||||
: new ClassicSimilarity();
|
||||
} else {
|
||||
FieldType defSimFT = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
|
||||
if (null == defSimFT) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
|
||||
defaultSimFromFieldType + "' but that <fieldType> does not exist");
|
||||
|
||||
}
|
||||
defaultSim = defSimFT.getSimilarity();
|
||||
if (null == defaultSim) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
|
||||
defaultSimFromFieldType +
|
||||
"' but that <fieldType> does not define a <similarity>");
|
||||
}
|
||||
}
|
||||
assert null != defaultSim;
|
||||
final Similarity defaultSimilarity = defaultSim;
|
||||
similarity = new PerFieldSimilarityWrapper() {
|
||||
@Override
|
||||
public Similarity get(String name) {
|
||||
|
@ -84,12 +150,6 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity getSimilarity() {
|
||||
if (null == core) {
|
||||
throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called");
|
||||
}
|
||||
return similarity;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<schema name="bad-schema-sim-global-vs-ft-mismatch" version="1.0">
|
||||
|
||||
<similarity class="solr.SchemaSimilarityFactory" >
|
||||
<!-- BROKEN: the named fieldType does not exist -->
|
||||
<str name="defaultSimFromFieldType">ft-does-not-exist</str>
|
||||
</similarity>
|
||||
|
||||
<fieldType name="ft-has-no-sim" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="ft-overrides-default-sim" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
<similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
|
||||
</fieldType>
|
||||
|
||||
<field name="sim1text" type="ft-overrides-default-sim" indexed="true" stored="true"/>
|
||||
|
||||
<defaultSearchField>sim1text</defaultSearchField>
|
||||
|
||||
</schema>
|
|
@ -0,0 +1,43 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<schema name="bad-schema-sim-global-vs-ft-mismatch" version="1.0">
|
||||
|
||||
<similarity class="solr.SchemaSimilarityFactory" >
|
||||
<!-- BROKEN: the named fieldType does not have an explicit sim defined, circular default -->
|
||||
<str name="defaultSimFromFieldType">ft-has-no-sim</str>
|
||||
</similarity>
|
||||
|
||||
<fieldType name="ft-has-no-sim" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="ft-overrides-default-sim" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
<similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
|
||||
</fieldType>
|
||||
|
||||
<field name="sim1text" type="ft-overrides-default-sim" indexed="true" stored="true"/>
|
||||
|
||||
<defaultSearchField>sim1text</defaultSearchField>
|
||||
|
||||
</schema>
|
|
@ -0,0 +1,66 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- Per-field similarity testing where default for fieldtypes w/o sim is overridden -->
|
||||
|
||||
<schema name="test" version="1.0">
|
||||
|
||||
<similarity class="solr.SchemaSimilarityFactory" >
|
||||
<str name="defaultSimFromFieldType">sim-used-as-default-override</str>
|
||||
</similarity>
|
||||
|
||||
<!-- some per-field similarity examples -->
|
||||
<!-- specify a Similarity classname directly -->
|
||||
<fieldType name="sim-explicit" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
<similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
|
||||
</fieldType>
|
||||
|
||||
<!-- specify a Similarity factory -->
|
||||
<fieldType name="sim-used-as-default-override" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
<similarity class="solr.CustomSimilarityFactory">
|
||||
<str name="echo">is there an echo?</str>
|
||||
</similarity>
|
||||
</fieldType>
|
||||
|
||||
<!-- don't specify any sim at all: get the default -->
|
||||
<fieldType name="sim-none-get-default" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="sim1text" type="sim-explicit" indexed="true" stored="true"/>
|
||||
<field name="sim2text" type="sim-used-as-default-override" indexed="true" stored="true"/>
|
||||
<field name="sim3text" type="sim-none-get-default" indexed="true" stored="true"/>
|
||||
|
||||
<!-- make sure custom sims work with dynamic fields -->
|
||||
<dynamicField name="*_sim1" type="sim-explicit" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_sim2" type="sim-used-as-default-override" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_sim3" type="sim-none-get-default" indexed="true" stored="true"/>
|
||||
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
</schema>
|
|
@ -119,4 +119,14 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase {
|
|||
doTest("bad-schema-bogus-analysis-parameters.xml", "Unknown parameters");
|
||||
}
|
||||
|
||||
public void testSimDefaultFieldTypeHasNoExplicitSim() throws Exception {
|
||||
doTest("bad-schema-sim-default-has-no-explicit-sim.xml",
|
||||
"ft-has-no-sim");
|
||||
}
|
||||
|
||||
public void testSimDefaultFieldTypeDoesNotExist() throws Exception {
|
||||
doTest("bad-schema-sim-default-does-not-exist.xml",
|
||||
"ft-does-not-exist");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
package org.apache.solr.search.similarities;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.misc.SweetSpotSimilarity;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
* Tests per-field similarity support in the schema when SchemaSimilarityFactory is explicitly
|
||||
* configured to use a custom default sim for field types that do not override it.
|
||||
* @see TestPerFieldSimilarity
|
||||
*/
|
||||
public class TestPerFieldSimilarityWithDefaultOverride extends BaseSimilarityTestCase {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-sim-default-override.xml");
|
||||
}
|
||||
|
||||
/** test a field where the sim is specified directly */
|
||||
public void testDirect() throws Exception {
|
||||
assertNotNull(getSimilarity("sim1text", SweetSpotSimilarity.class));
|
||||
}
|
||||
|
||||
/** ... and for a dynamic field */
|
||||
public void testDirectDynamic() throws Exception {
|
||||
assertNotNull(getSimilarity("text_sim1", SweetSpotSimilarity.class));
|
||||
}
|
||||
|
||||
/** test a field where a configurable sim factory is explicitly defined */
|
||||
public void testDirectFactory() throws Exception {
|
||||
MockConfigurableSimilarity sim = getSimilarity("sim2text", MockConfigurableSimilarity.class);
|
||||
assertEquals("is there an echo?", sim.getPassthrough());
|
||||
}
|
||||
|
||||
/** ... and for a dynamic field */
|
||||
public void testDirectFactoryDynamic() throws Exception {
|
||||
MockConfigurableSimilarity sim = getSimilarity("text_sim2", MockConfigurableSimilarity.class);
|
||||
assertEquals("is there an echo?", sim.getPassthrough());
|
||||
}
|
||||
|
||||
/** test a field where no similarity is specified */
|
||||
public void testDefaults() throws Exception {
|
||||
MockConfigurableSimilarity sim = getSimilarity("sim3text", MockConfigurableSimilarity.class);
|
||||
assertEquals("is there an echo?", sim.getPassthrough());
|
||||
}
|
||||
|
||||
/** ... and for a dynamic field */
|
||||
public void testDefaultsDynamic() throws Exception {
|
||||
MockConfigurableSimilarity sim = getSimilarity("text_sim3", MockConfigurableSimilarity.class);
|
||||
assertEquals("is there an echo?", sim.getPassthrough());
|
||||
}
|
||||
|
||||
/** test a field that does not exist */
|
||||
public void testNonexistent() throws Exception {
|
||||
MockConfigurableSimilarity sim = getSimilarity("text_sim3", MockConfigurableSimilarity.class);
|
||||
assertEquals("is there an echo?", sim.getPassthrough());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue