SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using a fieldType name to identify which Similarity to use as a default

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1715881 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2015-11-23 16:50:49 +00:00
parent 8090380ecd
commit 5b0b4194a1
7 changed files with 329 additions and 28 deletions

View File

@ -242,6 +242,9 @@ New Features
* SOLR-6168: Add a 'sort' local param to the collapse QParser to support using complex sort options * SOLR-6168: Add a 'sort' local param to the collapse QParser to support using complex sort options
to select the representitive doc for each collapsed group. (Umesh Prasad, hossman) to select the representitive doc for each collapsed group. (Umesh Prasad, hossman)
* SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using
a fieldType name to identify which Similarity to use as a default. (hossman)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -23,6 +23,8 @@ import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
@ -34,55 +36,72 @@ import org.slf4j.LoggerFactory;
/** /**
* <p> * <p>
* SimilarityFactory that returns a {@link PerFieldSimilarityWrapper} * <code>SimilarityFactory</code> that returns a global {@link PerFieldSimilarityWrapper}
* that delegates to the field type, if it's configured, otherwise * that delegates to the field type, if it's configured. For field type's that
* returns a sensible default depending on the {@link Version} matching configured. * do not have a <code>Similarity</code> explicitly configured, the global <code>Similarity</code>
* will use per fieldtype defaults -- either based on an explicitly configured
* <code>defaultSimFromFieldType</code> a sensible default depending on the {@link Version}
* matching configured:
* </p> * </p>
* <ul> * <ul>
* <li><code>luceneMatchVersion &lt; 6.0</code> = {@link ClassicSimilarity}</li> * <li><code>luceneMatchVersion &lt; 6.0</code> = {@link ClassicSimilarity}</li>
* <li><code>luceneMatchVersion &gt;= 6.0</code> = {@link BM25Similarity}</li> * <li><code>luceneMatchVersion &gt;= 6.0</code> = {@link BM25Similarity}</li>
* </ul> * </ul>
* <p> * <p>
* <b>NOTE:</b> Users should be aware that in addition to supporting * The <code>defaultSimFromFieldType</code> option accepts the name of any fieldtype, and uses
* <code>Similarity</code> configurations specified on individual * whatever <code>Similarity</code> is explicitly configured for that fieldType as thedefault for
* field types, this factory also differs in behavior from * all other field types. For example:
* {@link ClassicSimilarityFactory} because of other differences in the * </p>
* implementations of <code>PerFieldSimilarityWrapper</code> and * <pre class="prettyprint">
* {@link ClassicSimilarity} - notably in methods such as * &lt;similarity class="solr.SchemaSimilarityFactory" &gt;
* {@link Similarity#coord} and {@link Similarity#queryNorm}. * &lt;str name="defaultSimFromFieldType"&gt;type-using-custom-dfr&lt;/str&gt;
* &lt;/similarity&gt;
* ...
* &lt;fieldType name="type-using-custom-dfr" class="solr.TextField"&gt;
* ...
* &lt;similarity class="solr.DFRSimilarityFactory"&gt;
* &lt;str name="basicModel"&gt;I(F)&lt;/str&gt;
* &lt;str name="afterEffect"&gt;B&lt;/str&gt;
* &lt;str name="normalization"&gt;H3&lt;/str&gt;
* &lt;float name="mu"&gt;900&lt;/float&gt;
* &lt;/similarity&gt;
* &lt;/fieldType&gt;
* </pre>
* <p>
* In the example above, any fieldtypes that do not define their own <code>&lt;/similarity/&gt;</code>
* will use the <code>Similarity</code> configured for the <code>type-using-custom-dfr</code>.
* </p>
*
* <p>
* <b>NOTE:</b> Users should be aware that even when this factory uses a single default
* <code>Similarity</code> for some or all fields in a Query, the behavior can be inconsistent
* with the behavior of explicitly configuring that same <code>Similarity</code> globally, because
* of differences in how some multi-field / multi-clause behavior is defined in
* <code>PerFieldSimilarityWrapper</code>. In particular please consider carefully the documentation
* &amp; implementation of {@link Similarity#coord} and {@link Similarity#queryNorm} in
* {@link ClassicSimilarity} compared to {@link PerFieldSimilarityWrapper}
* </p> * </p>
* *
* @see FieldType#getSimilarity * @see FieldType#getSimilarity
*/ */
public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCoreAware { public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCoreAware {
private static final String INIT_OPT = "defaultSimFromFieldType";
private Similarity similarity; // set by init private String defaultSimFromFieldType; // set by init, if null use sensible implicit default
private Similarity defaultSimilarity; // set by inform(SolrCore)
private volatile SolrCore core; private volatile SolrCore core; // set by inform(SolrCore)
private volatile Similarity similarity; // lazy instantiated
@Override @Override
public void inform(SolrCore core) { public void inform(SolrCore core) {
this.core = core; this.core = core;
this.defaultSimilarity = this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0)
? new BM25Similarity()
: new ClassicSimilarity();
} }
@Override @Override
public void init(SolrParams args) { public void init(SolrParams args) {
defaultSimFromFieldType = args.get(INIT_OPT, null);
super.init(args); super.init(args);
similarity = new PerFieldSimilarityWrapper() {
@Override
public Similarity get(String name) {
FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name);
if (fieldType == null) {
return defaultSimilarity;
} else {
Similarity similarity = fieldType.getSimilarity();
return similarity == null ? defaultSimilarity : similarity;
}
}
};
} }
@Override @Override
@ -90,6 +109,47 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
if (null == core) { if (null == core) {
throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called"); throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called");
} }
if (null == similarity) {
// Need to instantiate lazily, can't do this in inform(SolrCore) because of chicken/egg
// circular initialization hell with core.getLatestSchema() to lookup defaultSimFromFieldType
Similarity defaultSim = null;
if (null == defaultSimFromFieldType) {
// nothing configured, choose a sensible implicit default...
defaultSim = this.core.getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_6_0_0)
? new BM25Similarity()
: new ClassicSimilarity();
} else {
FieldType defSimFT = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
if (null == defSimFT) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
defaultSimFromFieldType + "' but that <fieldType> does not exist");
}
defaultSim = defSimFT.getSimilarity();
if (null == defaultSim) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
defaultSimFromFieldType +
"' but that <fieldType> does not define a <similarity>");
}
}
assert null != defaultSim;
final Similarity defaultSimilarity = defaultSim;
similarity = new PerFieldSimilarityWrapper() {
@Override
public Similarity get(String name) {
FieldType fieldType = core.getLatestSchema().getFieldTypeNoEx(name);
if (fieldType == null) {
return defaultSimilarity;
} else {
Similarity similarity = fieldType.getSimilarity();
return similarity == null ? defaultSimilarity : similarity;
}
}
};
}
return similarity; return similarity;
} }
} }

View File

@ -0,0 +1,43 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="bad-schema-sim-global-vs-ft-mismatch" version="1.0">
<similarity class="solr.SchemaSimilarityFactory" >
<!-- BROKEN: the named fieldType does not exist -->
<str name="defaultSimFromFieldType">ft-does-not-exist</str>
</similarity>
<fieldType name="ft-has-no-sim" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="ft-overrides-default-sim" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
<similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
</fieldType>
<field name="sim1text" type="ft-overrides-default-sim" indexed="true" stored="true"/>
<defaultSearchField>sim1text</defaultSearchField>
</schema>

View File

@ -0,0 +1,43 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="bad-schema-sim-global-vs-ft-mismatch" version="1.0">
<similarity class="solr.SchemaSimilarityFactory" >
<!-- BROKEN: the named fieldType does not have an explicit sim defined, circular default -->
<str name="defaultSimFromFieldType">ft-has-no-sim</str>
</similarity>
<fieldType name="ft-has-no-sim" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="ft-overrides-default-sim" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
<similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
</fieldType>
<field name="sim1text" type="ft-overrides-default-sim" indexed="true" stored="true"/>
<defaultSearchField>sim1text</defaultSearchField>
</schema>

View File

@ -0,0 +1,66 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Per-field similarity testing where default for fieldtypes w/o sim is overridden -->
<schema name="test" version="1.0">
<similarity class="solr.SchemaSimilarityFactory" >
<str name="defaultSimFromFieldType">sim-used-as-default-override</str>
</similarity>
<!-- some per-field similarity examples -->
<!-- specify a Similarity classname directly -->
<fieldType name="sim-explicit" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
<similarity class="org.apache.lucene.misc.SweetSpotSimilarity"/>
</fieldType>
<!-- specify a Similarity factory -->
<fieldType name="sim-used-as-default-override" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
<similarity class="solr.CustomSimilarityFactory">
<str name="echo">is there an echo?</str>
</similarity>
</fieldType>
<!-- don't specify any sim at all: get the default -->
<fieldType name="sim-none-get-default" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="sim1text" type="sim-explicit" indexed="true" stored="true"/>
<field name="sim2text" type="sim-used-as-default-override" indexed="true" stored="true"/>
<field name="sim3text" type="sim-none-get-default" indexed="true" stored="true"/>
<!-- make sure custom sims work with dynamic fields -->
<dynamicField name="*_sim1" type="sim-explicit" indexed="true" stored="true"/>
<dynamicField name="*_sim2" type="sim-used-as-default-override" indexed="true" stored="true"/>
<dynamicField name="*_sim3" type="sim-none-get-default" indexed="true" stored="true"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -119,4 +119,14 @@ public class BadIndexSchemaTest extends AbstractBadConfigTestBase {
doTest("bad-schema-bogus-analysis-parameters.xml", "Unknown parameters"); doTest("bad-schema-bogus-analysis-parameters.xml", "Unknown parameters");
} }
public void testSimDefaultFieldTypeHasNoExplicitSim() throws Exception {
doTest("bad-schema-sim-default-has-no-explicit-sim.xml",
"ft-has-no-sim");
}
public void testSimDefaultFieldTypeDoesNotExist() throws Exception {
doTest("bad-schema-sim-default-does-not-exist.xml",
"ft-does-not-exist");
}
} }

View File

@ -0,0 +1,76 @@
package org.apache.solr.search.similarities;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.misc.SweetSpotSimilarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.junit.BeforeClass;
/**
* Tests per-field similarity support in the schema when SchemaSimilarityFactory is explicitly
* configured to use a custom default sim for field types that do not override it.
* @see TestPerFieldSimilarity
*/
public class TestPerFieldSimilarityWithDefaultOverride extends BaseSimilarityTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-basic.xml","schema-sim-default-override.xml");
}
/** test a field where the sim is specified directly */
public void testDirect() throws Exception {
assertNotNull(getSimilarity("sim1text", SweetSpotSimilarity.class));
}
/** ... and for a dynamic field */
public void testDirectDynamic() throws Exception {
assertNotNull(getSimilarity("text_sim1", SweetSpotSimilarity.class));
}
/** test a field where a configurable sim factory is explicitly defined */
public void testDirectFactory() throws Exception {
MockConfigurableSimilarity sim = getSimilarity("sim2text", MockConfigurableSimilarity.class);
assertEquals("is there an echo?", sim.getPassthrough());
}
/** ... and for a dynamic field */
public void testDirectFactoryDynamic() throws Exception {
MockConfigurableSimilarity sim = getSimilarity("text_sim2", MockConfigurableSimilarity.class);
assertEquals("is there an echo?", sim.getPassthrough());
}
/** test a field where no similarity is specified */
public void testDefaults() throws Exception {
MockConfigurableSimilarity sim = getSimilarity("sim3text", MockConfigurableSimilarity.class);
assertEquals("is there an echo?", sim.getPassthrough());
}
/** ... and for a dynamic field */
public void testDefaultsDynamic() throws Exception {
MockConfigurableSimilarity sim = getSimilarity("text_sim3", MockConfigurableSimilarity.class);
assertEquals("is there an echo?", sim.getPassthrough());
}
/** test a field that does not exist */
public void testNonexistent() throws Exception {
MockConfigurableSimilarity sim = getSimilarity("text_sim3", MockConfigurableSimilarity.class);
assertEquals("is there an echo?", sim.getPassthrough());
}
}