SOLR-2754: add similarity factories for new ranking algorithms

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1173776 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-09-21 18:20:44 +00:00
parent 80654ae7be
commit 2d5d5226a6
36 changed files with 1299 additions and 54 deletions

View File

@ -336,4 +336,12 @@ public class BM25Similarity extends Similarity {
public String toString() { public String toString() {
return "BM25(k1=" + k1 + ",b=" + b + ")"; return "BM25(k1=" + k1 + ",b=" + b + ")";
} }
/** Returns the {@code k1} value held by this similarity (the one printed by {@code toString()}). */
public float getK1() {
return k1;
}
/** Returns the {@code b} value held by this similarity (the one printed by {@code toString()}). */
public float getB() {
return b;
}
} }

View File

@ -83,4 +83,16 @@ public class DFRSimilarity extends SimilarityBase {
return "DFR " + basicModel.toString() + afterEffect.toString() return "DFR " + basicModel.toString() + afterEffect.toString()
+ normalization.toString(); + normalization.toString();
} }
/** Returns the {@link BasicModel} component this similarity holds. */
public BasicModel getBasicModel() {
return basicModel;
}
/** Returns the {@link AfterEffect} component this similarity holds. */
public AfterEffect getAfterEffect() {
return afterEffect;
}
/** Returns the {@link Normalization} component this similarity holds. */
public Normalization getNormalization() {
return normalization;
}
} }

View File

@ -91,4 +91,16 @@ public class IBSimilarity extends SimilarityBase {
return "IB " + distribution.toString() + "-" + lambda.toString() return "IB " + distribution.toString() + "-" + lambda.toString()
+ normalization.toString(); + normalization.toString();
} }
/** Returns the {@link Distribution} component this similarity holds. */
public Distribution getDistribution() {
return distribution;
}
/** Returns the {@link Lambda} component this similarity holds. */
public Lambda getLambda() {
return lambda;
}
/** Returns the {@link Normalization} component this similarity holds. */
public Normalization getNormalization() {
return normalization;
}
} }

View File

@ -41,4 +41,8 @@ public class NormalizationH3 extends Normalization {
public String toString() { public String toString() {
return "3(" + mu + ")"; return "3(" + mu + ")";
} }
/** Returns the {@code mu} value held by this normalization (the one printed by {@code toString()}). */
public float getMu() {
return mu;
}
} }

View File

@ -41,4 +41,8 @@ public class NormalizationZ extends Normalization {
public String toString() { public String toString() {
return "Z(" + z + ")"; return "Z(" + z + ")";
} }
/** Returns the {@code z} value held by this normalization (the one printed by {@code toString()}). */
public float getZ() {
return z;
}
} }

View File

@ -171,6 +171,13 @@ New Features
* SOLR-2703: Added support for Lucene's "surround" query parser. (Simon Rosenthal, ehatcher) * SOLR-2703: Added support for Lucene's "surround" query parser. (Simon Rosenthal, ehatcher)
* SOLR-2754: Added factories for several ranking algorithms:
BM25SimilarityFactory: Okapi BM25
DFRSimilarityFactory: Divergence from Randomness models
IBSimilarityFactory: Information-based models
LMDirichletSimilarityFactory: LM with Dirichlet smoothing
LMJelinekMercerSimilarityFactory: LM with Jelinek-Mercer smoothing
(David Mark Nemeskey, Robert Muir)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -470,7 +470,7 @@ public final class IndexSchema {
}; };
log.debug("using default similarityProvider"); log.debug("using default similarityProvider");
} else { } else {
final Object obj = loader.newInstance(((Element) node).getAttribute("class")); final Object obj = loader.newInstance(((Element) node).getAttribute("class"), "search.similarities.");
// just like always, assume it's a SimilarityProviderFactory and get a ClassCastException - reasonable error handling // just like always, assume it's a SimilarityProviderFactory and get a ClassCastException - reasonable error handling
// configure a factory, get a similarity back // configure a factory, get a similarity back
NamedList<?> args = DOMUtil.childNodesToNamedList(node); NamedList<?> args = DOMUtil.childNodesToNamedList(node);
@ -714,7 +714,7 @@ public final class IndexSchema {
return null; return null;
} else { } else {
SimilarityFactory similarityFactory; SimilarityFactory similarityFactory;
final Object obj = loader.newInstance(((Element) node).getAttribute("class")); final Object obj = loader.newInstance(((Element) node).getAttribute("class"), "search.similarities.");
if (obj instanceof SimilarityFactory) { if (obj instanceof SimilarityFactory) {
// configure a factory, get a similarity back // configure a factory, get a similarity back
SolrParams params = SolrParams.toSolrParams(DOMUtil.childNodesToNamedList(node)); SolrParams params = SolrParams.toSolrParams(DOMUtil.childNodesToNamedList(node));

View File

@ -0,0 +1,62 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
/**
* Factory for {@link BM25Similarity}
* <p>
* Parameters:
* <ul>
* <li>k1 (float): Controls non-linear term frequency normalization (saturation).
* The default is <code>1.2</code>
* <li>b (float): Controls to what degree document length normalizes tf values.
* The default is <code>0.75</code>
* </ul>
* <p>
* Optional settings:
* <ul>
* <li>discountOverlaps (bool): Sets
* {@link BM25Similarity#setDiscountOverlaps(boolean)}</li>
* </ul>
* @lucene.experimental
*/
public class BM25SimilarityFactory extends SimilarityFactory {
  /** Value of the "discountOverlaps" parameter; handed to {@link BM25Similarity#setDiscountOverlaps(boolean)}. */
  private boolean discountOverlaps;
  /** Value of the "k1" parameter; 1.2 when the parameter is absent. */
  private float k1;
  /** Value of the "b" parameter; 0.75 when the parameter is absent. */
  private float b;

  /**
   * Captures the configuration for the similarity.
   * <p>
   * Reads "discountOverlaps" (default <code>true</code>), "k1"
   * (default <code>1.2</code>) and "b" (default <code>0.75</code>).
   *
   * @param params parameters declared for this factory
   */
  @Override
  public void init(SolrParams params) {
    super.init(params);
    this.discountOverlaps = params.getBool("discountOverlaps", true);
    this.k1 = params.getFloat("k1", 1.2f);
    this.b = params.getFloat("b", 0.75f);
  }

  /**
   * Creates a new {@link BM25Similarity} from the captured k1 and b values
   * and applies the discountOverlaps flag to it.
   *
   * @return the configured similarity
   */
  @Override
  public Similarity getSimilarity() {
    final BM25Similarity similarity = new BM25Similarity(this.k1, this.b);
    similarity.setDiscountOverlaps(this.discountOverlaps);
    return similarity;
  }
}

View File

@ -0,0 +1,47 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.SimilarityProvider; // javadoc
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SimilarityProviderFactory;
import org.apache.solr.search.SolrSimilarityProvider;
/**
* This class is aimed at non-VSM models, and therefore both the
* {@link SimilarityProvider#coord} and
* {@link SimilarityProvider#queryNorm} methods return {@code 1}.
* @lucene.experimental
*/
public class BasicSimilarityProviderFactory extends SimilarityProviderFactory {

  /**
   * Creates a provider for the given schema whose coord and queryNorm
   * factors are both the constant {@code 1}.
   *
   * @param schema schema handed to the {@link SolrSimilarityProvider} constructor
   * @return the provider
   */
  @Override
  public SolrSimilarityProvider getSimilarityProvider(IndexSchema schema) {
    return new ConstantFactorSimilarityProvider(schema);
  }

  /**
   * {@link SolrSimilarityProvider} that overrides coord and queryNorm so that
   * each of them always yields {@code 1}.
   */
  private static final class ConstantFactorSimilarityProvider extends SolrSimilarityProvider {

    ConstantFactorSimilarityProvider(IndexSchema schema) {
      super(schema);
    }

    /** Always {@code 1}, whatever the overlap counts are. */
    @Override
    public float coord(int overlap, int maxOverlap) {
      return 1f;
    }

    /** Always {@code 1}, whatever the sum of squared weights is. */
    @Override
    public float queryNorm(float sumOfSquaredWeights) {
      return 1f;
    }
  }
}

View File

@ -0,0 +1,186 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffect.NoAfterEffect; // javadoc
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelBE;
import org.apache.lucene.search.similarities.BasicModelD;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
import org.apache.lucene.search.similarities.BasicModelIn;
import org.apache.lucene.search.similarities.BasicModelIne;
import org.apache.lucene.search.similarities.BasicModelP;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.Normalization.NoNormalization; // javadoc
import org.apache.lucene.search.similarities.NormalizationH1;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
/**
* Factory for {@link DFRSimilarity}
* <p>
* You must specify the implementations for all three components of
* DFR (strings). In general the models are parameter-free, but two of the
* normalizations take floating point parameters (see below):
* <ol>
* <li>{@link BasicModel basicModel}: Basic model of information content:
* <ul>
* <li>{@link BasicModelBE Be}: Limiting form of Bose-Einstein
* <li>{@link BasicModelG G}: Geometric approximation of Bose-Einstein
* <li>{@link BasicModelP P}: Poisson approximation of the Binomial
* <li>{@link BasicModelD D}: Divergence approximation of the Binomial
* <li>{@link BasicModelIn I(n)}: Inverse document frequency
* <li>{@link BasicModelIne I(ne)}: Inverse expected document
* frequency [mixture of Poisson and IDF]
* <li>{@link BasicModelIF I(F)}: Inverse term frequency
* [approximation of I(ne)]
* </ul>
* <li>{@link AfterEffect afterEffect}: First normalization of information
* gain:
* <ul>
* <li>{@link AfterEffectL L}: Laplace's law of succession
* <li>{@link AfterEffectB B}: Ratio of two Bernoulli processes
* <li>{@link NoAfterEffect none}: no first normalization
* </ul>
* <li>{@link Normalization normalization}: Second (length) normalization:
* <ul>
* <li>{@link NormalizationH1 H1}: Uniform distribution of term
* frequency
* <li>{@link NormalizationH2 H2}: term frequency density inversely
* related to length
* <li>{@link NormalizationH3 H3}: term frequency normalization
* provided by Dirichlet prior
* <ul>
* <li>parameter mu (float): smoothing parameter &mu;. The
* default is <code>800</code>
* </ul>
* <li>{@link NormalizationZ Z}: term frequency normalization provided
* by a Zipfian relation
* <ul>
* <li>parameter z (float): represents <code>A/(A+1)</code>
* where A measures the specificity of the language.
* The default is <code>0.3</code>
* </ul>
* <li>{@link NoNormalization none}: no second normalization
* </ul>
* </ol>
* <p>
* Optional settings:
* <ul>
* <li>discountOverlaps (bool): Sets
* {@link DFRSimilarity#setDiscountOverlaps(boolean)}</li>
* </ul>
* @lucene.experimental
*/
public class DFRSimilarityFactory extends SimilarityFactory {
  /** Value of the "discountOverlaps" parameter; handed to {@link DFRSimilarity#setDiscountOverlaps(boolean)}. */
  private boolean discountOverlaps;
  /** Component selected by the "basicModel" parameter. */
  private BasicModel basicModel;
  /** Component selected by the "afterEffect" parameter. */
  private AfterEffect afterEffect;
  /** Component selected by the "normalization" parameter (plus optional "mu" / "z"). */
  private Normalization normalization;

  /**
   * Captures the configuration for the similarity.
   * <p>
   * Reads "discountOverlaps" (default <code>true</code>) and resolves the
   * "basicModel", "afterEffect" and "normalization" names to component
   * instances. An unrecognized or missing name is rejected with a
   * {@link RuntimeException}.
   *
   * @param params parameters declared for this factory
   */
  @Override
  public void init(SolrParams params) {
    super.init(params);
    discountOverlaps = params.getBool("discountOverlaps", true);
    basicModel = parseBasicModel(params.get("basicModel"));
    afterEffect = parseAfterEffect(params.get("afterEffect"));

    final String normalizationName = params.get("normalization");
    final String muValue = params.get("mu");
    final String zValue = params.get("z");
    normalization = parseNormalization(normalizationName, muValue, zValue);
  }

  /**
   * Maps a basic model name to a new instance of that model.
   *
   * @param expr one of Be, D, G, I(F), I(n), I(ne), P
   * @return the matching model
   * @throws RuntimeException if the name is not one of the above (including null)
   */
  private BasicModel parseBasicModel(String expr) {
    if ("Be".equals(expr)) {
      return new BasicModelBE();
    }
    if ("D".equals(expr)) {
      return new BasicModelD();
    }
    if ("G".equals(expr)) {
      return new BasicModelG();
    }
    if ("I(F)".equals(expr)) {
      return new BasicModelIF();
    }
    if ("I(n)".equals(expr)) {
      return new BasicModelIn();
    }
    if ("I(ne)".equals(expr)) {
      return new BasicModelIne();
    }
    if ("P".equals(expr)) {
      return new BasicModelP();
    }
    throw new RuntimeException("Invalid basicModel: " + expr);
  }

  /**
   * Maps an after effect name to a new instance of that after effect.
   *
   * @param expr one of B, L, none
   * @return the matching after effect
   * @throws RuntimeException if the name is not one of the above (including null)
   */
  private AfterEffect parseAfterEffect(String expr) {
    if ("B".equals(expr)) {
      return new AfterEffectB();
    }
    if ("L".equals(expr)) {
      return new AfterEffectL();
    }
    if ("none".equals(expr)) {
      return new AfterEffect.NoAfterEffect();
    }
    throw new RuntimeException("Invalid afterEffect: " + expr);
  }

  /**
   * Maps a normalization name, and its optional numeric argument, to a new
   * normalization instance. Also used by IBSimilarityFactory.
   * <p>
   * The argument checks run before the name is resolved: mu and z may not
   * both be given, mu is accepted only with H3, and z only with Z.
   *
   * @param expr one of H1, H2, H3, Z, none
   * @param mu   textual float for H3, or null to use the no-arg constructor
   * @param z    textual float for Z, or null to use the no-arg constructor
   * @return the matching normalization
   * @throws RuntimeException if the combination of arguments is rejected or
   *         the name is not recognized
   */
  static Normalization parseNormalization(String expr, String mu, String z) {
    final boolean muGiven = mu != null;
    final boolean zGiven = z != null;

    if (muGiven && zGiven) {
      throw new RuntimeException(
          "specifying mu and z make no sense for: " + expr);
    }
    if (muGiven && !"H3".equals(expr)) {
      throw new RuntimeException(
          "parameter mu only makes sense for normalization H3");
    }
    if (zGiven && !"Z".equals(expr)) {
      throw new RuntimeException(
          "parameter z only makes sense for normalization Z");
    }

    if ("H1".equals(expr)) {
      return new NormalizationH1();
    }
    if ("H2".equals(expr)) {
      return new NormalizationH2();
    }
    if ("H3".equals(expr)) {
      if (muGiven) {
        return new NormalizationH3(Float.parseFloat(mu));
      }
      return new NormalizationH3();
    }
    if ("Z".equals(expr)) {
      if (zGiven) {
        return new NormalizationZ(Float.parseFloat(z));
      }
      return new NormalizationZ();
    }
    if ("none".equals(expr)) {
      return new Normalization.NoNormalization();
    }
    throw new RuntimeException("Invalid normalization: " + expr);
  }

  /**
   * Creates a new {@link DFRSimilarity} from the three captured components
   * and applies the discountOverlaps flag to it.
   *
   * @return the configured similarity
   */
  @Override
  public Similarity getSimilarity() {
    final DFRSimilarity similarity =
        new DFRSimilarity(basicModel, afterEffect, normalization);
    similarity.setDiscountOverlaps(discountOverlaps);
    return similarity;
  }
}

View File

@ -0,0 +1,55 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity; // javadoc
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
/**
* Factory for {@link DefaultSimilarity}
* <p>
* DefaultSimilarity is Lucene's default scoring implementation, based
* upon the Vector Space Model.
* <p>
* Optional settings:
* <ul>
* <li>discountOverlaps (bool): Sets
* {@link DefaultSimilarity#setDiscountOverlaps(boolean)}</li>
* </ul>
* @see TFIDFSimilarity
* @lucene.experimental
*/
public class DefaultSimilarityFactory extends SimilarityFactory {
  /** Value of the "discountOverlaps" parameter; handed to {@link DefaultSimilarity#setDiscountOverlaps(boolean)}. */
  private boolean discountOverlaps;

  /**
   * Captures the "discountOverlaps" parameter (default <code>true</code>).
   *
   * @param params parameters declared for this factory
   */
  @Override
  public void init(SolrParams params) {
    super.init(params);
    this.discountOverlaps = params.getBool("discountOverlaps", true);
  }

  /**
   * Creates a new {@link DefaultSimilarity} and applies the discountOverlaps
   * flag to it.
   *
   * @return the configured similarity
   */
  @Override
  public Similarity getSimilarity() {
    final DefaultSimilarity similarity = new DefaultSimilarity();
    similarity.setDiscountOverlaps(this.discountOverlaps);
    return similarity;
  }
}

View File

@ -0,0 +1,111 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.Distribution;
import org.apache.lucene.search.similarities.DistributionLL;
import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.Lambda;
import org.apache.lucene.search.similarities.LambdaDF;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
import org.apache.solr.search.similarities.DFRSimilarityFactory; // javadoc
/**
* Factory for {@link IBSimilarity}
* <p>
* You must specify the implementations for all three components of the
* Information-Based model (strings).
* <ol>
* <li>{@link Distribution distribution}: Probabilistic distribution used to
* model term occurrence
* <ul>
* <li>{@link DistributionLL LL}: Log-logistic</li>
* <li>{@link DistributionSPL SPL}: Smoothed power-law</li>
* </ul>
* </li>
* <li>{@link Lambda lambda}: &lambda;<sub>w</sub> parameter of the
* probability distribution
* <ul>
* <li>{@link LambdaDF DF}: <code>N<sub>w</sub>/N</code> or average
* number of documents where w occurs</li>
* <li>{@link LambdaTTF TTF}: <code>F<sub>w</sub>/N</code> or
* average number of occurrences of w in the collection</li>
* </ul>
* </li>
* <li>{@link Normalization normalization}: Term frequency normalization
* <blockquote>Any supported DFR normalization listed in
* {@link DFRSimilarityFactory}</blockquote>
* </li>
* </ol>
* <p>
* Optional settings:
* <ul>
* <li>discountOverlaps (bool): Sets
* {@link IBSimilarity#setDiscountOverlaps(boolean)}</li>
* </ul>
* @lucene.experimental
*/
public class IBSimilarityFactory extends SimilarityFactory {
  /** Value of the "discountOverlaps" parameter; handed to {@link IBSimilarity#setDiscountOverlaps(boolean)}. */
  private boolean discountOverlaps;
  /** Component selected by the "distribution" parameter. */
  private Distribution distribution;
  /** Component selected by the "lambda" parameter. */
  private Lambda lambda;
  /** Component selected by the "normalization" parameter (plus optional "mu" / "z"). */
  private Normalization normalization;

  /**
   * Captures the configuration for the similarity.
   * <p>
   * Reads "discountOverlaps" (default <code>true</code>) and resolves the
   * "distribution", "lambda" and "normalization" names to component
   * instances; normalization parsing is delegated to
   * {@code DFRSimilarityFactory.parseNormalization}.
   *
   * @param params parameters declared for this factory
   */
  @Override
  public void init(SolrParams params) {
    super.init(params);
    discountOverlaps = params.getBool("discountOverlaps", true);
    distribution = parseDistribution(params.get("distribution"));
    lambda = parseLambda(params.get("lambda"));

    final String normalizationName = params.get("normalization");
    final String muValue = params.get("mu");
    final String zValue = params.get("z");
    normalization = DFRSimilarityFactory.parseNormalization(
        normalizationName, muValue, zValue);
  }

  /**
   * Maps a distribution name to a new instance of that distribution.
   *
   * @param expr one of LL, SPL
   * @return the matching distribution
   * @throws RuntimeException if the name is not one of the above (including null)
   */
  private Distribution parseDistribution(String expr) {
    if ("LL".equals(expr)) {
      return new DistributionLL();
    }
    if ("SPL".equals(expr)) {
      return new DistributionSPL();
    }
    throw new RuntimeException("Invalid distribution: " + expr);
  }

  /**
   * Maps a lambda name to a new instance of that lambda.
   *
   * @param expr one of DF, TTF
   * @return the matching lambda
   * @throws RuntimeException if the name is not one of the above (including null)
   */
  private Lambda parseLambda(String expr) {
    if ("DF".equals(expr)) {
      return new LambdaDF();
    }
    if ("TTF".equals(expr)) {
      return new LambdaTTF();
    }
    throw new RuntimeException("Invalid lambda: " + expr);
  }

  /**
   * Creates a new {@link IBSimilarity} from the three captured components
   * and applies the discountOverlaps flag to it.
   *
   * @return the configured similarity
   */
  @Override
  public Similarity getSimilarity() {
    final IBSimilarity similarity =
        new IBSimilarity(distribution, lambda, normalization);
    similarity.setDiscountOverlaps(discountOverlaps);
    return similarity;
  }
}

View File

@ -0,0 +1,60 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
/**
* Factory for {@link LMDirichletSimilarity}
* <p>
* Parameters:
* <ul>
* <li>parameter mu (float): smoothing parameter &mu;. The default is
* <code>2000</code></li>
* </ul>
* <p>
* Optional settings:
* <ul>
* <li>discountOverlaps (bool): Sets
* {@link LMDirichletSimilarity#setDiscountOverlaps(boolean)}</li>
* </ul>
* @lucene.experimental
*/
public class LMDirichletSimilarityFactory extends SimilarityFactory {
  /** Value of the "discountOverlaps" parameter; handed to {@link LMDirichletSimilarity#setDiscountOverlaps(boolean)}. */
  private boolean discountOverlaps;
  /** Value of the "mu" parameter, or null when it was not configured. */
  private Float mu;

  /**
   * Captures "discountOverlaps" (default <code>true</code>) and the optional
   * "mu" parameter, which stays null when absent.
   *
   * @param params parameters declared for this factory
   */
  @Override
  public void init(SolrParams params) {
    super.init(params);
    this.discountOverlaps = params.getBool("discountOverlaps", true);
    this.mu = params.getFloat("mu");
  }

  /**
   * Creates a new {@link LMDirichletSimilarity}, using the no-arg constructor
   * when no mu was configured and the mu-taking constructor otherwise, then
   * applies the discountOverlaps flag to it.
   *
   * @return the configured similarity
   */
  @Override
  public Similarity getSimilarity() {
    final LMDirichletSimilarity similarity;
    if (this.mu == null) {
      similarity = new LMDirichletSimilarity();
    } else {
      similarity = new LMDirichletSimilarity(this.mu);
    }
    similarity.setDiscountOverlaps(this.discountOverlaps);
    return similarity;
  }
}

View File

@ -0,0 +1,59 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
/**
* Factory for {@link LMJelinekMercerSimilarity}
* <p>
* Parameters:
* <ul>
* <li>parameter lambda (float): smoothing parameter &lambda;. The default
* is <code>0.7</code></li>
* </ul>
* <p>
* Optional settings:
* <ul>
* <li>discountOverlaps (bool): Sets
* {@link LMJelinekMercerSimilarity#setDiscountOverlaps(boolean)}</li>
* </ul>
* @lucene.experimental
*/
public class LMJelinekMercerSimilarityFactory extends SimilarityFactory {
  /** Value of the "discountOverlaps" parameter; handed to {@link LMJelinekMercerSimilarity#setDiscountOverlaps(boolean)}. */
  private boolean discountOverlaps;
  /** Value of the "lambda" parameter; 0.7 when the parameter is absent. */
  private float lambda;

  /**
   * Captures "discountOverlaps" (default <code>true</code>) and "lambda"
   * (default <code>0.7</code>).
   *
   * @param params parameters declared for this factory
   */
  @Override
  public void init(SolrParams params) {
    super.init(params);
    this.discountOverlaps = params.getBool("discountOverlaps", true);
    this.lambda = params.getFloat("lambda", 0.7f);
  }

  /**
   * Creates a new {@link LMJelinekMercerSimilarity} from the captured lambda
   * and applies the discountOverlaps flag to it.
   *
   * @return the configured similarity
   */
  @Override
  public Similarity getSimilarity() {
    final LMJelinekMercerSimilarity similarity =
        new LMJelinekMercerSimilarity(this.lambda);
    similarity.setDiscountOverlaps(this.discountOverlaps);
    return similarity;
  }
}

View File

@ -0,0 +1,50 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Test schema file for BM25SimilarityFactory -->
<schema name="test" version="1.0">
<types>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- default parameters -->
<fieldType name="text" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.BM25SimilarityFactory"/>
</fieldType>
<!-- with parameters -->
<fieldType name="text_params" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.BM25SimilarityFactory">
<float name="k1">1.2</float>
<float name="b">0.76</float>
</similarity>
</fieldType>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="text_params" type="text_params" indexed="true" stored="false"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,56 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Test schema file for DFRSimilarityFactory -->
<schema name="test" version="1.0">
<types>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- required components only; no optional normalization parameter -->
<fieldType name="text" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.DFRSimilarityFactory">
<str name="basicModel">I(F)</str>
<str name="afterEffect">B</str>
<str name="normalization">H2</str>
</similarity>
</fieldType>
<!-- with parameters -->
<fieldType name="text_params" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.DFRSimilarityFactory">
<str name="basicModel">I(F)</str>
<str name="afterEffect">B</str>
<str name="normalization">H3</str>
<float name="mu">900</float>
</similarity>
</fieldType>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="text_params" type="text_params" indexed="true" stored="false"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,56 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Test schema file for IBSimilarityFactory -->
<schema name="test" version="1.0">
<types>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- required components only; no optional normalization parameter -->
<fieldType name="text" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.IBSimilarityFactory">
<str name="distribution">SPL</str>
<str name="lambda">DF</str>
<str name="normalization">H2</str>
</similarity>
</fieldType>
<!-- with parameters -->
<fieldType name="text_params" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.IBSimilarityFactory">
<str name="distribution">LL</str>
<str name="lambda">TTF</str>
<str name="normalization">H3</str>
<float name="mu">900</float>
</similarity>
</fieldType>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="text_params" type="text_params" indexed="true" stored="false"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,49 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Test schema file for LMDirichletSimilarityFactory -->
<schema name="test" version="1.0">
<types>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- default parameters -->
<fieldType name="text" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.LMDirichletSimilarityFactory"/>
</fieldType>
<!-- with parameters -->
<fieldType name="text_params" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.LMDirichletSimilarityFactory">
<float name="mu">1000</float>
</similarity>
</fieldType>
</types>
<fields>
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="text_params" type="text_params" indexed="true" stored="false"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,49 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Test schema file for LMJelinekMercerSimilarityFactory.
Declares the factory on two field types: "text" with no parameters and
"text_params" with an explicit lambda, so both configurations can be checked.
-->
<schema name="test" version="1.0">
<types>
<!-- numeric type used only for the "id" field below -->
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- default parameters: the similarity factory is declared with no child elements -->
<fieldType name="text" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.LMJelinekMercerSimilarityFactory"/>
</fieldType>
<!-- with parameters: lambda is set explicitly to 0.4 -->
<fieldType name="text_params" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.LMJelinekMercerSimilarityFactory">
<float name="lambda">0.4</float>
</similarity>
</fieldType>
</types>
<fields>
<!-- one field per field type above; each field shares its type's name -->
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="text_params" type="text_params" indexed="true" stored="false"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -0,0 +1,40 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Test schema file for DefaultSimilarityFactory.
Declares a single text field type whose similarity is produced by the
factory with no parameters.
-->
<schema name="test" version="1.0">
<types>
<!-- numeric type used only for the "id" field below -->
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- TF/IDF: the similarity factory is declared with no child elements -->
<fieldType name="text" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
<similarity class="solr.DefaultSimilarityFactory"/>
</fieldType>
</types>
<fields>
<!-- one field per field type above; each field shares its type's name -->
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="text" type="text" indexed="true" stored="false"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -406,7 +406,7 @@
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer> </analyzer>
<similarity class="org.apache.solr.schema.CustomSimilarityFactory"> <similarity class="solr.CustomSimilarityFactory">
<str name="echo">is there an echo?</str> <str name="echo">is there an echo?</str>
</similarity> </similarity>
</fieldType> </fieldType>
@ -672,13 +672,13 @@
A custom similarity provider may be specified here, but the default is fine A custom similarity provider may be specified here, but the default is fine
for most applications. for most applications.
--> -->
<similarityProvider class="org.apache.solr.schema.CustomSimilarityProviderFactory"> <similarityProvider class="solr.CustomSimilarityProviderFactory">
<str name="echo">is there an echo?</str> <str name="echo">is there an echo?</str>
</similarityProvider> </similarityProvider>
<!-- default similarity, unless otherwise specified by the fieldType <!-- default similarity, unless otherwise specified by the fieldType
--> -->
<similarity class="org.apache.solr.schema.CustomSimilarityFactory"> <similarity class="solr.CustomSimilarityFactory">
<str name="echo">I am your default sim</str> <str name="echo">I am your default sim</str>
</similarity> </similarity>
</schema> </schema>

View File

@ -17,6 +17,8 @@
limitations under the License. limitations under the License.
--> -->
<!-- a basic solrconfig that tests can use when they want simple minimal solrconfig/schema
DO NOT ADD THINGS TO THIS CONFIG! -->
<config> <config>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion> <luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler> <requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>

View File

@ -27,6 +27,7 @@ import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.similarities.MockConfigurableSimilarityProvider;
import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.search.similarities.SimilarityProvider;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
@ -80,14 +81,6 @@ public class IndexSchemaTest extends SolrTestCaseJ4 {
clearIndex(); clearIndex();
} }
@Test
public void testSimilarityProviderFactory() {
SolrCore core = h.getCore();
SimilarityProvider similarityProvider = core.getSchema().getSimilarityProvider();
assertTrue("wrong class", similarityProvider instanceof MockConfigurableSimilarityProvider);
assertEquals("is there an echo?", ((MockConfigurableSimilarityProvider)similarityProvider).getPassthrough());
}
@Test @Test
public void testRuntimeFieldCreation() public void testRuntimeFieldCreation()
{ {

View File

@ -73,7 +73,7 @@ public class TestCollationField extends SolrTestCaseJ4 {
confDir.mkdir(); confDir.mkdir();
// copy over configuration files // copy over configuration files
FileUtils.copyFile(getFile("solr/conf/solrconfig-collate.xml"), new File(confDir, "solrconfig.xml")); FileUtils.copyFile(getFile("solr/conf/solrconfig-basic.xml"), new File(confDir, "solrconfig.xml"));
FileUtils.copyFile(getFile("solr/conf/schema-collate.xml"), new File(confDir, "schema.xml")); FileUtils.copyFile(getFile("solr/conf/schema-collate.xml"), new File(confDir, "schema.xml"));
// generate custom collation rules (DIN 5007-2), saving to customrules.dat // generate custom collation rules (DIN 5007-2), saving to customrules.dat

View File

@ -0,0 +1,46 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
/**
 * Common base class for the similarity factory tests. Supplies helpers that
 * fetch the {@link Similarity} or {@link SimilarityProvider} from the test
 * core's current searcher.
 */
public abstract class BaseSimilarityTestCase extends SolrTestCaseJ4 {

  /**
   * Returns the similarity in use for the field.
   *
   * @param field name of the field to look up
   * @return whatever the current searcher's similarity provider returns for
   *         {@code field}
   */
  protected Similarity getSimilarity(String field) {
    SolrCore core = h.getCore();
    RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
    try {
      return searcher.get().getSimilarityProvider().get(field);
    } finally {
      // Release the reference taken by getSearcher() even when the lookup
      // throws; otherwise the searcher reference is never given back.
      searcher.decref();
    }
  }

  /**
   * Returns the (Solr)SimilarityProvider.
   *
   * @return the similarity provider of the test core's current searcher
   */
  protected SimilarityProvider getSimilarityProvider() {
    SolrCore core = h.getCore();
    RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
    try {
      return searcher.get().getSimilarityProvider();
    } finally {
      // Same as above: the reference must be released on every path.
      searcher.decref();
    }
  }
}

View File

@ -14,9 +14,10 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.schema; package org.apache.solr.search.similarities;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.schema.SimilarityFactory;
public class CustomSimilarityFactory extends SimilarityFactory { public class CustomSimilarityFactory extends SimilarityFactory {
@Override @Override

View File

@ -14,9 +14,11 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.schema; package org.apache.solr.search.similarities;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SimilarityProviderFactory;
import org.apache.solr.search.SolrSimilarityProvider; import org.apache.solr.search.SolrSimilarityProvider;
public class CustomSimilarityProviderFactory extends SimilarityProviderFactory { public class CustomSimilarityProviderFactory extends SimilarityProviderFactory {

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.schema; package org.apache.solr.search.similarities;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.DefaultSimilarity;

View File

@ -14,8 +14,9 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.apache.solr.schema; package org.apache.solr.search.similarities;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrSimilarityProvider; import org.apache.solr.search.SolrSimilarityProvider;
public class MockConfigurableSimilarityProvider extends SolrSimilarityProvider { public class MockConfigurableSimilarityProvider extends SolrSimilarityProvider {

View File

@ -0,0 +1,46 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.junit.BeforeClass;
/**
 * Tests {@link BM25SimilarityFactory}: checks the similarity class produced
 * for a field configured without parameters and the k1/b values produced for
 * a field configured with parameters.
 */
public class TestBM25SimilarityFactory extends BaseSimilarityTestCase {

  /**
   * Tolerance used when comparing configured float parameters.
   * <p>
   * The expected b (0.76) is within 0.01 of 0.75, which is BM25's stock b
   * (NOTE(review): default taken from the Lucene BM25Similarity docs, please
   * confirm). A tolerance of 0.01 would therefore also accept a similarity
   * that ignored the configured value, so a tighter tolerance is used.
   */
  private static final float DELTA = 0.001f;

  /** Initializes the test core from solrconfig-basic.xml and schema-bm25.xml. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-basic.xml","schema-bm25.xml");
  }

  /** bm25 with default parameters */
  public void test() throws Exception {
    assertEquals(BM25Similarity.class, getSimilarity("text").getClass());
  }

  /** bm25 with parameters */
  public void testParameters() throws Exception {
    Similarity sim = getSimilarity("text_params");
    assertEquals(BM25Similarity.class, sim.getClass());
    BM25Similarity bm25 = (BM25Similarity) sim;
    assertEquals(1.2f, bm25.getK1(), DELTA);
    // must not pass for an unconfigured b; see DELTA
    assertEquals(0.76f, bm25.getB(), DELTA);
  }
}

View File

@ -0,0 +1,58 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.BasicModelIF;
import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.Similarity;
import org.junit.BeforeClass;
/**
 * Tests {@link DFRSimilarityFactory}: verifies the basic model, after effect
 * and normalization of the similarity produced for each configured field.
 */
public class TestDFRSimilarityFactory extends BaseSimilarityTestCase {

  /** Initializes the test core from solrconfig-basic.xml and schema-dfr.xml. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-basic.xml", "schema-dfr.xml");
  }

  /** dfr with default parameters: expects IF / B / H2 */
  public void test() throws Exception {
    final DFRSimilarity similarity = dfrFor("text");
    assertEquals(BasicModelIF.class, similarity.getBasicModel().getClass());
    assertEquals(AfterEffectB.class, similarity.getAfterEffect().getClass());
    assertEquals(NormalizationH2.class, similarity.getNormalization().getClass());
  }

  /** dfr with parameterized normalization: expects IF / B / H3 with mu of 900 */
  public void testParameters() throws Exception {
    final DFRSimilarity similarity = dfrFor("text_params");
    assertEquals(BasicModelIF.class, similarity.getBasicModel().getClass());
    assertEquals(AfterEffectB.class, similarity.getAfterEffect().getClass());
    final Object normalization = similarity.getNormalization();
    assertEquals(NormalizationH3.class, normalization.getClass());
    assertEquals(900f, ((NormalizationH3) normalization).getMu(), 0.01f);
  }

  /**
   * Looks up the similarity for {@code fieldName}, asserts that it is exactly
   * a {@link DFRSimilarity}, and returns it cast to that type.
   */
  private DFRSimilarity dfrFor(String fieldName) {
    final Similarity configured = getSimilarity(fieldName);
    assertEquals(DFRSimilarity.class, configured.getClass());
    return (DFRSimilarity) configured;
  }
}

View File

@ -0,0 +1,36 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.junit.BeforeClass;
/**
 * Tests {@link DefaultSimilarityFactory}: verifies the class of the
 * similarity produced for the configured field.
 */
public class TestDefaultSimilarityFactory extends BaseSimilarityTestCase {

  /** Initializes the test core from solrconfig-basic.xml and schema-tfidf.xml. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-basic.xml", "schema-tfidf.xml");
  }

  /** default parameters: the "text" field must use exactly DefaultSimilarity */
  public void test() throws Exception {
    final Class<?> actual = getSimilarity("text").getClass();
    assertEquals(DefaultSimilarity.class, actual);
  }
}

View File

@ -0,0 +1,60 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.DistributionLL;
import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.LambdaDF;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.Similarity;
import org.junit.BeforeClass;
/**
 * Tests {@link IBSimilarityFactory}: verifies the distribution, lambda and
 * normalization of the similarity produced for each configured field.
 */
public class TestIBSimilarityFactory extends BaseSimilarityTestCase {

  /** Initializes the test core from solrconfig-basic.xml and schema-ib.xml. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-basic.xml", "schema-ib.xml");
  }

  /** spl/df/h2 with default parameters */
  public void test() throws Exception {
    final IBSimilarity similarity = ibFor("text");
    assertEquals(DistributionSPL.class, similarity.getDistribution().getClass());
    assertEquals(LambdaDF.class, similarity.getLambda().getClass());
    assertEquals(NormalizationH2.class, similarity.getNormalization().getClass());
  }

  /** ll/ttf/h3 with parameterized normalization: expects mu of 900 */
  public void testParameters() throws Exception {
    final IBSimilarity similarity = ibFor("text_params");
    assertEquals(DistributionLL.class, similarity.getDistribution().getClass());
    assertEquals(LambdaTTF.class, similarity.getLambda().getClass());
    final Object normalization = similarity.getNormalization();
    assertEquals(NormalizationH3.class, normalization.getClass());
    assertEquals(900f, ((NormalizationH3) normalization).getMu(), 0.01f);
  }

  /**
   * Looks up the similarity for {@code fieldName}, asserts that it is exactly
   * an {@link IBSimilarity}, and returns it cast to that type.
   */
  private IBSimilarity ibFor(String fieldName) {
    final Similarity configured = getSimilarity(fieldName);
    assertEquals(IBSimilarity.class, configured.getClass());
    return (IBSimilarity) configured;
  }
}

View File

@ -0,0 +1,45 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.junit.BeforeClass;
/**
 * Tests {@link LMDirichletSimilarityFactory}: verifies the similarity class
 * for a field without parameters and the mu value for a field with parameters.
 */
public class TestLMDirichletSimilarityFactory extends BaseSimilarityTestCase {

  /** Initializes the test core from solrconfig-basic.xml and schema-lmdirichlet.xml. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-basic.xml", "schema-lmdirichlet.xml");
  }

  /** dirichlet with default parameters */
  public void test() throws Exception {
    final Class<?> actual = getSimilarity("text").getClass();
    assertEquals(LMDirichletSimilarity.class, actual);
  }

  /** dirichlet with parameters: expects mu of 1000 */
  public void testParameters() throws Exception {
    final Similarity configured = getSimilarity("text_params");
    assertEquals(LMDirichletSimilarity.class, configured.getClass());
    final float mu = ((LMDirichletSimilarity) configured).getMu();
    assertEquals(1000f, mu, 0.01f);
  }
}

View File

@ -0,0 +1,45 @@
package org.apache.solr.search.similarities;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.junit.BeforeClass;
/**
 * Tests {@link LMJelinekMercerSimilarityFactory}: verifies the similarity
 * class for a field without parameters and the lambda value for a field with
 * parameters.
 */
public class TestLMJelinekMercerSimilarityFactory extends BaseSimilarityTestCase {

  /** Initializes the test core from solrconfig-basic.xml and schema-lmjelinekmercer.xml. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-basic.xml", "schema-lmjelinekmercer.xml");
  }

  /** jelinek-mercer with default parameters */
  public void test() throws Exception {
    final Class<?> actual = getSimilarity("text").getClass();
    assertEquals(LMJelinekMercerSimilarity.class, actual);
  }

  /** jelinek-mercer with parameters: expects lambda of 0.4 */
  public void testParameters() throws Exception {
    final Similarity configured = getSimilarity("text_params");
    assertEquals(LMJelinekMercerSimilarity.class, configured.getClass());
    final float lambda = ((LMJelinekMercerSimilarity) configured).getLambda();
    assertEquals(0.4f, lambda, 0.01f);
  }
}

View File

@ -1,4 +1,4 @@
package org.apache.solr.schema; package org.apache.solr.search.similarities;
/** /**
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
@ -18,18 +18,16 @@ package org.apache.solr.schema;
*/ */
import org.apache.lucene.misc.SweetSpotSimilarity; import org.apache.lucene.misc.SweetSpotSimilarity;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.SolrTestCaseJ4; import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test;
/** /**
* Tests per-field similarity support in the schema * Tests per-field similarity support in the schema
*/ */
public class TestPerFieldSimilarity extends SolrTestCaseJ4 { public class TestPerFieldSimilarity extends BaseSimilarityTestCase {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
@ -38,69 +36,54 @@ public class TestPerFieldSimilarity extends SolrTestCaseJ4 {
/** test a field where the sim is specified directly */ /** test a field where the sim is specified directly */
public void testDirect() throws Exception { public void testDirect() throws Exception {
SolrCore core = h.getCore(); assertEquals(SweetSpotSimilarity.class, getSimilarity("sim1text").getClass());
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
Similarity sim = searcher.get().getSimilarityProvider().get("sim1text");
assertEquals(SweetSpotSimilarity.class, sim.getClass());
searcher.decref();
} }
/** ... and for a dynamic field */ /** ... and for a dynamic field */
public void testDirectDynamic() throws Exception { public void testDirectDynamic() throws Exception {
SolrCore core = h.getCore(); assertEquals(SweetSpotSimilarity.class, getSimilarity("text_sim1").getClass());
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
Similarity sim = searcher.get().getSimilarityProvider().get("text_sim1");
assertEquals(SweetSpotSimilarity.class, sim.getClass());
searcher.decref();
} }
/** test a field where a configurable sim factory is defined */ /** test a field where a configurable sim factory is defined */
public void testFactory() throws Exception { public void testFactory() throws Exception {
SolrCore core = h.getCore(); Similarity sim = getSimilarity("sim2text");
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
Similarity sim = searcher.get().getSimilarityProvider().get("sim2text");
assertEquals(MockConfigurableSimilarity.class, sim.getClass()); assertEquals(MockConfigurableSimilarity.class, sim.getClass());
assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough()); assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough());
searcher.decref();
} }
/** ... and for a dynamic field */ /** ... and for a dynamic field */
public void testFactoryDynamic() throws Exception { public void testFactoryDynamic() throws Exception {
SolrCore core = h.getCore(); Similarity sim = getSimilarity("text_sim2");
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
Similarity sim = searcher.get().getSimilarityProvider().get("text_sim2");
assertEquals(MockConfigurableSimilarity.class, sim.getClass()); assertEquals(MockConfigurableSimilarity.class, sim.getClass());
assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough()); assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough());
searcher.decref();
} }
/** test a field where no similarity is specified */ /** test a field where no similarity is specified */
public void testDefaults() throws Exception { public void testDefaults() throws Exception {
SolrCore core = h.getCore(); Similarity sim = getSimilarity("sim3text");
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
Similarity sim = searcher.get().getSimilarityProvider().get("sim3text");
assertEquals(MockConfigurableSimilarity.class, sim.getClass()); assertEquals(MockConfigurableSimilarity.class, sim.getClass());
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough()); assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
searcher.decref();
} }
/** ... and for a dynamic field */ /** ... and for a dynamic field */
public void testDefaultsDynamic() throws Exception { public void testDefaultsDynamic() throws Exception {
SolrCore core = h.getCore(); Similarity sim = getSimilarity("text_sim3");
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
Similarity sim = searcher.get().getSimilarityProvider().get("text_sim3");
assertEquals(MockConfigurableSimilarity.class, sim.getClass()); assertEquals(MockConfigurableSimilarity.class, sim.getClass());
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough()); assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
searcher.decref();
} }
/** test a field that does not exist */ /** test a field that does not exist */
public void testNonexistent() throws Exception { public void testNonexistent() throws Exception {
SolrCore core = h.getCore(); Similarity sim = getSimilarity("sdfdsfdsfdswr5fsdfdsfdsfs");
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
Similarity sim = searcher.get().getSimilarityProvider().get("sdfdsfdsfdswr5fsdfdsfdsfs");
assertEquals(MockConfigurableSimilarity.class, sim.getClass()); assertEquals(MockConfigurableSimilarity.class, sim.getClass());
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough()); assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
searcher.decref(); }
@Test
public void testSimilarityProviderFactory() {
SolrCore core = h.getCore();
SimilarityProvider similarityProvider = core.getSchema().getSimilarityProvider();
assertTrue("wrong class", similarityProvider instanceof MockConfigurableSimilarityProvider);
assertEquals("is there an echo?", ((MockConfigurableSimilarityProvider)similarityProvider).getPassthrough());
} }
} }