mirror of https://github.com/apache/lucene.git
SOLR-2754: add similarity factories for new ranking algorithms
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1173776 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
80654ae7be
commit
2d5d5226a6
|
@ -336,4 +336,12 @@ public class BM25Similarity extends Similarity {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "BM25(k1=" + k1 + ",b=" + b + ")";
|
return "BM25(k1=" + k1 + ",b=" + b + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public float getK1() {
|
||||||
|
return k1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float getB() {
|
||||||
|
return b;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -83,4 +83,16 @@ public class DFRSimilarity extends SimilarityBase {
|
||||||
return "DFR " + basicModel.toString() + afterEffect.toString()
|
return "DFR " + basicModel.toString() + afterEffect.toString()
|
||||||
+ normalization.toString();
|
+ normalization.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public BasicModel getBasicModel() {
|
||||||
|
return basicModel;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AfterEffect getAfterEffect() {
|
||||||
|
return afterEffect;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Normalization getNormalization() {
|
||||||
|
return normalization;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -91,4 +91,16 @@ public class IBSimilarity extends SimilarityBase {
|
||||||
return "IB " + distribution.toString() + "-" + lambda.toString()
|
return "IB " + distribution.toString() + "-" + lambda.toString()
|
||||||
+ normalization.toString();
|
+ normalization.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Distribution getDistribution() {
|
||||||
|
return distribution;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Lambda getLambda() {
|
||||||
|
return lambda;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Normalization getNormalization() {
|
||||||
|
return normalization;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,4 +41,8 @@ public class NormalizationH3 extends Normalization {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "3(" + mu + ")";
|
return "3(" + mu + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public float getMu() {
|
||||||
|
return mu;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,4 +41,8 @@ public class NormalizationZ extends Normalization {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "Z(" + z + ")";
|
return "Z(" + z + ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public float getZ() {
|
||||||
|
return z;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -171,6 +171,13 @@ New Features
|
||||||
|
|
||||||
* SOLR-2703: Added support for Lucene's "surround" query parser. (Simon Rosenthal, ehatcher)
|
* SOLR-2703: Added support for Lucene's "surround" query parser. (Simon Rosenthal, ehatcher)
|
||||||
|
|
||||||
|
* SOLR-2754: Added factories for several ranking algorithms:
|
||||||
|
BM25SimilarityFactory: Okapi BM25
|
||||||
|
DFRSimilarityFactory: Divergence from Randomness models
|
||||||
|
IBSimilarityFactory: Information-based models
|
||||||
|
LMDirichletSimilarityFactory: LM with Dirichlet smoothing
|
||||||
|
LMJelinekMercerSimilarityFactory: LM with Jelinek-Mercer smoothing
|
||||||
|
(David Mark Nemeskey, Robert Muir)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -470,7 +470,7 @@ public final class IndexSchema {
|
||||||
};
|
};
|
||||||
log.debug("using default similarityProvider");
|
log.debug("using default similarityProvider");
|
||||||
} else {
|
} else {
|
||||||
final Object obj = loader.newInstance(((Element) node).getAttribute("class"));
|
final Object obj = loader.newInstance(((Element) node).getAttribute("class"), "search.similarities.");
|
||||||
// just like always, assume it's a SimilarityProviderFactory and get a ClassCastException - reasonable error handling
|
// just like always, assume it's a SimilarityProviderFactory and get a ClassCastException - reasonable error handling
|
||||||
// configure a factory, get a similarity back
|
// configure a factory, get a similarity back
|
||||||
NamedList<?> args = DOMUtil.childNodesToNamedList(node);
|
NamedList<?> args = DOMUtil.childNodesToNamedList(node);
|
||||||
|
@ -714,7 +714,7 @@ public final class IndexSchema {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
SimilarityFactory similarityFactory;
|
SimilarityFactory similarityFactory;
|
||||||
final Object obj = loader.newInstance(((Element) node).getAttribute("class"));
|
final Object obj = loader.newInstance(((Element) node).getAttribute("class"), "search.similarities.");
|
||||||
if (obj instanceof SimilarityFactory) {
|
if (obj instanceof SimilarityFactory) {
|
||||||
// configure a factory, get a similarity back
|
// configure a factory, get a similarity back
|
||||||
SolrParams params = SolrParams.toSolrParams(DOMUtil.childNodesToNamedList(node));
|
SolrParams params = SolrParams.toSolrParams(DOMUtil.childNodesToNamedList(node));
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.schema.SimilarityFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link BM25Similarity}
|
||||||
|
* <p>
|
||||||
|
* Parameters:
|
||||||
|
* <ul>
|
||||||
|
* <li>k1 (float): Controls non-linear term frequency normalization (saturation).
|
||||||
|
* The default is <code>1.2</code>
|
||||||
|
* <li>b (float): Controls to what degree document length normalizes tf values.
|
||||||
|
* The default is <code>0.75</code>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Optional settings:
|
||||||
|
* <ul>
|
||||||
|
* <li>discountOverlaps (bool): Sets
|
||||||
|
* {@link BM25Similarity#setDiscountOverlaps(boolean)}</li>
|
||||||
|
* </ul>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class BM25SimilarityFactory extends SimilarityFactory {
|
||||||
|
private boolean discountOverlaps;
|
||||||
|
private float k1;
|
||||||
|
private float b;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(SolrParams params) {
|
||||||
|
super.init(params);
|
||||||
|
discountOverlaps = params.getBool("discountOverlaps", true);
|
||||||
|
k1 = params.getFloat("k1", 1.2f);
|
||||||
|
b = params.getFloat("b", 0.75f);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Similarity getSimilarity() {
|
||||||
|
BM25Similarity sim = new BM25Similarity(k1, b);
|
||||||
|
sim.setDiscountOverlaps(discountOverlaps);
|
||||||
|
return sim;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,47 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.SimilarityProvider; // javadoc
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.SimilarityProviderFactory;
|
||||||
|
import org.apache.solr.search.SolrSimilarityProvider;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is aimed at non-VSM models, and therefore both the
|
||||||
|
* {@link SimilarityProvider#coord} and
|
||||||
|
* {@link SimilarityProvider#queryNorm} methods return {@code 1}.
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class BasicSimilarityProviderFactory extends SimilarityProviderFactory {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SolrSimilarityProvider getSimilarityProvider(IndexSchema schema) {
|
||||||
|
return new SolrSimilarityProvider(schema) {
|
||||||
|
@Override
|
||||||
|
public float coord(int overlap, int maxOverlap) {
|
||||||
|
return 1f;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float queryNorm(float sumOfSquaredWeights) {
|
||||||
|
return 1f;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,186 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.AfterEffect;
|
||||||
|
import org.apache.lucene.search.similarities.AfterEffect.NoAfterEffect; // javadoc
|
||||||
|
import org.apache.lucene.search.similarities.AfterEffectB;
|
||||||
|
import org.apache.lucene.search.similarities.AfterEffectL;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModel;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelBE;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelD;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelG;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelIF;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelIn;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelIne;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelP;
|
||||||
|
import org.apache.lucene.search.similarities.DFRSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.Normalization;
|
||||||
|
import org.apache.lucene.search.similarities.Normalization.NoNormalization; // javadoc
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationH1;
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationH2;
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationH3;
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationZ;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.schema.SimilarityFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link DFRSimilarity}
|
||||||
|
* <p>
|
||||||
|
* You must specify the implementations for all three components of
|
||||||
|
* DFR (strings). In general the models are parameter-free, but two of the
|
||||||
|
* normalizations take floating point parameters (see below):
|
||||||
|
* <ol>
|
||||||
|
* <li>{@link BasicModel basicModel}: Basic model of information content:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link BasicModelBE Be}: Limiting form of Bose-Einstein
|
||||||
|
* <li>{@link BasicModelG G}: Geometric approximation of Bose-Einstein
|
||||||
|
* <li>{@link BasicModelP P}: Poisson approximation of the Binomial
|
||||||
|
* <li>{@link BasicModelD D}: Divergence approximation of the Binomial
|
||||||
|
* <li>{@link BasicModelIn I(n)}: Inverse document frequency
|
||||||
|
* <li>{@link BasicModelIne I(ne)}: Inverse expected document
|
||||||
|
* frequency [mixture of Poisson and IDF]
|
||||||
|
* <li>{@link BasicModelIF I(F)}: Inverse term frequency
|
||||||
|
* [approximation of I(ne)]
|
||||||
|
* </ul>
|
||||||
|
* <li>{@link AfterEffect afterEffect}: First normalization of information
|
||||||
|
* gain:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link AfterEffectL L}: Laplace's law of succession
|
||||||
|
* <li>{@link AfterEffectB B}: Ratio of two Bernoulli processes
|
||||||
|
* <li>{@link NoAfterEffect none}: no first normalization
|
||||||
|
* </ul>
|
||||||
|
* <li>{@link Normalization normalization}: Second (length) normalization:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link NormalizationH1 H1}: Uniform distribution of term
|
||||||
|
* frequency
|
||||||
|
* <li>{@link NormalizationH2 H2}: term frequency density inversely
|
||||||
|
* related to length
|
||||||
|
* <li>{@link NormalizationH3 H3}: term frequency normalization
|
||||||
|
* provided by Dirichlet prior
|
||||||
|
* <ul>
|
||||||
|
* <li>parameter mu (float): smoothing parameter μ. The
|
||||||
|
* default is <code>800</code>
|
||||||
|
* </ul>
|
||||||
|
* <li>{@link NormalizationZ Z}: term frequency normalization provided
|
||||||
|
* by a Zipfian relation
|
||||||
|
* <ul>
|
||||||
|
* <li>parameter z (float): represents <code>A/(A+1)</code>
|
||||||
|
* where A measures the specificity of the language.
|
||||||
|
* The default is <code>0.3</code>
|
||||||
|
* </ul>
|
||||||
|
* <li>{@link NoNormalization none}: no second normalization
|
||||||
|
* </ul>
|
||||||
|
* </ol>
|
||||||
|
* <p>
|
||||||
|
* <p>
|
||||||
|
* Optional settings:
|
||||||
|
* <ul>
|
||||||
|
* <li>discountOverlaps (bool): Sets
|
||||||
|
* {@link DFRSimilarity#setDiscountOverlaps(boolean)}</li>
|
||||||
|
* </ul>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class DFRSimilarityFactory extends SimilarityFactory {
|
||||||
|
private boolean discountOverlaps;
|
||||||
|
private BasicModel basicModel;
|
||||||
|
private AfterEffect afterEffect;
|
||||||
|
private Normalization normalization;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(SolrParams params) {
|
||||||
|
super.init(params);
|
||||||
|
discountOverlaps = params.getBool("discountOverlaps", true);
|
||||||
|
basicModel = parseBasicModel(params.get("basicModel"));
|
||||||
|
afterEffect = parseAfterEffect(params.get("afterEffect"));
|
||||||
|
normalization = parseNormalization(
|
||||||
|
params.get("normalization"), params.get("mu"), params.get("z"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private BasicModel parseBasicModel(String expr) {
|
||||||
|
if ("Be".equals(expr)) {
|
||||||
|
return new BasicModelBE();
|
||||||
|
} else if ("D".equals(expr)) {
|
||||||
|
return new BasicModelD();
|
||||||
|
} else if ("G".equals(expr)) {
|
||||||
|
return new BasicModelG();
|
||||||
|
} else if ("I(F)".equals(expr)) {
|
||||||
|
return new BasicModelIF();
|
||||||
|
} else if ("I(n)".equals(expr)) {
|
||||||
|
return new BasicModelIn();
|
||||||
|
} else if ("I(ne)".equals(expr)) {
|
||||||
|
return new BasicModelIne();
|
||||||
|
} else if ("P".equals(expr)) {
|
||||||
|
return new BasicModelP();
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Invalid basicModel: " + expr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private AfterEffect parseAfterEffect(String expr) {
|
||||||
|
if ("B".equals(expr)) {
|
||||||
|
return new AfterEffectB();
|
||||||
|
} else if ("L".equals(expr)) {
|
||||||
|
return new AfterEffectL();
|
||||||
|
} else if ("none".equals(expr)) {
|
||||||
|
return new AfterEffect.NoAfterEffect();
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Invalid afterEffect: " + expr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// also used by IBSimilarityFactory
|
||||||
|
static Normalization parseNormalization(String expr, String mu, String z) {
|
||||||
|
if (mu != null && z != null) {
|
||||||
|
throw new RuntimeException(
|
||||||
|
"specifying mu and z make no sense for: " + expr);
|
||||||
|
}
|
||||||
|
if (mu != null && !"H3".equals(expr)) {
|
||||||
|
throw new RuntimeException(
|
||||||
|
"parameter mu only makes sense for normalization H3");
|
||||||
|
}
|
||||||
|
if (z != null && !"Z".equals(expr)) {
|
||||||
|
throw new RuntimeException(
|
||||||
|
"parameter z only makes sense for normalization Z");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ("H1".equals(expr)) {
|
||||||
|
return new NormalizationH1();
|
||||||
|
} else if ("H2".equals(expr)) {
|
||||||
|
return new NormalizationH2();
|
||||||
|
} else if ("H3".equals(expr)) {
|
||||||
|
return (mu != null) ? new NormalizationH3(Float.parseFloat(mu))
|
||||||
|
: new NormalizationH3();
|
||||||
|
} else if ("Z".equals(expr)) {
|
||||||
|
return (z != null) ? new NormalizationZ(Float.parseFloat(z))
|
||||||
|
: new NormalizationZ();
|
||||||
|
} else if ("none".equals(expr)) {
|
||||||
|
return new Normalization.NoNormalization();
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Invalid normalization: " + expr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Similarity getSimilarity() {
|
||||||
|
DFRSimilarity sim = new DFRSimilarity(basicModel, afterEffect, normalization);
|
||||||
|
sim.setDiscountOverlaps(discountOverlaps);
|
||||||
|
return sim;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,55 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.search.similarities.TFIDFSimilarity; // javadoc
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.schema.SimilarityFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link DefaultSimilarity}
|
||||||
|
* <p>
|
||||||
|
* DefaultSimilarity is Lucene's default scoring implementation, based
|
||||||
|
* upon the Vector Space Model.
|
||||||
|
* <p>
|
||||||
|
* Optional settings:
|
||||||
|
* <ul>
|
||||||
|
* <li>discountOverlaps (bool): Sets
|
||||||
|
* {@link DefaultSimilarity#setDiscountOverlaps(boolean)}</li>
|
||||||
|
* </ul>
|
||||||
|
* @see TFIDFSimilarity
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class DefaultSimilarityFactory extends SimilarityFactory {
|
||||||
|
private boolean discountOverlaps;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(SolrParams params) {
|
||||||
|
super.init(params);
|
||||||
|
discountOverlaps = params.getBool("discountOverlaps", true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Similarity getSimilarity() {
|
||||||
|
DefaultSimilarity sim = new DefaultSimilarity();
|
||||||
|
sim.setDiscountOverlaps(discountOverlaps);
|
||||||
|
return sim;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,111 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.Distribution;
|
||||||
|
import org.apache.lucene.search.similarities.DistributionLL;
|
||||||
|
import org.apache.lucene.search.similarities.DistributionSPL;
|
||||||
|
import org.apache.lucene.search.similarities.IBSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.Lambda;
|
||||||
|
import org.apache.lucene.search.similarities.LambdaDF;
|
||||||
|
import org.apache.lucene.search.similarities.LambdaTTF;
|
||||||
|
import org.apache.lucene.search.similarities.Normalization;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.schema.SimilarityFactory;
|
||||||
|
import org.apache.solr.search.similarities.DFRSimilarityFactory; // javadoc
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link IBSimilarity}
|
||||||
|
* <p>
|
||||||
|
* You must specify the implementations for all three components of the
|
||||||
|
* Information-Based model (strings).
|
||||||
|
* <ol>
|
||||||
|
* <li>{@link Distribution distribution}: Probabilistic distribution used to
|
||||||
|
* model term occurrence
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link DistributionLL LL}: Log-logistic</li>
|
||||||
|
* <li>{@link DistributionSPL SPL}: Smoothed power-law</li>
|
||||||
|
* </ul>
|
||||||
|
* </li>
|
||||||
|
* <li>{@link Lambda lambda}: λ<sub>w</sub> parameter of the
|
||||||
|
* probability distribution
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link LambdaDF DF}: <code>N<sub>w</sub>/N</code> or average
|
||||||
|
* number of documents where w occurs</li>
|
||||||
|
* <li>{@link LambdaTTF TTF}: <code>F<sub>w</sub>/N</code> or
|
||||||
|
* average number of occurrences of w in the collection</li>
|
||||||
|
* </ul>
|
||||||
|
* </li>
|
||||||
|
* <li>{@link Normalization normalization}: Term frequency normalization
|
||||||
|
* <blockquote>Any supported DFR normalization listed in
|
||||||
|
{@link DFRSimilarityFactory}</blockquote>
|
||||||
|
</li>
|
||||||
|
* </ol>
|
||||||
|
* <p>
|
||||||
|
* Optional settings:
|
||||||
|
* <ul>
|
||||||
|
* <li>discountOverlaps (bool): Sets
|
||||||
|
* {@link IBSimilarity#setDiscountOverlaps(boolean)}</li>
|
||||||
|
* </ul>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class IBSimilarityFactory extends SimilarityFactory {
|
||||||
|
private boolean discountOverlaps;
|
||||||
|
private Distribution distribution;
|
||||||
|
private Lambda lambda;
|
||||||
|
private Normalization normalization;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(SolrParams params) {
|
||||||
|
super.init(params);
|
||||||
|
discountOverlaps = params.getBool("discountOverlaps", true);
|
||||||
|
distribution = parseDistribution(params.get("distribution"));
|
||||||
|
lambda = parseLambda(params.get("lambda"));
|
||||||
|
normalization = DFRSimilarityFactory.parseNormalization(
|
||||||
|
params.get("normalization"), params.get("mu"), params.get("z"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Distribution parseDistribution(String expr) {
|
||||||
|
if ("LL".equals(expr)) {
|
||||||
|
return new DistributionLL();
|
||||||
|
} else if ("SPL".equals(expr)) {
|
||||||
|
return new DistributionSPL();
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Invalid distribution: " + expr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Lambda parseLambda(String expr) {
|
||||||
|
if ("DF".equals(expr)) {
|
||||||
|
return new LambdaDF();
|
||||||
|
} else if ("TTF".equals(expr)) {
|
||||||
|
return new LambdaTTF();
|
||||||
|
} else {
|
||||||
|
throw new RuntimeException("Invalid lambda: " + expr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Similarity getSimilarity() {
|
||||||
|
IBSimilarity sim = new IBSimilarity(distribution, lambda, normalization);
|
||||||
|
sim.setDiscountOverlaps(discountOverlaps);
|
||||||
|
return sim;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.schema.SimilarityFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link LMDirichletSimilarity}
|
||||||
|
* <p>
|
||||||
|
* Parameters:
|
||||||
|
* <ul>
|
||||||
|
* <li>parameter mu (float): smoothing parameter μ. The default is
|
||||||
|
* <code>2000</code></li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Optional settings:
|
||||||
|
* <ul>
|
||||||
|
* <li>discountOverlaps (bool): Sets
|
||||||
|
* {@link LMDirichletSimilarity#setDiscountOverlaps(boolean)}</li>
|
||||||
|
* </ul>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class LMDirichletSimilarityFactory extends SimilarityFactory {
|
||||||
|
private boolean discountOverlaps;
|
||||||
|
private Float mu;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(SolrParams params) {
|
||||||
|
super.init(params);
|
||||||
|
discountOverlaps = params.getBool("discountOverlaps", true);
|
||||||
|
mu = params.getFloat("mu");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Similarity getSimilarity() {
|
||||||
|
LMDirichletSimilarity sim = (mu != null) ? new LMDirichletSimilarity(mu)
|
||||||
|
: new LMDirichletSimilarity();
|
||||||
|
sim.setDiscountOverlaps(discountOverlaps);
|
||||||
|
return sim;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,59 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.schema.SimilarityFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link LMJelinekMercerSimilarity}
|
||||||
|
* <p>
|
||||||
|
* Parameters:
|
||||||
|
* <ul>
|
||||||
|
* <li>parameter lambda (float): smoothing parameter λ. The default
|
||||||
|
* is <code>0.7</code></li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Optional settings:
|
||||||
|
* <ul>
|
||||||
|
* <li>discountOverlaps (bool): Sets
|
||||||
|
* {@link LMJelinekMercerSimilarity#setDiscountOverlaps(boolean)}</li>
|
||||||
|
* </ul>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class LMJelinekMercerSimilarityFactory extends SimilarityFactory {
|
||||||
|
private boolean discountOverlaps;
|
||||||
|
private float lambda;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(SolrParams params) {
|
||||||
|
super.init(params);
|
||||||
|
discountOverlaps = params.getBool("discountOverlaps", true);
|
||||||
|
lambda = params.getFloat("lambda", 0.7f);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Similarity getSimilarity() {
|
||||||
|
LMJelinekMercerSimilarity sim = new LMJelinekMercerSimilarity(lambda);
|
||||||
|
sim.setDiscountOverlaps(discountOverlaps);
|
||||||
|
return sim;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Test schema file for BM25SimilarityFactory -->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- default parameters -->
|
||||||
|
<fieldType name="text" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.BM25SimilarityFactory"/>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- with parameters -->
|
||||||
|
<fieldType name="text_params" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.BM25SimilarityFactory">
|
||||||
|
<float name="k1">1.2</float>
|
||||||
|
<float name="b">0.76</float>
|
||||||
|
</similarity>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="text" type="text" indexed="true" stored="false"/>
|
||||||
|
<field name="text_params" type="text_params" indexed="true" stored="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
</schema>
|
|
@ -0,0 +1,56 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Test schema file for DFRSimilarityFactory -->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- explicit model components; normalization H2 left at its default settings -->
|
||||||
|
<fieldType name="text" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.DFRSimilarityFactory">
|
||||||
|
<str name="basicModel">I(F)</str>
|
||||||
|
<str name="afterEffect">B</str>
|
||||||
|
<str name="normalization">H2</str>
|
||||||
|
</similarity>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- with parameters -->
|
||||||
|
<fieldType name="text_params" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.DFRSimilarityFactory">
|
||||||
|
<str name="basicModel">I(F)</str>
|
||||||
|
<str name="afterEffect">B</str>
|
||||||
|
<str name="normalization">H3</str>
|
||||||
|
<float name="mu">900</float>
|
||||||
|
</similarity>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="text" type="text" indexed="true" stored="false"/>
|
||||||
|
<field name="text_params" type="text_params" indexed="true" stored="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
</schema>
|
|
@ -0,0 +1,56 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Test schema file for IBSimilarityFactory -->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- explicit model components; normalization H2 left at its default settings -->
|
||||||
|
<fieldType name="text" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.IBSimilarityFactory">
|
||||||
|
<str name="distribution">SPL</str>
|
||||||
|
<str name="lambda">DF</str>
|
||||||
|
<str name="normalization">H2</str>
|
||||||
|
</similarity>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- with parameters -->
|
||||||
|
<fieldType name="text_params" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.IBSimilarityFactory">
|
||||||
|
<str name="distribution">LL</str>
|
||||||
|
<str name="lambda">TTF</str>
|
||||||
|
<str name="normalization">H3</str>
|
||||||
|
<float name="mu">900</float>
|
||||||
|
</similarity>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="text" type="text" indexed="true" stored="false"/>
|
||||||
|
<field name="text_params" type="text_params" indexed="true" stored="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
</schema>
|
|
@ -0,0 +1,49 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Test schema file for LMDirichletSimilarityFactory -->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- default parameters -->
|
||||||
|
<fieldType name="text" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.LMDirichletSimilarityFactory"/>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- with parameters -->
|
||||||
|
<fieldType name="text_params" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.LMDirichletSimilarityFactory">
|
||||||
|
<float name="mu">1000</float>
|
||||||
|
</similarity>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="text" type="text" indexed="true" stored="false"/>
|
||||||
|
<field name="text_params" type="text_params" indexed="true" stored="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
</schema>
|
|
@ -0,0 +1,49 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Test schema file for LMJelinekMercerSimilarityFactory -->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- default parameters -->
|
||||||
|
<fieldType name="text" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.LMJelinekMercerSimilarityFactory"/>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- with parameters -->
|
||||||
|
<fieldType name="text_params" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.LMJelinekMercerSimilarityFactory">
|
||||||
|
<float name="lambda">0.4</float>
|
||||||
|
</similarity>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="text" type="text" indexed="true" stored="false"/>
|
||||||
|
<field name="text_params" type="text_params" indexed="true" stored="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
</schema>
|
|
@ -0,0 +1,40 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Test schema file for DefaultSimilarityFactory -->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
|
||||||
|
<!-- TF/IDF -->
|
||||||
|
<fieldType name="text" class="solr.TextField">
|
||||||
|
<analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>
|
||||||
|
<similarity class="solr.DefaultSimilarityFactory"/>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="text" type="text" indexed="true" stored="false"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>text</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
</schema>
|
|
@ -406,7 +406,7 @@
|
||||||
<analyzer>
|
<analyzer>
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
<similarity class="org.apache.solr.schema.CustomSimilarityFactory">
|
<similarity class="solr.CustomSimilarityFactory">
|
||||||
<str name="echo">is there an echo?</str>
|
<str name="echo">is there an echo?</str>
|
||||||
</similarity>
|
</similarity>
|
||||||
</fieldType>
|
</fieldType>
|
||||||
|
@ -672,13 +672,13 @@
|
||||||
A custom similarity provider may be specified here, but the default is fine
|
A custom similarity provider may be specified here, but the default is fine
|
||||||
for most applications.
|
for most applications.
|
||||||
-->
|
-->
|
||||||
<similarityProvider class="org.apache.solr.schema.CustomSimilarityProviderFactory">
|
<similarityProvider class="solr.CustomSimilarityProviderFactory">
|
||||||
<str name="echo">is there an echo?</str>
|
<str name="echo">is there an echo?</str>
|
||||||
</similarityProvider>
|
</similarityProvider>
|
||||||
|
|
||||||
<!-- default similarity, unless otherwise specified by the fieldType
|
<!-- default similarity, unless otherwise specified by the fieldType
|
||||||
-->
|
-->
|
||||||
<similarity class="org.apache.solr.schema.CustomSimilarityFactory">
|
<similarity class="solr.CustomSimilarityFactory">
|
||||||
<str name="echo">I am your default sim</str>
|
<str name="echo">I am your default sim</str>
|
||||||
</similarity>
|
</similarity>
|
||||||
</schema>
|
</schema>
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
|
<!-- a basic solrconfig that tests can use when they want simple minimal solrconfig/schema
|
||||||
|
DO NOT ADD THINGS TO THIS CONFIG! -->
|
||||||
<config>
|
<config>
|
||||||
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
|
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
|
||||||
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
|
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
|
|
@ -27,6 +27,7 @@ import org.apache.solr.common.params.MapSolrParams;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||||
import org.apache.solr.request.SolrQueryRequest;
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.search.similarities.MockConfigurableSimilarityProvider;
|
||||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
@ -80,14 +81,6 @@ public class IndexSchemaTest extends SolrTestCaseJ4 {
|
||||||
clearIndex();
|
clearIndex();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testSimilarityProviderFactory() {
|
|
||||||
SolrCore core = h.getCore();
|
|
||||||
SimilarityProvider similarityProvider = core.getSchema().getSimilarityProvider();
|
|
||||||
assertTrue("wrong class", similarityProvider instanceof MockConfigurableSimilarityProvider);
|
|
||||||
assertEquals("is there an echo?", ((MockConfigurableSimilarityProvider)similarityProvider).getPassthrough());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRuntimeFieldCreation()
|
public void testRuntimeFieldCreation()
|
||||||
{
|
{
|
||||||
|
|
|
@ -73,7 +73,7 @@ public class TestCollationField extends SolrTestCaseJ4 {
|
||||||
confDir.mkdir();
|
confDir.mkdir();
|
||||||
|
|
||||||
// copy over configuration files
|
// copy over configuration files
|
||||||
FileUtils.copyFile(getFile("solr/conf/solrconfig-collate.xml"), new File(confDir, "solrconfig.xml"));
|
FileUtils.copyFile(getFile("solr/conf/solrconfig-basic.xml"), new File(confDir, "solrconfig.xml"));
|
||||||
FileUtils.copyFile(getFile("solr/conf/schema-collate.xml"), new File(confDir, "schema.xml"));
|
FileUtils.copyFile(getFile("solr/conf/schema-collate.xml"), new File(confDir, "schema.xml"));
|
||||||
|
|
||||||
// generate custom collation rules (DIN 5007-2), saving to customrules.dat
|
// generate custom collation rules (DIN 5007-2), saving to customrules.dat
|
||||||
|
|
|
@ -0,0 +1,46 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.core.SolrCore;
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.util.RefCounted;
|
||||||
|
|
||||||
|
public abstract class BaseSimilarityTestCase extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
/** returns the similarity in use for the field */
|
||||||
|
protected Similarity getSimilarity(String field) {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
||||||
|
Similarity sim = searcher.get().getSimilarityProvider().get(field);
|
||||||
|
searcher.decref();
|
||||||
|
return sim;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns the (Solr)SimilarityProvider */
|
||||||
|
protected SimilarityProvider getSimilarityProvider() {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
||||||
|
SimilarityProvider prov = searcher.get().getSimilarityProvider();
|
||||||
|
searcher.decref();
|
||||||
|
return prov;
|
||||||
|
}
|
||||||
|
}
|
|
@ -14,9 +14,10 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.schema;
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.solr.schema.SimilarityFactory;
|
||||||
|
|
||||||
public class CustomSimilarityFactory extends SimilarityFactory {
|
public class CustomSimilarityFactory extends SimilarityFactory {
|
||||||
@Override
|
@Override
|
|
@ -14,9 +14,11 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.schema;
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.SimilarityProviderFactory;
|
||||||
import org.apache.solr.search.SolrSimilarityProvider;
|
import org.apache.solr.search.SolrSimilarityProvider;
|
||||||
|
|
||||||
public class CustomSimilarityProviderFactory extends SimilarityProviderFactory {
|
public class CustomSimilarityProviderFactory extends SimilarityProviderFactory {
|
|
@ -14,7 +14,7 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.schema;
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
|
|
|
@ -14,8 +14,9 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.schema;
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.search.SolrSimilarityProvider;
|
import org.apache.solr.search.SolrSimilarityProvider;
|
||||||
|
|
||||||
public class MockConfigurableSimilarityProvider extends SolrSimilarityProvider {
|
public class MockConfigurableSimilarityProvider extends SolrSimilarityProvider {
|
|
@ -0,0 +1,46 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests {@link BM25SimilarityFactory}
|
||||||
|
*/
|
||||||
|
public class TestBM25SimilarityFactory extends BaseSimilarityTestCase {
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml","schema-bm25.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** bm25 with default parameters */
|
||||||
|
public void test() throws Exception {
|
||||||
|
assertEquals(BM25Similarity.class, getSimilarity("text").getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** bm25 with parameters */
|
||||||
|
public void testParameters() throws Exception {
|
||||||
|
Similarity sim = getSimilarity("text_params");
|
||||||
|
assertEquals(BM25Similarity.class, sim.getClass());
|
||||||
|
BM25Similarity bm25 = (BM25Similarity) sim;
|
||||||
|
assertEquals(1.2f, bm25.getK1(), 0.01f);
|
||||||
|
assertEquals(0.76f, bm25.getB(), 0.01f);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.AfterEffectB;
|
||||||
|
import org.apache.lucene.search.similarities.BasicModelIF;
|
||||||
|
import org.apache.lucene.search.similarities.DFRSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationH2;
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationH3;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests {@link DFRSimilarityFactory}
|
||||||
|
*/
|
||||||
|
public class TestDFRSimilarityFactory extends BaseSimilarityTestCase {
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml","schema-dfr.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** dfr with default parameters */
|
||||||
|
public void test() throws Exception {
|
||||||
|
Similarity sim = getSimilarity("text");
|
||||||
|
assertEquals(DFRSimilarity.class, sim.getClass());
|
||||||
|
DFRSimilarity dfr = (DFRSimilarity) sim;
|
||||||
|
assertEquals(BasicModelIF.class, dfr.getBasicModel().getClass());
|
||||||
|
assertEquals(AfterEffectB.class, dfr.getAfterEffect().getClass());
|
||||||
|
assertEquals(NormalizationH2.class, dfr.getNormalization().getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** dfr with parameterized normalization */
|
||||||
|
public void testParameters() throws Exception {
|
||||||
|
Similarity sim = getSimilarity("text_params");
|
||||||
|
assertEquals(DFRSimilarity.class, sim.getClass());
|
||||||
|
DFRSimilarity dfr = (DFRSimilarity) sim;
|
||||||
|
assertEquals(BasicModelIF.class, dfr.getBasicModel().getClass());
|
||||||
|
assertEquals(AfterEffectB.class, dfr.getAfterEffect().getClass());
|
||||||
|
assertEquals(NormalizationH3.class, dfr.getNormalization().getClass());
|
||||||
|
NormalizationH3 norm = (NormalizationH3) dfr.getNormalization();
|
||||||
|
assertEquals(900f, norm.getMu(), 0.01f);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests {@link DefaultSimilarityFactory}
|
||||||
|
*/
|
||||||
|
public class TestDefaultSimilarityFactory extends BaseSimilarityTestCase {
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml","schema-tfidf.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** default parameters */
|
||||||
|
public void test() throws Exception {
|
||||||
|
assertEquals(DefaultSimilarity.class, getSimilarity("text").getClass());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.DistributionLL;
|
||||||
|
import org.apache.lucene.search.similarities.DistributionSPL;
|
||||||
|
import org.apache.lucene.search.similarities.IBSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.LambdaDF;
|
||||||
|
import org.apache.lucene.search.similarities.LambdaTTF;
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationH2;
|
||||||
|
import org.apache.lucene.search.similarities.NormalizationH3;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests {@link IBSimilarityFactory}
|
||||||
|
*/
|
||||||
|
public class TestIBSimilarityFactory extends BaseSimilarityTestCase {
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml","schema-ib.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** spl/df/h2 with default parameters */
|
||||||
|
public void test() throws Exception {
|
||||||
|
Similarity sim = getSimilarity("text");
|
||||||
|
assertEquals(IBSimilarity.class, sim.getClass());
|
||||||
|
IBSimilarity ib = (IBSimilarity) sim;
|
||||||
|
assertEquals(DistributionSPL.class, ib.getDistribution().getClass());
|
||||||
|
assertEquals(LambdaDF.class, ib.getLambda().getClass());
|
||||||
|
assertEquals(NormalizationH2.class, ib.getNormalization().getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** ll/ttf/h3 with parameterized normalization */
|
||||||
|
public void testParameters() throws Exception {
|
||||||
|
Similarity sim = getSimilarity("text_params");
|
||||||
|
assertEquals(IBSimilarity.class, sim.getClass());
|
||||||
|
IBSimilarity ib = (IBSimilarity) sim;
|
||||||
|
assertEquals(DistributionLL.class, ib.getDistribution().getClass());
|
||||||
|
assertEquals(LambdaTTF.class, ib.getLambda().getClass());
|
||||||
|
assertEquals(NormalizationH3.class, ib.getNormalization().getClass());
|
||||||
|
NormalizationH3 norm = (NormalizationH3) ib.getNormalization();
|
||||||
|
assertEquals(900f, norm.getMu(), 0.01f);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests {@link LMDirichletSimilarityFactory}
|
||||||
|
*/
|
||||||
|
public class TestLMDirichletSimilarityFactory extends BaseSimilarityTestCase {
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml","schema-lmdirichlet.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** dirichlet with default parameters */
|
||||||
|
public void test() throws Exception {
|
||||||
|
assertEquals(LMDirichletSimilarity.class, getSimilarity("text").getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** dirichlet with parameters */
|
||||||
|
public void testParameters() throws Exception {
|
||||||
|
Similarity sim = getSimilarity("text_params");
|
||||||
|
assertEquals(LMDirichletSimilarity.class, sim.getClass());
|
||||||
|
LMDirichletSimilarity lm = (LMDirichletSimilarity) sim;
|
||||||
|
assertEquals(1000f, lm.getMu(), 0.01f);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests {@link LMJelinekMercerSimilarityFactory}
|
||||||
|
*/
|
||||||
|
public class TestLMJelinekMercerSimilarityFactory extends BaseSimilarityTestCase {
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeClass() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml","schema-lmjelinekmercer.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** jelinek-mercer with default parameters */
|
||||||
|
public void test() throws Exception {
|
||||||
|
assertEquals(LMJelinekMercerSimilarity.class, getSimilarity("text").getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** jelinek-mercer with parameters */
|
||||||
|
public void testParameters() throws Exception {
|
||||||
|
Similarity sim = getSimilarity("text_params");
|
||||||
|
assertEquals(LMJelinekMercerSimilarity.class, sim.getClass());
|
||||||
|
LMJelinekMercerSimilarity lm = (LMJelinekMercerSimilarity) sim;
|
||||||
|
assertEquals(0.4f, lm.getLambda(), 0.01f);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.solr.schema;
|
package org.apache.solr.search.similarities;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -18,18 +18,16 @@ package org.apache.solr.schema;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.misc.SweetSpotSimilarity;
|
import org.apache.lucene.misc.SweetSpotSimilarity;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
|
||||||
import org.apache.solr.util.RefCounted;
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests per-field similarity support in the schema
|
* Tests per-field similarity support in the schema
|
||||||
*/
|
*/
|
||||||
public class TestPerFieldSimilarity extends SolrTestCaseJ4 {
|
public class TestPerFieldSimilarity extends BaseSimilarityTestCase {
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeClass() throws Exception {
|
public static void beforeClass() throws Exception {
|
||||||
|
@ -38,69 +36,54 @@ public class TestPerFieldSimilarity extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
/** test a field where the sim is specified directly */
|
/** test a field where the sim is specified directly */
|
||||||
public void testDirect() throws Exception {
|
public void testDirect() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
assertEquals(SweetSpotSimilarity.class, getSimilarity("sim1text").getClass());
|
||||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
|
||||||
Similarity sim = searcher.get().getSimilarityProvider().get("sim1text");
|
|
||||||
assertEquals(SweetSpotSimilarity.class, sim.getClass());
|
|
||||||
searcher.decref();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ... and for a dynamic field */
|
/** ... and for a dynamic field */
|
||||||
public void testDirectDynamic() throws Exception {
|
public void testDirectDynamic() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
assertEquals(SweetSpotSimilarity.class, getSimilarity("text_sim1").getClass());
|
||||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
|
||||||
Similarity sim = searcher.get().getSimilarityProvider().get("text_sim1");
|
|
||||||
assertEquals(SweetSpotSimilarity.class, sim.getClass());
|
|
||||||
searcher.decref();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** test a field where a configurable sim factory is defined */
|
/** test a field where a configurable sim factory is defined */
|
||||||
public void testFactory() throws Exception {
|
public void testFactory() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
Similarity sim = getSimilarity("sim2text");
|
||||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
|
||||||
Similarity sim = searcher.get().getSimilarityProvider().get("sim2text");
|
|
||||||
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
||||||
assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough());
|
assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough());
|
||||||
searcher.decref();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ... and for a dynamic field */
|
/** ... and for a dynamic field */
|
||||||
public void testFactoryDynamic() throws Exception {
|
public void testFactoryDynamic() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
Similarity sim = getSimilarity("text_sim2");
|
||||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
|
||||||
Similarity sim = searcher.get().getSimilarityProvider().get("text_sim2");
|
|
||||||
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
||||||
assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough());
|
assertEquals("is there an echo?", ((MockConfigurableSimilarity)sim).getPassthrough());
|
||||||
searcher.decref();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** test a field where no similarity is specified */
|
/** test a field where no similarity is specified */
|
||||||
public void testDefaults() throws Exception {
|
public void testDefaults() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
Similarity sim = getSimilarity("sim3text");
|
||||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
|
||||||
Similarity sim = searcher.get().getSimilarityProvider().get("sim3text");
|
|
||||||
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
||||||
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
|
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
|
||||||
searcher.decref();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ... and for a dynamic field */
|
/** ... and for a dynamic field */
|
||||||
public void testDefaultsDynamic() throws Exception {
|
public void testDefaultsDynamic() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
Similarity sim = getSimilarity("text_sim3");
|
||||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
|
||||||
Similarity sim = searcher.get().getSimilarityProvider().get("text_sim3");
|
|
||||||
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
||||||
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
|
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
|
||||||
searcher.decref();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** test a field that does not exist */
|
/** test a field that does not exist */
|
||||||
public void testNonexistent() throws Exception {
|
public void testNonexistent() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
Similarity sim = getSimilarity("sdfdsfdsfdswr5fsdfdsfdsfs");
|
||||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
|
||||||
Similarity sim = searcher.get().getSimilarityProvider().get("sdfdsfdsfdswr5fsdfdsfdsfs");
|
|
||||||
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
assertEquals(MockConfigurableSimilarity.class, sim.getClass());
|
||||||
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
|
assertEquals("I am your default sim", ((MockConfigurableSimilarity)sim).getPassthrough());
|
||||||
searcher.decref();
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSimilarityProviderFactory() {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
SimilarityProvider similarityProvider = core.getSchema().getSimilarityProvider();
|
||||||
|
assertTrue("wrong class", similarityProvider instanceof MockConfigurableSimilarityProvider);
|
||||||
|
assertEquals("is there an echo?", ((MockConfigurableSimilarityProvider)similarityProvider).getPassthrough());
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue