Merge pull request #15446 from jimferenczi/classic_similarity

Renames `default` similarity into `classic`
This commit is contained in:
Jim Ferenczi 2015-12-30 08:42:20 -08:00
commit 992ffac509
14 changed files with 154 additions and 86 deletions

View File

@ -54,7 +54,7 @@ import java.util.Objects;
* While aggregating the total term frequency is trivial since it * While aggregating the total term frequency is trivial since it
* can be summed up not every {@link org.apache.lucene.search.similarities.Similarity} * can be summed up not every {@link org.apache.lucene.search.similarities.Similarity}
* makes use of this statistic. The document frequency which is used in the * makes use of this statistic. The document frequency which is used in the
* {@link org.apache.lucene.search.similarities.DefaultSimilarity} * {@link org.apache.lucene.search.similarities.ClassicSimilarity}
* can only be estimated as a lower bound since it is a document-based statistic. For * can only be estimated as a lower bound since it is a document-based statistic. For
* the document frequency the maximum frequency across all fields per term is used * the document frequency the maximum frequency across all fields per term is used
* which is the minimum number of documents the term occurs in. * which is the minimum number of documents the term occurs in.

View File

@ -24,7 +24,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
@ -67,7 +67,7 @@ public class TermVectorsFilter {
this.dfs = dfs; this.dfs = dfs;
this.scoreTerms = new HashMap<>(); this.scoreTerms = new HashMap<>();
this.similarity = new DefaultSimilarity(); this.similarity = new ClassicSimilarity();
} }
public void setSettings(TermVectorsRequest.FilterSettings settings) { public void setSettings(TermVectorsRequest.FilterSettings settings) {

View File

@ -30,7 +30,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -138,7 +138,7 @@ public class MoreLikeThisQuery extends Query {
if (rewritten != this) { if (rewritten != this) {
return rewritten; return rewritten;
} }
XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity); XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new ClassicSimilarity() : similarity);
mlt.setFieldNames(moreLikeFields); mlt.setFieldNames(moreLikeFields);
mlt.setAnalyzer(analyzer); mlt.setAnalyzer(analyzer);

View File

@ -52,7 +52,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity; import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
@ -304,7 +304,7 @@ public final class XMoreLikeThis {
/** /**
* For idf() calculations. * For idf() calculations.
*/ */
private TFIDFSimilarity similarity;// = new DefaultSimilarity(); private TFIDFSimilarity similarity;// = new ClassicSimilarity();
/** /**
* IndexReader to use * IndexReader to use
@ -346,7 +346,7 @@ public final class XMoreLikeThis {
* Constructor requiring an IndexReader. * Constructor requiring an IndexReader.
*/ */
public XMoreLikeThis(IndexReader ir) { public XMoreLikeThis(IndexReader ir) {
this(ir, new DefaultSimilarity()); this(ir, new ClassicSimilarity());
} }
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) { public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {

View File

@ -35,6 +35,8 @@ import org.elasticsearch.index.mapper.MappedFieldType.Loading;
import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.object.ObjectMapper; import org.elasticsearch.index.mapper.object.ObjectMapper;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;
import java.util.Collections; import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;
@ -79,7 +81,8 @@ public class TypeParsers {
builder.omitNorms(nodeBooleanValue(propNode)); builder.omitNorms(nodeBooleanValue(propNode));
iterator.remove(); iterator.remove();
} else if (propName.equals("similarity")) { } else if (propName.equals("similarity")) {
builder.similarity(parserContext.getSimilarity(propNode.toString())); SimilarityProvider similarityProvider = resolveSimilarity(parserContext, name, propNode.toString());
builder.similarity(similarityProvider);
iterator.remove(); iterator.remove();
} else if (parseMultiField(builder, name, parserContext, propName, propNode)) { } else if (parseMultiField(builder, name, parserContext, propName, propNode)) {
iterator.remove(); iterator.remove();
@ -210,7 +213,8 @@ public class TypeParsers {
// ignore for old indexes // ignore for old indexes
iterator.remove(); iterator.remove();
} else if (propName.equals("similarity")) { } else if (propName.equals("similarity")) {
builder.similarity(parserContext.getSimilarity(propNode.toString())); SimilarityProvider similarityProvider = resolveSimilarity(parserContext, name, propNode.toString());
builder.similarity(similarityProvider);
iterator.remove(); iterator.remove();
} else if (propName.equals("fielddata")) { } else if (propName.equals("fielddata")) {
final Settings settings = Settings.builder().put(SettingsLoader.Helper.loadNestedFromMap(nodeMapValue(propNode, "fielddata"))).build(); final Settings settings = Settings.builder().put(SettingsLoader.Helper.loadNestedFromMap(nodeMapValue(propNode, "fielddata"))).build();
@ -369,4 +373,15 @@ public class TypeParsers {
builder.copyTo(copyToBuilder.build()); builder.copyTo(copyToBuilder.build());
} }
/**
 * Resolves the similarity configured on a field mapping to its {@link SimilarityProvider}.
 * <p>
 * For indices created before {@code Version.V_3_0_0}, the legacy name {@code "default"} is
 * translated to {@link SimilarityService#DEFAULT_SIMILARITY} before the lookup, so that
 * mappings written against the old name keep resolving after the rename.
 *
 * @param parserContext context supplying the index creation version and the similarity lookup
 * @param name          name of the field being parsed; only used in the error message
 * @param value         similarity name as written in the mapping
 * @return the provider registered under the (possibly translated) similarity name
 * @throws MapperParsingException if no provider is registered under that name
 */
private static SimilarityProvider resolveSimilarity(Mapper.TypeParser.ParserContext parserContext, String name, String value) {
    final String lookupName;
    if (parserContext.indexVersionCreated().before(Version.V_3_0_0) && "default".equals(value)) {
        // Pre-3.x mappings may still say "default"; point them at the renamed similarity.
        lookupName = SimilarityService.DEFAULT_SIMILARITY;
    } else {
        lookupName = value;
    }

    final SimilarityProvider provider = parserContext.getSimilarity(lookupName);
    if (provider != null) {
        return provider;
    }
    // The message reports the name that was actually looked up (after any translation).
    throw new MapperParsingException("Unknown Similarity type [" + lookupName + "] for [" + name + "]");
}
} }

View File

@ -19,23 +19,23 @@
package org.elasticsearch.index.similarity; package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
/** /**
* {@link SimilarityProvider} for {@link DefaultSimilarity}. * {@link SimilarityProvider} for {@link ClassicSimilarity}.
* <p> * <p>
* Configuration options available: * Configuration options available:
* <ul> * <ul>
* <li>discount_overlaps</li> * <li>discount_overlaps</li>
* </ul> * </ul>
* @see DefaultSimilarity For more information about configuration * @see ClassicSimilarity For more information about configuration
*/ */
public class DefaultSimilarityProvider extends AbstractSimilarityProvider { public class ClassicSimilarityProvider extends AbstractSimilarityProvider {
private final DefaultSimilarity similarity = new DefaultSimilarity(); private final ClassicSimilarity similarity = new ClassicSimilarity();
public DefaultSimilarityProvider(String name, Settings settings) { public ClassicSimilarityProvider(String name, Settings settings) {
super(name); super(name);
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
this.similarity.setDiscountOverlaps(discountOverlaps); this.similarity.setDiscountOverlaps(discountOverlaps);
@ -45,7 +45,7 @@ public class DefaultSimilarityProvider extends AbstractSimilarityProvider {
* {@inheritDoc} * {@inheritDoc}
*/ */
@Override @Override
public DefaultSimilarity get() { public ClassicSimilarity get() {
return similarity; return similarity;
} }
} }

View File

@ -35,7 +35,7 @@ import java.util.function.BiFunction;
public final class SimilarityService extends AbstractIndexComponent { public final class SimilarityService extends AbstractIndexComponent {
public final static String DEFAULT_SIMILARITY = "default"; public final static String DEFAULT_SIMILARITY = "classic";
private final Similarity defaultSimilarity; private final Similarity defaultSimilarity;
private final Similarity baseSimilarity; private final Similarity baseSimilarity;
private final Map<String, SimilarityProvider> similarities; private final Map<String, SimilarityProvider> similarities;
@ -44,9 +44,9 @@ public final class SimilarityService extends AbstractIndexComponent {
static { static {
Map<String, BiFunction<String, Settings, SimilarityProvider>> defaults = new HashMap<>(); Map<String, BiFunction<String, Settings, SimilarityProvider>> defaults = new HashMap<>();
Map<String, BiFunction<String, Settings, SimilarityProvider>> buildIn = new HashMap<>(); Map<String, BiFunction<String, Settings, SimilarityProvider>> buildIn = new HashMap<>();
defaults.put("default", DefaultSimilarityProvider::new); defaults.put("classic", ClassicSimilarityProvider::new);
defaults.put("BM25", BM25SimilarityProvider::new); defaults.put("BM25", BM25SimilarityProvider::new);
buildIn.put("default", DefaultSimilarityProvider::new); buildIn.put("classic", ClassicSimilarityProvider::new);
buildIn.put("BM25", BM25SimilarityProvider::new); buildIn.put("BM25", BM25SimilarityProvider::new);
buildIn.put("DFR", DFRSimilarityProvider::new); buildIn.put("DFR", DFRSimilarityProvider::new);
buildIn.put("IB", IBSimilarityProvider::new); buildIn.put("IB", IBSimilarityProvider::new);

View File

@ -37,7 +37,7 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
@ -214,7 +214,7 @@ public class BlendedTermQueryTests extends ESTestCase {
} }
public IndexSearcher setSimilarity(IndexSearcher searcher) { public IndexSearcher setSimilarity(IndexSearcher searcher) {
Similarity similarity = random().nextBoolean() ? new BM25Similarity() : new DefaultSimilarity(); Similarity similarity = random().nextBoolean() ? new BM25Similarity() : new ClassicSimilarity();
searcher.setSimilarity(similarity); searcher.setSimilarity(similarity);
return searcher; return searcher;
} }

View File

@ -19,11 +19,11 @@
package org.elasticsearch.index.similarity; package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.AfterEffectL; import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModelG; import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.DFRSimilarity; import org.apache.lucene.search.similarities.DFRSimilarity;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.DistributionSPL; import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity; import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.LMDirichletSimilarity; import org.apache.lucene.search.similarities.LMDirichletSimilarity;
@ -31,11 +31,16 @@ import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.LambdaTTF; import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.NormalizationH2; import org.apache.lucene.search.similarities.NormalizationH2;
import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.VersionUtils;
import java.io.IOException; import java.io.IOException;
@ -45,42 +50,43 @@ import static org.hamcrest.CoreMatchers.instanceOf;
public class SimilarityTests extends ESSingleNodeTestCase { public class SimilarityTests extends ESSingleNodeTestCase {
public void testResolveDefaultSimilarities() { public void testResolveDefaultSimilarities() {
SimilarityService similarityService = createIndex("foo").similarityService(); SimilarityService similarityService = createIndex("foo").similarityService();
assertThat(similarityService.getSimilarity("default").get(), instanceOf(DefaultSimilarity.class)); assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class));
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
assertThat(similarityService.getSimilarity("default"), equalTo(null));
} }
public void testResolveSimilaritiesFromMapping_default() throws IOException { public void testResolveSimilaritiesFromMapping_classic() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject() .startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
.endObject() .endObject()
.endObject().endObject().string(); .endObject().endObject().string();
Settings indexSettings = Settings.settingsBuilder() Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "default") .put("index.similarity.my_similarity.type", "classic")
.put("index.similarity.my_similarity.discount_overlaps", false) .put("index.similarity.my_similarity.discount_overlaps", false)
.build(); .build();
IndexService indexService = createIndex("foo", indexSettings); IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DefaultSimilarityProvider.class)); assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class));
DefaultSimilarity similarity = (DefaultSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); ClassicSimilarity similarity = (ClassicSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getDiscountOverlaps(), equalTo(false)); assertThat(similarity.getDiscountOverlaps(), equalTo(false));
} }
public void testResolveSimilaritiesFromMapping_bm25() throws IOException { public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject() .startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
.endObject() .endObject()
.endObject().endObject().string(); .endObject().endObject().string();
Settings indexSettings = Settings.settingsBuilder() Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "BM25") .put("index.similarity.my_similarity.type", "BM25")
.put("index.similarity.my_similarity.k1", 2.0f) .put("index.similarity.my_similarity.k1", 2.0f)
.put("index.similarity.my_similarity.b", 1.5f) .put("index.similarity.my_similarity.b", 1.5f)
.put("index.similarity.my_similarity.discount_overlaps", false) .put("index.similarity.my_similarity.discount_overlaps", false)
.build(); .build();
IndexService indexService = createIndex("foo", indexSettings); IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class)); assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class));
@ -93,18 +99,18 @@ public class SimilarityTests extends ESSingleNodeTestCase {
public void testResolveSimilaritiesFromMapping_DFR() throws IOException { public void testResolveSimilaritiesFromMapping_DFR() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject() .startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
.endObject() .endObject()
.endObject().endObject().string(); .endObject().endObject().string();
Settings indexSettings = Settings.settingsBuilder() Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "DFR") .put("index.similarity.my_similarity.type", "DFR")
.put("index.similarity.my_similarity.basic_model", "g") .put("index.similarity.my_similarity.basic_model", "g")
.put("index.similarity.my_similarity.after_effect", "l") .put("index.similarity.my_similarity.after_effect", "l")
.put("index.similarity.my_similarity.normalization", "h2") .put("index.similarity.my_similarity.normalization", "h2")
.put("index.similarity.my_similarity.normalization.h2.c", 3f) .put("index.similarity.my_similarity.normalization.h2.c", 3f)
.build(); .build();
IndexService indexService = createIndex("foo", indexSettings); IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class)); assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class));
@ -118,18 +124,18 @@ public class SimilarityTests extends ESSingleNodeTestCase {
public void testResolveSimilaritiesFromMapping_IB() throws IOException { public void testResolveSimilaritiesFromMapping_IB() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject() .startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
.endObject() .endObject()
.endObject().endObject().string(); .endObject().endObject().string();
Settings indexSettings = Settings.settingsBuilder() Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "IB") .put("index.similarity.my_similarity.type", "IB")
.put("index.similarity.my_similarity.distribution", "spl") .put("index.similarity.my_similarity.distribution", "spl")
.put("index.similarity.my_similarity.lambda", "ttf") .put("index.similarity.my_similarity.lambda", "ttf")
.put("index.similarity.my_similarity.normalization", "h2") .put("index.similarity.my_similarity.normalization", "h2")
.put("index.similarity.my_similarity.normalization.h2.c", 3f) .put("index.similarity.my_similarity.normalization.h2.c", 3f)
.build(); .build();
IndexService indexService = createIndex("foo", indexSettings); IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class)); assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class));
@ -143,15 +149,15 @@ public class SimilarityTests extends ESSingleNodeTestCase {
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException { public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject() .startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
.endObject() .endObject()
.endObject().endObject().string(); .endObject().endObject().string();
Settings indexSettings = Settings.settingsBuilder() Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "LMDirichlet") .put("index.similarity.my_similarity.type", "LMDirichlet")
.put("index.similarity.my_similarity.mu", 3000f) .put("index.similarity.my_similarity.mu", 3000f)
.build(); .build();
IndexService indexService = createIndex("foo", indexSettings); IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class)); assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class));
@ -162,15 +168,15 @@ public class SimilarityTests extends ESSingleNodeTestCase {
public void testResolveSimilaritiesFromMapping_LMJelinekMercer() throws IOException { public void testResolveSimilaritiesFromMapping_LMJelinekMercer() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject() .startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
.endObject() .endObject()
.endObject().endObject().string(); .endObject().endObject().string();
Settings indexSettings = Settings.settingsBuilder() Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "LMJelinekMercer") .put("index.similarity.my_similarity.type", "LMJelinekMercer")
.put("index.similarity.my_similarity.lambda", 0.7f) .put("index.similarity.my_similarity.lambda", 0.7f)
.build(); .build();
IndexService indexService = createIndex("foo", indexSettings); IndexService indexService = createIndex("foo", indexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class)); assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class));
@ -178,4 +184,47 @@ public class SimilarityTests extends ESSingleNodeTestCase {
LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getLambda(), equalTo(0.7f)); assertThat(similarity.getLambda(), equalTo(0.7f));
} }
/**
 * Parsing a mapping whose field references a similarity name that is not registered must
 * fail with a {@link MapperParsingException} that names both the similarity and the field.
 */
public void testResolveSimilaritiesFromMapping_Unknown() throws IOException {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
        .startObject("properties")
            .startObject("field1").field("type", "string").field("similarity", "unknown_similarity").endObject()
        .endObject()
        .endObject().endObject().string();

    // Index is created by name only, i.e. without any custom index.similarity.* settings.
    IndexService indexService = createIndex("foo");
    try {
        indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
        // Reaching this line means parsing did not throw; name the exception type that is caught below.
        fail("Expected MapperParsingException");
    } catch (MapperParsingException e) {
        assertThat(e.getMessage(), equalTo("Unknown Similarity type [unknown_similarity] for [field1]"));
    }
}
/**
 * Checks backward compatibility of the legacy {@code "default"} similarity name.
 * <ul>
 * <li>On an index whose creation version is between 2.0.0 and 2.2.0, a field mapped with
 * {@code "default"} must resolve to a {@link ClassicSimilarityProvider} named {@code "classic"}.</li>
 * <li>On an index created without an explicit version (presumably the current version),
 * the same mapping must be rejected as an unknown similarity.</li>
 * </ul>
 */
public void testSimilarityDefaultBackCompat() throws IOException {
    // Every startObject() is matched by an explicit endObject() rather than relying on the
    // generator to auto-close the outermost object when the builder is converted to a string.
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
        .startObject("properties")
            .startObject("field1")
                .field("similarity", "default")
                .field("type", "string")
            .endObject()
        .endObject()
        .endObject().endObject().string();

    Settings settings = Settings.builder()
        .put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_2_0))
        .build();

    // Pre-3.0 index: "default" is still accepted and mapped onto the classic similarity.
    DocumentMapperParser parser = createIndex("test_v2.x", settings).mapperService().documentMapperParser();
    DocumentMapper documentMapper = parser.parse("type", new CompressedXContent(mapping));
    assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class));
    assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().name(), equalTo("classic"));

    // Index without a version override: "default" is no longer a known similarity name.
    parser = createIndex("test_v3.x").mapperService().documentMapperParser();
    try {
        parser.parse("type", new CompressedXContent(mapping));
        // Reaching this line means parsing did not throw; name the exception type that is caught below.
        fail("Expected MapperParsingException");
    } catch (MapperParsingException e) {
        assertThat(e.getMessage(), equalTo("Unknown Similarity type [default] for [field1]"));
    }
}
} }

View File

@ -28,7 +28,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.not;
public class SimilarityIT extends ESIntegTestCase { public class SimilarityIT extends ESIntegTestCase {
public void testCustomBM25Similarity() throws Exception { public void testCustomBM25Similarity() throws Exception {
try { try {
client().admin().indices().prepareDelete("test").execute().actionGet(); client().admin().indices().prepareDelete("test").execute().actionGet();
@ -45,7 +45,7 @@ public class SimilarityIT extends ESIntegTestCase {
.field("type", "string") .field("type", "string")
.endObject() .endObject()
.startObject("field2") .startObject("field2")
.field("similarity", "default") .field("similarity", "classic")
.field("type", "string") .field("type", "string")
.endObject() .endObject()
.endObject() .endObject()

View File

@ -48,10 +48,10 @@ Here we configure the DFRSimilarity so it can be referenced as
=== Available similarities === Available similarities
[float] [float]
[[default-similarity]] [[classic-similarity]]
==== Default similarity ==== Classic similarity
The default similarity that is based on the TF/IDF model. This The classic similarity that is based on the TF/IDF model. This
similarity has the following option: similarity has the following option:
`discount_overlaps`:: `discount_overlaps`::
@ -59,7 +59,7 @@ similarity has the following option:
0 position increment) are ignored when computing norm. By default this 0 position increment) are ignored when computing norm. By default this
is true, meaning overlap tokens do not count when computing norms. is true, meaning overlap tokens do not count when computing norms.
Type name: `default` Type name: `classic`
[float] [float]
[[bm25]] [[bm25]]

View File

@ -15,7 +15,7 @@ similarities. For more details about this expert options, see the
The only similarities which can be used out of the box, without any further The only similarities which can be used out of the box, without any further
configuration are: configuration are:
`default`:: `classic`::
The Default TF/IDF algorithm used by Elasticsearch and The Default TF/IDF algorithm used by Elasticsearch and
Lucene. See {defguide}/practical-scoring-function.html[Lucene's Practical Scoring Function] Lucene. See {defguide}/practical-scoring-function.html[Lucene's Practical Scoring Function]
for more information. for more information.
@ -49,6 +49,6 @@ PUT my_index
} }
-------------------------------------------------- --------------------------------------------------
// AUTOSENSE // AUTOSENSE
<1> The `default_field` uses the `default` similarity (i.e. TF/IDF). <1> The `default_field` uses the `classic` similarity (i.e. TF/IDF).
<2> The `bm25_field` uses the `BM25` similarity. <2> The `bm25_field` uses the `BM25` similarity.

View File

@ -166,7 +166,7 @@ Defaults depend on the <<mapping-index,`index`>> setting:
<<similarity,`similarity`>>:: <<similarity,`similarity`>>::
Which scoring algorithm or _similarity_ should be used. Defaults Which scoring algorithm or _similarity_ should be used. Defaults
to `default`, which uses TF/IDF. to `classic`, which uses TF/IDF.
<<term-vector,`term_vector`>>:: <<term-vector,`term_vector`>>::

View File

@ -224,6 +224,10 @@ Allocation settings deprecated in 1.x have been removed:
Please change the setting in your configuration files or in the clusterstate to use the new settings instead. Please change the setting in your configuration files or in the clusterstate to use the new settings instead.
==== Similarity settings
The 'default' similarity has been renamed to 'classic'.
[[breaking_30_mapping_changes]] [[breaking_30_mapping_changes]]
=== Mapping changes === Mapping changes