mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-02 08:59:09 +00:00
Merge pull request #15446 from jimferenczi/classic_similarity
Renames `default` similarity into `classic`
This commit is contained in:
commit
992ffac509
@ -54,7 +54,7 @@ import java.util.Objects;
|
|||||||
* While aggregating the total term frequency is trivial since it
|
* While aggregating the total term frequency is trivial since it
|
||||||
* can be summed up not every {@link org.apache.lucene.search.similarities.Similarity}
|
* can be summed up not every {@link org.apache.lucene.search.similarities.Similarity}
|
||||||
* makes use of this statistic. The document frequency which is used in the
|
* makes use of this statistic. The document frequency which is used in the
|
||||||
* {@link org.apache.lucene.search.similarities.DefaultSimilarity}
|
* {@link org.apache.lucene.search.similarities.ClassicSimilarity}
|
||||||
* can only be estimated as a lower bound since it is a document-based statistic. For
|
* can only be estimated as a lower bound since it is a document-based statistic. For
|
||||||
* the document frequency the maximum frequency across all fields per term is used
|
* the document frequency the maximum frequency across all fields per term is used
|
||||||
* which is the minimum number of documents the term occurs in.
|
* which is the minimum number of documents the term occurs in.
|
||||||
|
@ -24,7 +24,7 @@ import org.apache.lucene.index.Term;
|
|||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.TermStatistics;
|
import org.apache.lucene.search.TermStatistics;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
@ -67,7 +67,7 @@ public class TermVectorsFilter {
|
|||||||
|
|
||||||
this.dfs = dfs;
|
this.dfs = dfs;
|
||||||
this.scoreTerms = new HashMap<>();
|
this.scoreTerms = new HashMap<>();
|
||||||
this.similarity = new DefaultSimilarity();
|
this.similarity = new ClassicSimilarity();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setSettings(TermVectorsRequest.FilterSettings settings) {
|
public void setSettings(TermVectorsRequest.FilterSettings settings) {
|
||||||
|
@ -30,7 +30,7 @@ import org.apache.lucene.index.TermsEnum;
|
|||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
@ -138,7 +138,7 @@ public class MoreLikeThisQuery extends Query {
|
|||||||
if (rewritten != this) {
|
if (rewritten != this) {
|
||||||
return rewritten;
|
return rewritten;
|
||||||
}
|
}
|
||||||
XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new DefaultSimilarity() : similarity);
|
XMoreLikeThis mlt = new XMoreLikeThis(reader, similarity == null ? new ClassicSimilarity() : similarity);
|
||||||
|
|
||||||
mlt.setFieldNames(moreLikeFields);
|
mlt.setFieldNames(moreLikeFields);
|
||||||
mlt.setAnalyzer(analyzer);
|
mlt.setAnalyzer(analyzer);
|
||||||
|
@ -52,7 +52,7 @@ import org.apache.lucene.search.BoostQuery;
|
|||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CharsRefBuilder;
|
import org.apache.lucene.util.CharsRefBuilder;
|
||||||
@ -304,7 +304,7 @@ public final class XMoreLikeThis {
|
|||||||
/**
|
/**
|
||||||
* For idf() calculations.
|
* For idf() calculations.
|
||||||
*/
|
*/
|
||||||
private TFIDFSimilarity similarity;// = new DefaultSimilarity();
|
private TFIDFSimilarity similarity;// = new ClassicSimilarity();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IndexReader to use
|
* IndexReader to use
|
||||||
@ -346,7 +346,7 @@ public final class XMoreLikeThis {
|
|||||||
* Constructor requiring an IndexReader.
|
* Constructor requiring an IndexReader.
|
||||||
*/
|
*/
|
||||||
public XMoreLikeThis(IndexReader ir) {
|
public XMoreLikeThis(IndexReader ir) {
|
||||||
this(ir, new DefaultSimilarity());
|
this(ir, new ClassicSimilarity());
|
||||||
}
|
}
|
||||||
|
|
||||||
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
|
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
|
||||||
|
@ -35,6 +35,8 @@ import org.elasticsearch.index.mapper.MappedFieldType.Loading;
|
|||||||
import org.elasticsearch.index.mapper.Mapper;
|
import org.elasticsearch.index.mapper.Mapper;
|
||||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||||
import org.elasticsearch.index.mapper.object.ObjectMapper;
|
import org.elasticsearch.index.mapper.object.ObjectMapper;
|
||||||
|
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||||
|
import org.elasticsearch.index.similarity.SimilarityService;
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
@ -79,7 +81,8 @@ public class TypeParsers {
|
|||||||
builder.omitNorms(nodeBooleanValue(propNode));
|
builder.omitNorms(nodeBooleanValue(propNode));
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (propName.equals("similarity")) {
|
} else if (propName.equals("similarity")) {
|
||||||
builder.similarity(parserContext.getSimilarity(propNode.toString()));
|
SimilarityProvider similarityProvider = resolveSimilarity(parserContext, name, propNode.toString());
|
||||||
|
builder.similarity(similarityProvider);
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (parseMultiField(builder, name, parserContext, propName, propNode)) {
|
} else if (parseMultiField(builder, name, parserContext, propName, propNode)) {
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
@ -210,7 +213,8 @@ public class TypeParsers {
|
|||||||
// ignore for old indexes
|
// ignore for old indexes
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (propName.equals("similarity")) {
|
} else if (propName.equals("similarity")) {
|
||||||
builder.similarity(parserContext.getSimilarity(propNode.toString()));
|
SimilarityProvider similarityProvider = resolveSimilarity(parserContext, name, propNode.toString());
|
||||||
|
builder.similarity(similarityProvider);
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
} else if (propName.equals("fielddata")) {
|
} else if (propName.equals("fielddata")) {
|
||||||
final Settings settings = Settings.builder().put(SettingsLoader.Helper.loadNestedFromMap(nodeMapValue(propNode, "fielddata"))).build();
|
final Settings settings = Settings.builder().put(SettingsLoader.Helper.loadNestedFromMap(nodeMapValue(propNode, "fielddata"))).build();
|
||||||
@ -369,4 +373,15 @@ public class TypeParsers {
|
|||||||
builder.copyTo(copyToBuilder.build());
|
builder.copyTo(copyToBuilder.build());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Looks up the {@link SimilarityProvider} registered under the given similarity name.
 * <p>
 * For indices created before {@code Version.V_3_0_0}, the legacy name {@code "default"}
 * is replaced by {@code SimilarityService.DEFAULT_SIMILARITY} before the lookup.
 *
 * @param parserContext context supplying the index creation version and the similarity lookup
 * @param name          used only in the error message; presumably the name of the field being parsed
 * @param value         the similarity name taken from the mapping
 * @return the provider returned by {@code parserContext.getSimilarity}; never {@code null}
 * @throws MapperParsingException if no provider is found for the (possibly remapped) name
 */
private static SimilarityProvider resolveSimilarity(Mapper.TypeParser.ParserContext parserContext, String name, String value) {
|
||||||
|
if (parserContext.indexVersionCreated().before(Version.V_3_0_0) && "default".equals(value)) {
|
||||||
|
// The "default" similarity was renamed to "classic" in 3.x; remap it for older indices.
|
||||||
|
value = SimilarityService.DEFAULT_SIMILARITY;
|
||||||
|
}
|
||||||
|
SimilarityProvider similarityProvider = parserContext.getSimilarity(value);
|
||||||
|
// Fail here with a descriptive message rather than handing a null provider to the builder.
if (similarityProvider == null) {
|
||||||
|
throw new MapperParsingException("Unknown Similarity type [" + value + "] for [" + name + "]");
|
||||||
|
}
|
||||||
|
return similarityProvider;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,23 +19,23 @@
|
|||||||
|
|
||||||
package org.elasticsearch.index.similarity;
|
package org.elasticsearch.index.similarity;
|
||||||
|
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@link SimilarityProvider} for {@link DefaultSimilarity}.
|
* {@link SimilarityProvider} for {@link ClassicSimilarity}.
|
||||||
* <p>
|
* <p>
|
||||||
* Configuration options available:
|
* Configuration options available:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>discount_overlaps</li>
|
* <li>discount_overlaps</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
* @see DefaultSimilarity For more information about configuration
|
* @see ClassicSimilarity For more information about configuration
|
||||||
*/
|
*/
|
||||||
public class DefaultSimilarityProvider extends AbstractSimilarityProvider {
|
public class ClassicSimilarityProvider extends AbstractSimilarityProvider {
|
||||||
|
|
||||||
private final DefaultSimilarity similarity = new DefaultSimilarity();
|
private final ClassicSimilarity similarity = new ClassicSimilarity();
|
||||||
|
|
||||||
public DefaultSimilarityProvider(String name, Settings settings) {
|
public ClassicSimilarityProvider(String name, Settings settings) {
|
||||||
super(name);
|
super(name);
|
||||||
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||||
this.similarity.setDiscountOverlaps(discountOverlaps);
|
this.similarity.setDiscountOverlaps(discountOverlaps);
|
||||||
@ -45,7 +45,7 @@ public class DefaultSimilarityProvider extends AbstractSimilarityProvider {
|
|||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public DefaultSimilarity get() {
|
public ClassicSimilarity get() {
|
||||||
return similarity;
|
return similarity;
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -35,7 +35,7 @@ import java.util.function.BiFunction;
|
|||||||
|
|
||||||
public final class SimilarityService extends AbstractIndexComponent {
|
public final class SimilarityService extends AbstractIndexComponent {
|
||||||
|
|
||||||
public final static String DEFAULT_SIMILARITY = "default";
|
public final static String DEFAULT_SIMILARITY = "classic";
|
||||||
private final Similarity defaultSimilarity;
|
private final Similarity defaultSimilarity;
|
||||||
private final Similarity baseSimilarity;
|
private final Similarity baseSimilarity;
|
||||||
private final Map<String, SimilarityProvider> similarities;
|
private final Map<String, SimilarityProvider> similarities;
|
||||||
@ -44,9 +44,9 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||||||
static {
|
static {
|
||||||
Map<String, BiFunction<String, Settings, SimilarityProvider>> defaults = new HashMap<>();
|
Map<String, BiFunction<String, Settings, SimilarityProvider>> defaults = new HashMap<>();
|
||||||
Map<String, BiFunction<String, Settings, SimilarityProvider>> buildIn = new HashMap<>();
|
Map<String, BiFunction<String, Settings, SimilarityProvider>> buildIn = new HashMap<>();
|
||||||
defaults.put("default", DefaultSimilarityProvider::new);
|
defaults.put("classic", ClassicSimilarityProvider::new);
|
||||||
defaults.put("BM25", BM25SimilarityProvider::new);
|
defaults.put("BM25", BM25SimilarityProvider::new);
|
||||||
buildIn.put("default", DefaultSimilarityProvider::new);
|
buildIn.put("classic", ClassicSimilarityProvider::new);
|
||||||
buildIn.put("BM25", BM25SimilarityProvider::new);
|
buildIn.put("BM25", BM25SimilarityProvider::new);
|
||||||
buildIn.put("DFR", DFRSimilarityProvider::new);
|
buildIn.put("DFR", DFRSimilarityProvider::new);
|
||||||
buildIn.put("IB", IBSimilarityProvider::new);
|
buildIn.put("IB", IBSimilarityProvider::new);
|
||||||
|
@ -37,7 +37,7 @@ import org.apache.lucene.search.ScoreDoc;
|
|||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
@ -214,7 +214,7 @@ public class BlendedTermQueryTests extends ESTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public IndexSearcher setSimilarity(IndexSearcher searcher) {
|
public IndexSearcher setSimilarity(IndexSearcher searcher) {
|
||||||
Similarity similarity = random().nextBoolean() ? new BM25Similarity() : new DefaultSimilarity();
|
Similarity similarity = random().nextBoolean() ? new BM25Similarity() : new ClassicSimilarity();
|
||||||
searcher.setSimilarity(similarity);
|
searcher.setSimilarity(similarity);
|
||||||
return searcher;
|
return searcher;
|
||||||
}
|
}
|
||||||
|
@ -19,11 +19,11 @@
|
|||||||
|
|
||||||
package org.elasticsearch.index.similarity;
|
package org.elasticsearch.index.similarity;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||||
import org.apache.lucene.search.similarities.AfterEffectL;
|
import org.apache.lucene.search.similarities.AfterEffectL;
|
||||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||||
import org.apache.lucene.search.similarities.BasicModelG;
|
import org.apache.lucene.search.similarities.BasicModelG;
|
||||||
import org.apache.lucene.search.similarities.DFRSimilarity;
|
import org.apache.lucene.search.similarities.DFRSimilarity;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
|
||||||
import org.apache.lucene.search.similarities.DistributionSPL;
|
import org.apache.lucene.search.similarities.DistributionSPL;
|
||||||
import org.apache.lucene.search.similarities.IBSimilarity;
|
import org.apache.lucene.search.similarities.IBSimilarity;
|
||||||
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
||||||
@ -31,11 +31,16 @@ import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
|||||||
import org.apache.lucene.search.similarities.LambdaTTF;
|
import org.apache.lucene.search.similarities.LambdaTTF;
|
||||||
import org.apache.lucene.search.similarities.NormalizationH2;
|
import org.apache.lucene.search.similarities.NormalizationH2;
|
||||||
import org.elasticsearch.common.compress.CompressedXContent;
|
import org.elasticsearch.common.compress.CompressedXContent;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||||
import org.elasticsearch.index.IndexService;
|
import org.elasticsearch.index.IndexService;
|
||||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||||
|
import org.elasticsearch.index.mapper.DocumentMapperParser;
|
||||||
|
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
|
import org.elasticsearch.test.VersionUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
@ -45,42 +50,43 @@ import static org.hamcrest.CoreMatchers.instanceOf;
|
|||||||
public class SimilarityTests extends ESSingleNodeTestCase {
|
public class SimilarityTests extends ESSingleNodeTestCase {
|
||||||
public void testResolveDefaultSimilarities() {
|
public void testResolveDefaultSimilarities() {
|
||||||
SimilarityService similarityService = createIndex("foo").similarityService();
|
SimilarityService similarityService = createIndex("foo").similarityService();
|
||||||
assertThat(similarityService.getSimilarity("default").get(), instanceOf(DefaultSimilarity.class));
|
assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class));
|
||||||
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
|
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
|
||||||
|
assertThat(similarityService.getSimilarity("default"), equalTo(null));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testResolveSimilaritiesFromMapping_default() throws IOException {
|
public void testResolveSimilaritiesFromMapping_classic() throws IOException {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties")
|
.startObject("properties")
|
||||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject().endObject().string();
|
.endObject().endObject().string();
|
||||||
|
|
||||||
Settings indexSettings = Settings.settingsBuilder()
|
Settings indexSettings = Settings.settingsBuilder()
|
||||||
.put("index.similarity.my_similarity.type", "default")
|
.put("index.similarity.my_similarity.type", "classic")
|
||||||
.put("index.similarity.my_similarity.discount_overlaps", false)
|
.put("index.similarity.my_similarity.discount_overlaps", false)
|
||||||
.build();
|
.build();
|
||||||
IndexService indexService = createIndex("foo", indexSettings);
|
IndexService indexService = createIndex("foo", indexSettings);
|
||||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DefaultSimilarityProvider.class));
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class));
|
||||||
|
|
||||||
DefaultSimilarity similarity = (DefaultSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
ClassicSimilarity similarity = (ClassicSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||||
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
|
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
|
public void testResolveSimilaritiesFromMapping_bm25() throws IOException {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties")
|
.startObject("properties")
|
||||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject().endObject().string();
|
.endObject().endObject().string();
|
||||||
|
|
||||||
Settings indexSettings = Settings.settingsBuilder()
|
Settings indexSettings = Settings.settingsBuilder()
|
||||||
.put("index.similarity.my_similarity.type", "BM25")
|
.put("index.similarity.my_similarity.type", "BM25")
|
||||||
.put("index.similarity.my_similarity.k1", 2.0f)
|
.put("index.similarity.my_similarity.k1", 2.0f)
|
||||||
.put("index.similarity.my_similarity.b", 1.5f)
|
.put("index.similarity.my_similarity.b", 1.5f)
|
||||||
.put("index.similarity.my_similarity.discount_overlaps", false)
|
.put("index.similarity.my_similarity.discount_overlaps", false)
|
||||||
.build();
|
.build();
|
||||||
IndexService indexService = createIndex("foo", indexSettings);
|
IndexService indexService = createIndex("foo", indexSettings);
|
||||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class));
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class));
|
||||||
@ -93,18 +99,18 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||||||
|
|
||||||
public void testResolveSimilaritiesFromMapping_DFR() throws IOException {
|
public void testResolveSimilaritiesFromMapping_DFR() throws IOException {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties")
|
.startObject("properties")
|
||||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject().endObject().string();
|
.endObject().endObject().string();
|
||||||
|
|
||||||
Settings indexSettings = Settings.settingsBuilder()
|
Settings indexSettings = Settings.settingsBuilder()
|
||||||
.put("index.similarity.my_similarity.type", "DFR")
|
.put("index.similarity.my_similarity.type", "DFR")
|
||||||
.put("index.similarity.my_similarity.basic_model", "g")
|
.put("index.similarity.my_similarity.basic_model", "g")
|
||||||
.put("index.similarity.my_similarity.after_effect", "l")
|
.put("index.similarity.my_similarity.after_effect", "l")
|
||||||
.put("index.similarity.my_similarity.normalization", "h2")
|
.put("index.similarity.my_similarity.normalization", "h2")
|
||||||
.put("index.similarity.my_similarity.normalization.h2.c", 3f)
|
.put("index.similarity.my_similarity.normalization.h2.c", 3f)
|
||||||
.build();
|
.build();
|
||||||
IndexService indexService = createIndex("foo", indexSettings);
|
IndexService indexService = createIndex("foo", indexSettings);
|
||||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class));
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class));
|
||||||
@ -118,18 +124,18 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||||||
|
|
||||||
public void testResolveSimilaritiesFromMapping_IB() throws IOException {
|
public void testResolveSimilaritiesFromMapping_IB() throws IOException {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties")
|
.startObject("properties")
|
||||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject().endObject().string();
|
.endObject().endObject().string();
|
||||||
|
|
||||||
Settings indexSettings = Settings.settingsBuilder()
|
Settings indexSettings = Settings.settingsBuilder()
|
||||||
.put("index.similarity.my_similarity.type", "IB")
|
.put("index.similarity.my_similarity.type", "IB")
|
||||||
.put("index.similarity.my_similarity.distribution", "spl")
|
.put("index.similarity.my_similarity.distribution", "spl")
|
||||||
.put("index.similarity.my_similarity.lambda", "ttf")
|
.put("index.similarity.my_similarity.lambda", "ttf")
|
||||||
.put("index.similarity.my_similarity.normalization", "h2")
|
.put("index.similarity.my_similarity.normalization", "h2")
|
||||||
.put("index.similarity.my_similarity.normalization.h2.c", 3f)
|
.put("index.similarity.my_similarity.normalization.h2.c", 3f)
|
||||||
.build();
|
.build();
|
||||||
IndexService indexService = createIndex("foo", indexSettings);
|
IndexService indexService = createIndex("foo", indexSettings);
|
||||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class));
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class));
|
||||||
@ -143,15 +149,15 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||||||
|
|
||||||
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
|
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties")
|
.startObject("properties")
|
||||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject().endObject().string();
|
.endObject().endObject().string();
|
||||||
|
|
||||||
Settings indexSettings = Settings.settingsBuilder()
|
Settings indexSettings = Settings.settingsBuilder()
|
||||||
.put("index.similarity.my_similarity.type", "LMDirichlet")
|
.put("index.similarity.my_similarity.type", "LMDirichlet")
|
||||||
.put("index.similarity.my_similarity.mu", 3000f)
|
.put("index.similarity.my_similarity.mu", 3000f)
|
||||||
.build();
|
.build();
|
||||||
IndexService indexService = createIndex("foo", indexSettings);
|
IndexService indexService = createIndex("foo", indexSettings);
|
||||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class));
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class));
|
||||||
@ -162,15 +168,15 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||||||
|
|
||||||
public void testResolveSimilaritiesFromMapping_LMJelinekMercer() throws IOException {
|
public void testResolveSimilaritiesFromMapping_LMJelinekMercer() throws IOException {
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties")
|
.startObject("properties")
|
||||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject().endObject().string();
|
.endObject().endObject().string();
|
||||||
|
|
||||||
Settings indexSettings = Settings.settingsBuilder()
|
Settings indexSettings = Settings.settingsBuilder()
|
||||||
.put("index.similarity.my_similarity.type", "LMJelinekMercer")
|
.put("index.similarity.my_similarity.type", "LMJelinekMercer")
|
||||||
.put("index.similarity.my_similarity.lambda", 0.7f)
|
.put("index.similarity.my_similarity.lambda", 0.7f)
|
||||||
.build();
|
.build();
|
||||||
IndexService indexService = createIndex("foo", indexSettings);
|
IndexService indexService = createIndex("foo", indexSettings);
|
||||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||||
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class));
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class));
|
||||||
@ -178,4 +184,47 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||||||
LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||||
assertThat(similarity.getLambda(), equalTo(0.7f));
|
assertThat(similarity.getLambda(), equalTo(0.7f));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testResolveSimilaritiesFromMapping_Unknown() throws IOException {
|
||||||
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
|
.startObject("properties")
|
||||||
|
.startObject("field1").field("type", "string").field("similarity", "unknown_similarity").endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject().endObject().string();
|
||||||
|
|
||||||
|
IndexService indexService = createIndex("foo");
|
||||||
|
try {
|
||||||
|
indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||||
|
fail("Expected MappingParsingException");
|
||||||
|
} catch (MapperParsingException e) {
|
||||||
|
assertThat(e.getMessage(), equalTo("Unknown Similarity type [unknown_similarity] for [field1]"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSimilarityDefaultBackCompat() throws IOException {
|
||||||
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
|
.startObject("properties")
|
||||||
|
.startObject("field1")
|
||||||
|
.field("similarity", "default")
|
||||||
|
.field("type", "string")
|
||||||
|
.endObject()
|
||||||
|
.endObject()
|
||||||
|
.endObject().string();
|
||||||
|
Settings settings = Settings.builder()
|
||||||
|
.put(IndexMetaData.SETTING_VERSION_CREATED, VersionUtils.randomVersionBetween(random(), Version.V_2_0_0, Version.V_2_2_0))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
DocumentMapperParser parser = createIndex("test_v2.x", settings).mapperService().documentMapperParser();
|
||||||
|
DocumentMapper documentMapper = parser.parse("type", new CompressedXContent(mapping));
|
||||||
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class));
|
||||||
|
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().name(), equalTo("classic"));
|
||||||
|
|
||||||
|
parser = createIndex("test_v3.x").mapperService().documentMapperParser();
|
||||||
|
try {
|
||||||
|
parser.parse("type", new CompressedXContent(mapping));
|
||||||
|
fail("Expected MappingParsingException");
|
||||||
|
} catch (MapperParsingException e) {
|
||||||
|
assertThat(e.getMessage(), equalTo("Unknown Similarity type [default] for [field1]"));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
|
|||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
import static org.hamcrest.Matchers.not;
|
import static org.hamcrest.Matchers.not;
|
||||||
|
|
||||||
public class SimilarityIT extends ESIntegTestCase {
|
public class SimilarityIT extends ESIntegTestCase {
|
||||||
public void testCustomBM25Similarity() throws Exception {
|
public void testCustomBM25Similarity() throws Exception {
|
||||||
try {
|
try {
|
||||||
client().admin().indices().prepareDelete("test").execute().actionGet();
|
client().admin().indices().prepareDelete("test").execute().actionGet();
|
||||||
@ -45,7 +45,7 @@ public class SimilarityIT extends ESIntegTestCase {
|
|||||||
.field("type", "string")
|
.field("type", "string")
|
||||||
.endObject()
|
.endObject()
|
||||||
.startObject("field2")
|
.startObject("field2")
|
||||||
.field("similarity", "default")
|
.field("similarity", "classic")
|
||||||
.field("type", "string")
|
.field("type", "string")
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject()
|
.endObject()
|
||||||
|
@ -48,10 +48,10 @@ Here we configure the DFRSimilarity so it can be referenced as
|
|||||||
=== Available similarities
|
=== Available similarities
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[default-similarity]]
|
[[classic-similarity]]
|
||||||
==== Default similarity
|
==== Classic similarity
|
||||||
|
|
||||||
The default similarity that is based on the TF/IDF model. This
|
The classic similarity that is based on the TF/IDF model. This
|
||||||
similarity has the following option:
|
similarity has the following option:
|
||||||
|
|
||||||
`discount_overlaps`::
|
`discount_overlaps`::
|
||||||
@ -59,7 +59,7 @@ similarity has the following option:
|
|||||||
0 position increment) are ignored when computing norm. By default this
|
0 position increment) are ignored when computing norm. By default this
|
||||||
is true, meaning overlap tokens do not count when computing norms.
|
is true, meaning overlap tokens do not count when computing norms.
|
||||||
|
|
||||||
Type name: `default`
|
Type name: `classic`
|
||||||
|
|
||||||
[float]
|
[float]
|
||||||
[[bm25]]
|
[[bm25]]
|
||||||
|
@ -15,7 +15,7 @@ similarities. For more details about this expert options, see the
|
|||||||
The only similarities which can be used out of the box, without any further
|
The only similarities which can be used out of the box, without any further
|
||||||
configuration are:
|
configuration are:
|
||||||
|
|
||||||
`default`::
|
`classic`::
|
||||||
The Default TF/IDF algorithm used by Elasticsearch and
|
The Default TF/IDF algorithm used by Elasticsearch and
|
||||||
Lucene. See {defguide}/practical-scoring-function.html[Lucene’s Practical Scoring Function]
|
Lucene. See {defguide}/practical-scoring-function.html[Lucene’s Practical Scoring Function]
|
||||||
for more information.
|
for more information.
|
||||||
@ -49,6 +49,6 @@ PUT my_index
|
|||||||
}
|
}
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
// AUTOSENSE
|
// AUTOSENSE
|
||||||
<1> The `default_field` uses the `default` similarity (ie TF/IDF).
|
<1> The `default_field` uses the `classic` similarity (i.e. TF/IDF).
|
||||||
<2> The `bm25_field` uses the `BM25` similarity.
|
<2> The `bm25_field` uses the `BM25` similarity.
|
||||||
|
|
||||||
|
@ -166,7 +166,7 @@ Defaults depend on the <<mapping-index,`index`>> setting:
|
|||||||
<<similarity,`similarity`>>::
|
<<similarity,`similarity`>>::
|
||||||
|
|
||||||
Which scoring algorithm or _similarity_ should be used. Defaults
|
Which scoring algorithm or _similarity_ should be used. Defaults
|
||||||
to `default`, which uses TF/IDF.
|
to `classic`, which uses TF/IDF.
|
||||||
|
|
||||||
<<term-vector,`term_vector`>>::
|
<<term-vector,`term_vector`>>::
|
||||||
|
|
||||||
|
@ -224,6 +224,10 @@ Allocation settings deprecated in 1.x have been removed:
|
|||||||
|
|
||||||
Please change the setting in your configuration files or in the cluster state to use the new settings instead.
|
Please change the setting in your configuration files or in the cluster state to use the new settings instead.
|
||||||
|
|
||||||
|
==== Similarity settings
|
||||||
|
|
||||||
|
The `default` similarity has been renamed to `classic`.
|
||||||
|
|
||||||
[[breaking_30_mapping_changes]]
|
[[breaking_30_mapping_changes]]
|
||||||
=== Mapping changes
|
=== Mapping changes
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user