add lucene language model similarities (Dirichlet & JelinekMercer)
This commit is contained in:
parent
9df655adb2
commit
ecab74fe6c
|
@ -121,6 +121,31 @@ based model] . This similarity has the following options:
|
|||
|
||||
Type name: `IB`
|
||||
|
||||
[float]
|
||||
[[lm_dirichlet]]
|
||||
==== LM Dirichlet similarity.
|
||||
|
||||
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html[LM
|
||||
Dirichlet similarity]. This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
`mu`:: Defaults to `2000`.
|
||||
|
||||
Type name: `LMDirichlet`
|
||||
|
||||
[float]
|
||||
[[lm_jelinek_mercer]]
|
||||
==== LM Jelinek Mercer similarity.
|
||||
|
||||
http://lucene.apache.org/core/4_7_1/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html[LM
|
||||
Jelinek Mercer similarity]. This similarity has the following options:
|
||||
|
||||
[horizontal]
|
||||
`lambda`:: The optimal value depends on both the collection and the query. The optimal value is around `0.1`
|
||||
for title queries and `0.7` for long queries. Defaults to `0.1`.
|
||||
|
||||
Type name: `LMJelinekMercer`
|
||||
|
||||
[float]
|
||||
[[default-base]]
|
||||
==== Default and Base Similarities
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for {@link LMDirichletSimilarity}.
|
||||
* <p/>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>mu</li>
|
||||
* </ul>
|
||||
* @see LMDirichletSimilarity For more information about configuration
|
||||
*/
|
||||
public class LMDirichletSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final LMDirichletSimilarity similarity;
|
||||
|
||||
@Inject
|
||||
public LMDirichletSimilarityProvider(@Assisted String name, @Assisted Settings settings) {
|
||||
super(name);
|
||||
float mu = settings.getAsFloat("mu", 2000f);
|
||||
this.similarity = new LMDirichletSimilarity(mu);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for {@link LMJelinekMercerSimilarity}.
|
||||
* <p/>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>lambda</li>
|
||||
* </ul>
|
||||
* @see LMJelinekMercerSimilarity For more information about configuration
|
||||
*/
|
||||
public class LMJelinekMercerSimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
private final LMJelinekMercerSimilarity similarity;
|
||||
|
||||
@Inject
|
||||
public LMJelinekMercerSimilarityProvider(@Assisted String name, @Assisted Settings settings) {
|
||||
super(name);
|
||||
float lambda = settings.getAsFloat("lambda", 0.1f);
|
||||
this.similarity = new LMJelinekMercerSimilarity(lambda);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -151,6 +151,47 @@ public class SimilarityTests extends ElasticsearchTestCase {
|
|||
assertThat(((NormalizationH2) similarity.getNormalization()).getC(), equalTo(3f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||
.endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
Settings indexSettings = ImmutableSettings.settingsBuilder()
|
||||
.put("index.similarity.my_similarity.type", "LMDirichlet")
|
||||
.put("index.similarity.my_similarity.mu", 3000f)
|
||||
.build();
|
||||
SimilarityService similarityService = similarityService(indexSettings);
|
||||
DocumentMapper documentMapper = similarityService.mapperService().documentMapperParser().parse(mapping);
|
||||
assertThat(documentMapper.mappers().name("field1").mapper().similarity(), instanceOf(LMDirichletSimilarityProvider.class));
|
||||
|
||||
LMDirichletSimilarity similarity = (LMDirichletSimilarity) documentMapper.mappers().name("field1").mapper().similarity().get();
|
||||
assertThat(similarity.getMu(), equalTo(3000f));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testResolveSimilaritiesFromMapping_LMJelinekMercer() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||
.endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
Settings indexSettings = ImmutableSettings.settingsBuilder()
|
||||
.put("index.similarity.my_similarity.type", "LMJelinekMercer")
|
||||
.put("index.similarity.my_similarity.lambda", 0.7f)
|
||||
.build();
|
||||
SimilarityService similarityService = similarityService(indexSettings);
|
||||
DocumentMapper documentMapper = similarityService.mapperService().documentMapperParser().parse(mapping);
|
||||
assertThat(documentMapper.mappers().name("field1").mapper().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class));
|
||||
|
||||
LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().name("field1").mapper().similarity().get();
|
||||
assertThat(similarity.getLambda(), equalTo(0.7f));
|
||||
}
|
||||
|
||||
|
||||
private static SimilarityService similarityService() {
|
||||
return similarityService(ImmutableSettings.Builder.EMPTY_SETTINGS);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue