Update lucene to r1725675
Adds DFI (divergence from independence) provider. Fixes test bugs passing invalid values for BM25 parameters.
This commit is contained in:
parent
00bc239eec
commit
6e7e3a2274
|
@ -1,5 +1,5 @@
|
|||
elasticsearch = 3.0.0-SNAPSHOT
|
||||
lucene = 5.5.0-snapshot-1721183
|
||||
lucene = 5.5.0-snapshot-1725675
|
||||
|
||||
# optional dependencies
|
||||
spatial4j = 0.5
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.DFISimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
||||
/**
|
||||
* {@link SimilarityProvider} for the {@link DFISimilarity}.
|
||||
* <p>
|
||||
* Configuration options available:
|
||||
* <ul>
|
||||
* <li>discount_overlaps - boolean read from the provider settings, {@code true} when not set; handed to {@code setDiscountOverlaps} on the similarity</li>
|
||||
* </ul>
|
||||
* @see DFISimilarity For more information about configuration
|
||||
*/
|
||||
public class DFISimilarityProvider extends AbstractSimilarityProvider {
|
||||
|
||||
// Similarity instance created once in the constructor and returned by get().
private final DFISimilarity similarity;
|
||||
|
||||
/** Passes {@code name} to the superclass and configures a new {@link DFISimilarity} from {@code settings}. */
public DFISimilarityProvider(String name, Settings settings) {
|
||||
super(name);
|
||||
// Optional "discount_overlaps" setting; falls back to true when it is not set.
boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
|
||||
|
||||
this.similarity = new DFISimilarity();
|
||||
this.similarity.setDiscountOverlaps(discountOverlaps);
|
||||
}
|
||||
|
||||
/** Returns the {@link DFISimilarity} instance that was configured in the constructor. */
@Override
|
||||
public Similarity get() {
|
||||
return similarity;
|
||||
}
|
||||
}
|
|
@ -52,6 +52,7 @@ public final class SimilarityService extends AbstractIndexComponent {
|
|||
buildIn.put("IB", IBSimilarityProvider::new);
|
||||
buildIn.put("LMDirichlet", LMDirichletSimilarityProvider::new);
|
||||
buildIn.put("LMJelinekMercer", LMJelinekMercerSimilarityProvider::new);
|
||||
buildIn.put("DFI", DFISimilarityProvider::new);
|
||||
DEFAULTS = Collections.unmodifiableMap(defaults);
|
||||
BUILT_IN = Collections.unmodifiableMap(buildIn);
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ grant codeBase "${codebase.securesm-1.0.jar}" {
|
|||
//// Very special jar permissions:
|
||||
//// These are dangerous permissions that we don't want to grant to everything.
|
||||
|
||||
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1721183.jar}" {
|
||||
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1725675.jar}" {
|
||||
// needed to allow MMapDirectory's "unmap hack"
|
||||
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
|
||||
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
|
||||
|
|
|
@ -31,7 +31,7 @@ grant codeBase "${codebase.securemock-1.2.jar}" {
|
|||
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
|
||||
};
|
||||
|
||||
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1721183.jar}" {
|
||||
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1725675.jar}" {
|
||||
// needed by RamUsageTester
|
||||
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
|
||||
};
|
||||
|
|
|
@ -179,6 +179,8 @@ public class AnalysisFactoryTests extends ESTestCase {
|
|||
put("typeaspayload", Void.class);
|
||||
// fingerprint
|
||||
put("fingerprint", Void.class);
|
||||
// for tee-sinks
|
||||
put("daterecognizer", Void.class);
|
||||
}};
|
||||
|
||||
public void testTokenFilters() {
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.index.similarity;
|
||||
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.DFISimilarity;
|
||||
import org.apache.lucene.search.similarities.AfterEffectL;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.BasicModelG;
|
||||
|
@ -38,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
|
|||
import org.elasticsearch.index.IndexService;
|
||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||
import org.elasticsearch.index.mapper.DocumentMapperParser;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||
|
@ -93,7 +95,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
Settings indexSettings = Settings.settingsBuilder()
|
||||
.put("index.similarity.my_similarity.type", "BM25")
|
||||
.put("index.similarity.my_similarity.k1", 2.0f)
|
||||
.put("index.similarity.my_similarity.b", 1.5f)
|
||||
.put("index.similarity.my_similarity.b", 0.5f)
|
||||
.put("index.similarity.my_similarity.discount_overlaps", false)
|
||||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
|
@ -102,7 +104,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
|
||||
BM25Similarity similarity = (BM25Similarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
|
||||
assertThat(similarity.getK1(), equalTo(2.0f));
|
||||
assertThat(similarity.getB(), equalTo(1.5f));
|
||||
assertThat(similarity.getB(), equalTo(0.5f));
|
||||
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
|
||||
}
|
||||
|
||||
|
@ -156,6 +158,23 @@ public class SimilarityTests extends ESSingleNodeTestCase {
|
|||
assertThat(((NormalizationH2) similarity.getNormalization()).getC(), equalTo(3f));
|
||||
}
|
||||
|
||||
// Checks that a field whose mapping names a custom similarity of type "DFI"
// resolves to a DFISimilarityProvider that hands out a DFISimilarity.
public void testResolveSimilaritiesFromMapping_DFI() throws IOException {
|
||||
// Mapping: a single string field, "field1", that refers to the similarity "my_similarity".
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("field1").field("type", "string").field("similarity", "my_similarity").endObject()
|
||||
.endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
// Index settings declare "my_similarity" with type "DFI" and no further options.
Settings indexSettings = Settings.settingsBuilder()
|
||||
.put("index.similarity.my_similarity.type", "DFI")
|
||||
.build();
|
||||
IndexService indexService = createIndex("foo", indexSettings);
|
||||
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
|
||||
MappedFieldType fieldType = documentMapper.mappers().getMapper("field1").fieldType();
|
||||
// Both the provider and the similarity it returns must be the DFI implementations.
assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class));
|
||||
assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class));
|
||||
}
|
||||
|
||||
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties")
|
||||
|
|
|
@ -55,7 +55,7 @@ public class SimilarityIT extends ESIntegTestCase {
|
|||
.put("index.number_of_replicas", 0)
|
||||
.put("similarity.custom.type", "BM25")
|
||||
.put("similarity.custom.k1", 2.0f)
|
||||
.put("similarity.custom.b", 1.5f)
|
||||
.put("similarity.custom.b", 0.5f)
|
||||
).execute().actionGet();
|
||||
|
||||
client().prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox jumped over the lazy dog",
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
69e187ef1d2d9c9570363eb4186821e0341df5b8
|
|
@ -0,0 +1 @@
|
|||
528a695bb8882dbc3d9866335ac1bb3905cba4e3
|
|
@ -1 +0,0 @@
|
|||
0fa00a45ff9bc6a4df44db81f2e4e44ea94bf88e
|
|
@ -0,0 +1 @@
|
|||
3fb1bcc1001a10b74ae91848c8558572891c1409
|
|
@ -1 +0,0 @@
|
|||
f6854c65c7f4c6d9de583f4daa4fd3ae8a3800f1
|
|
@ -0,0 +1 @@
|
|||
9eff7f186877882f8b68f031f610bd7ab8c5c1fb
|
|
@ -1 +0,0 @@
|
|||
e996e6c723eb415ba2cfa7f5e98bbf194a4918dd
|
|
@ -0,0 +1 @@
|
|||
6e6253936522f27b35ba4d8485806f517ef2df45
|
|
@ -1 +0,0 @@
|
|||
3b7a5d97b10885f16eb53deb15d64c942b9f9fdb
|
|
@ -0,0 +1 @@
|
|||
8a313aa34b0070d3f7d48005e7677b680db1b09d
|
|
@ -1 +0,0 @@
|
|||
e4dda3eeb76e340aa4713a3b20d68c4a1504e505
|
|
@ -0,0 +1 @@
|
|||
bf4c5a17cfb265d321ef4cfb0f3d7c1a6a6651de
|
|
@ -1 +0,0 @@
|
|||
800442a5d7612ce4c8748831871b4d436a50554e
|
|
@ -0,0 +1 @@
|
|||
2713a319d0aa696c65a32a36fda830bc482a5880
|
|
@ -1 +0,0 @@
|
|||
bdf184de9b5773c7af3ae908af78eeb1e512470c
|
|
@ -0,0 +1 @@
|
|||
88251ecdbf877c15a94d4013aa5157f5b5ce4cea
|
|
@ -1 +0,0 @@
|
|||
fc59de52bd2c7e420edfd235723cb8b0dd44e92d
|
|
@ -0,0 +1 @@
|
|||
bf9e522244c7c4eee6c3bcc3212ff057f7b88000
|
|
@ -1 +0,0 @@
|
|||
1d341e6a4f11f3170773ccffdbe6815b45967e3d
|
|
@ -0,0 +1 @@
|
|||
12d71cf10a4b79231dc488af16d723dfca5ab64b
|
|
@ -1 +0,0 @@
|
|||
a1b02c2b595ac92f45f0d2be03841a3a7fcae1f1
|
|
@ -0,0 +1 @@
|
|||
f903d67d042904527a7e2e8a75c55afe36a04251
|
|
@ -1 +0,0 @@
|
|||
e3ea422b56734329fb6974e9cf9f66478adb5793
|
|
@ -0,0 +1 @@
|
|||
2f5758bbcf97048ab62d2d4ae73867d06f1ed03f
|
|
@ -1 +0,0 @@
|
|||
5eadbd4e63120b59ab6445e39489205f98420471
|
|
@ -0,0 +1 @@
|
|||
2cc29e4658be151658fac6e5ed7915982b6de861
|
|
@ -1 +0,0 @@
|
|||
a336287e65d082535f02a8427666dbe46b1b9b74
|
|
@ -0,0 +1 @@
|
|||
f490a09ca056aba42e8751a469ef114df64aae0d
|
|
@ -107,6 +107,13 @@ All options but the first option need a normalization value.
|
|||
|
||||
Type name: `DFR`
|
||||
|
||||
[float]
|
||||
[[dfi]]
|
||||
==== DFI similarity
|
||||
|
||||
Similarity that implements the http://trec.nist.gov/pubs/trec21/papers/irra.web.nb.pdf[divergence from independence]
|
||||
model (normalized chi-squared distance).
|
||||
|
||||
[float]
|
||||
[[ib]]
|
||||
==== IB similarity.
|
||||
|
|
|
@ -8,7 +8,7 @@ algorithm other than the default TF/IDF, such as `BM25`.
|
|||
Similarities are mostly useful for <<string,`string`>> fields, especially
|
||||
`analyzed` string fields, but can also apply to other field types.
|
||||
|
||||
Custom similarites can be configured by tuning the parameters of the built-in
|
||||
Custom similarities can be configured by tuning the parameters of the built-in
|
||||
similarities. For more details about these expert options, see the
|
||||
<<index-modules-similarity,similarity module>>.
|
||||
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
60e056d2dd04a81440482b047af0737bc41593d9
|
|
@ -0,0 +1 @@
|
|||
31db8e49e4089772eae8ab2db0ac59bab6fbcd2f
|
|
@ -1 +0,0 @@
|
|||
1fce4e9b5c4482bb95e8b275c825d112640d6f1e
|
|
@ -0,0 +1 @@
|
|||
4504d3d993f094ed70585124df56c2be86c2615a
|
|
@ -1 +0,0 @@
|
|||
f104f306fef9d3033db026705043e9cbd145aba5
|
|
@ -0,0 +1 @@
|
|||
15555d41d27bb398b6736be85a5eca4ca224b85d
|
|
@ -1 +0,0 @@
|
|||
40b2034a6aed4c3fe0509016fab4f7bbb37a5fc8
|
|
@ -0,0 +1 @@
|
|||
9d43a338338a6c88e8071a0e3eeb51f4d9d0364a
|
|
@ -1 +0,0 @@
|
|||
e117a87f4338be80b0a052d2ce454d5086aa57f1
|
|
@ -0,0 +1 @@
|
|||
b66c95032c5ca41ce7b85519c64aab4e9a233f78
|
|
@ -1 +0,0 @@
|
|||
703dd91fccdc1c4662c80e412a449097c0578d83
|
|
@ -0,0 +1 @@
|
|||
4f41bacd77ce372f10f2c57ab516b2ce9aa71173
|
Loading…
Reference in New Issue