Update lucene to r1725675

Adds DFI (divergence from independence) provider.
Fixes test bugs passing invalid values for BM25 parameters.
This commit is contained in:
Robert Muir 2016-01-20 03:32:51 -05:00
parent 00bc239eec
commit 6e7e3a2274
50 changed files with 107 additions and 27 deletions

View File

@ -1,5 +1,5 @@
elasticsearch = 3.0.0-SNAPSHOT
lucene = 5.5.0-snapshot-1721183
lucene = 5.5.0-snapshot-1725675
# optional dependencies
spatial4j = 0.5

View File

@ -0,0 +1,51 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.DFISimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.settings.Settings;
/**
 * A {@link SimilarityProvider} that exposes Lucene's {@link DFISimilarity}.
 * <p>
 * Supported settings:
 * <ul>
 *     <li>{@code discount_overlaps} - boolean, defaults to {@code true}; handed to
 *     {@link DFISimilarity#setDiscountOverlaps(boolean)}</li>
 * </ul>
 *
 * @see DFISimilarity For more information about configuration
 */
public class DFISimilarityProvider extends AbstractSimilarityProvider {

    /** The single similarity instance built at construction time and returned by {@link #get()}. */
    private final DFISimilarity similarity;

    /**
     * Builds the provider and configures its similarity from the given settings.
     *
     * @param name     the name this similarity is registered under
     * @param settings the settings of this similarity; only {@code discount_overlaps} is read
     */
    public DFISimilarityProvider(String name, Settings settings) {
        super(name);
        // Resolve the setting first so an invalid value surfaces before anything is built.
        final boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true);
        final DFISimilarity dfi = new DFISimilarity();
        dfi.setDiscountOverlaps(discountOverlaps);
        this.similarity = dfi;
    }

    /**
     * @return the {@link DFISimilarity} configured in the constructor
     */
    @Override
    public Similarity get() {
        return similarity;
    }
}

View File

@ -52,6 +52,7 @@ public final class SimilarityService extends AbstractIndexComponent {
buildIn.put("IB", IBSimilarityProvider::new);
buildIn.put("LMDirichlet", LMDirichletSimilarityProvider::new);
buildIn.put("LMJelinekMercer", LMJelinekMercerSimilarityProvider::new);
buildIn.put("DFI", DFISimilarityProvider::new);
DEFAULTS = Collections.unmodifiableMap(defaults);
BUILT_IN = Collections.unmodifiableMap(buildIn);
}

View File

@ -31,7 +31,7 @@ grant codeBase "${codebase.securesm-1.0.jar}" {
//// Very special jar permissions:
//// These are dangerous permissions that we don't want to grant to everything.
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1721183.jar}" {
grant codeBase "${codebase.lucene-core-5.5.0-snapshot-1725675.jar}" {
// needed to allow MMapDirectory's "unmap hack"
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";

View File

@ -31,7 +31,7 @@ grant codeBase "${codebase.securemock-1.2.jar}" {
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
};
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1721183.jar}" {
grant codeBase "${codebase.lucene-test-framework-5.5.0-snapshot-1725675.jar}" {
// needed by RamUsageTester
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
};

View File

@ -179,6 +179,8 @@ public class AnalysisFactoryTests extends ESTestCase {
put("typeaspayload", Void.class);
// fingerprint
put("fingerprint", Void.class);
// for tee-sinks
put("daterecognizer", Void.class);
}};
public void testTokenFilters() {

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.DFISimilarity;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModelG;
@ -38,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
@ -93,7 +95,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
Settings indexSettings = Settings.settingsBuilder()
.put("index.similarity.my_similarity.type", "BM25")
.put("index.similarity.my_similarity.k1", 2.0f)
.put("index.similarity.my_similarity.b", 1.5f)
.put("index.similarity.my_similarity.b", 0.5f)
.put("index.similarity.my_similarity.discount_overlaps", false)
.build();
IndexService indexService = createIndex("foo", indexSettings);
@ -102,7 +104,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
BM25Similarity similarity = (BM25Similarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get();
assertThat(similarity.getK1(), equalTo(2.0f));
assertThat(similarity.getB(), equalTo(1.5f));
assertThat(similarity.getB(), equalTo(0.5f));
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
}
@ -156,6 +158,23 @@ public class SimilarityTests extends ESSingleNodeTestCase {
assertThat(((NormalizationH2) similarity.getNormalization()).getC(), equalTo(3f));
}
/**
 * Checks that a field whose mapping names a custom similarity of type "DFI" resolves to a
 * {@link DFISimilarityProvider} which in turn supplies a {@link DFISimilarity}.
 */
public void testResolveSimilaritiesFromMapping_DFI() throws IOException {
    // Register a similarity called "my_similarity" backed by the DFI type.
    Settings dfiSettings = Settings.settingsBuilder()
        .put("index.similarity.my_similarity.type", "DFI")
        .build();

    // One string field that refers to that similarity by name.
    String mappingJson = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field1")
                        .field("type", "string")
                        .field("similarity", "my_similarity")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    IndexService index = createIndex("foo", dfiSettings);
    DocumentMapper mapper = index.mapperService()
        .documentMapperParser()
        .parse("type", new CompressedXContent(mappingJson));

    MappedFieldType field1Type = mapper.mappers().getMapper("field1").fieldType();
    assertThat(field1Type.similarity(), instanceOf(DFISimilarityProvider.class));
    assertThat(field1Type.similarity().get(), instanceOf(DFISimilarity.class));
}
public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")

View File

@ -55,7 +55,7 @@ public class SimilarityIT extends ESIntegTestCase {
.put("index.number_of_replicas", 0)
.put("similarity.custom.type", "BM25")
.put("similarity.custom.k1", 2.0f)
.put("similarity.custom.b", 1.5f)
.put("similarity.custom.b", 0.5f)
).execute().actionGet();
client().prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox jumped over the lazy dog",

View File

@ -1 +0,0 @@
69e187ef1d2d9c9570363eb4186821e0341df5b8

View File

@ -0,0 +1 @@
528a695bb8882dbc3d9866335ac1bb3905cba4e3

View File

@ -1 +0,0 @@
0fa00a45ff9bc6a4df44db81f2e4e44ea94bf88e

View File

@ -0,0 +1 @@
3fb1bcc1001a10b74ae91848c8558572891c1409

View File

@ -1 +0,0 @@
f6854c65c7f4c6d9de583f4daa4fd3ae8a3800f1

View File

@ -0,0 +1 @@
9eff7f186877882f8b68f031f610bd7ab8c5c1fb

View File

@ -1 +0,0 @@
e996e6c723eb415ba2cfa7f5e98bbf194a4918dd

View File

@ -0,0 +1 @@
6e6253936522f27b35ba4d8485806f517ef2df45

View File

@ -1 +0,0 @@
3b7a5d97b10885f16eb53deb15d64c942b9f9fdb

View File

@ -0,0 +1 @@
8a313aa34b0070d3f7d48005e7677b680db1b09d

View File

@ -1 +0,0 @@
e4dda3eeb76e340aa4713a3b20d68c4a1504e505

View File

@ -0,0 +1 @@
bf4c5a17cfb265d321ef4cfb0f3d7c1a6a6651de

View File

@ -1 +0,0 @@
800442a5d7612ce4c8748831871b4d436a50554e

View File

@ -0,0 +1 @@
2713a319d0aa696c65a32a36fda830bc482a5880

View File

@ -1 +0,0 @@
bdf184de9b5773c7af3ae908af78eeb1e512470c

View File

@ -0,0 +1 @@
88251ecdbf877c15a94d4013aa5157f5b5ce4cea

View File

@ -1 +0,0 @@
fc59de52bd2c7e420edfd235723cb8b0dd44e92d

View File

@ -0,0 +1 @@
bf9e522244c7c4eee6c3bcc3212ff057f7b88000

View File

@ -1 +0,0 @@
1d341e6a4f11f3170773ccffdbe6815b45967e3d

View File

@ -0,0 +1 @@
12d71cf10a4b79231dc488af16d723dfca5ab64b

View File

@ -1 +0,0 @@
a1b02c2b595ac92f45f0d2be03841a3a7fcae1f1

View File

@ -0,0 +1 @@
f903d67d042904527a7e2e8a75c55afe36a04251

View File

@ -1 +0,0 @@
e3ea422b56734329fb6974e9cf9f66478adb5793

View File

@ -0,0 +1 @@
2f5758bbcf97048ab62d2d4ae73867d06f1ed03f

View File

@ -1 +0,0 @@
5eadbd4e63120b59ab6445e39489205f98420471

View File

@ -0,0 +1 @@
2cc29e4658be151658fac6e5ed7915982b6de861

View File

@ -1 +0,0 @@
a336287e65d082535f02a8427666dbe46b1b9b74

View File

@ -0,0 +1 @@
f490a09ca056aba42e8751a469ef114df64aae0d

View File

@ -107,6 +107,13 @@ All options but the first option need a normalization value.
Type name: `DFR`
[float]
[[dfi]]
==== DFI similarity
Similarity that implements the http://trec.nist.gov/pubs/trec21/papers/irra.web.nb.pdf[divergence from independence]
model (normalized chi-squared distance).
[float]
[[ib]]
==== IB similarity.

View File

@ -8,7 +8,7 @@ algorithm other than the default TF/IDF, such as `BM25`.
Similarities are mostly useful for <<string,`string`>> fields, especially
`analyzed` string fields, but can also apply to other field types.
Custom similarites can be configured by tuning the parameters of the built-in
Custom similarities can be configured by tuning the parameters of the built-in
similarities. For more details about these expert options, see the
<<index-modules-similarity,similarity module>>.

View File

@ -1 +0,0 @@
60e056d2dd04a81440482b047af0737bc41593d9

View File

@ -0,0 +1 @@
31db8e49e4089772eae8ab2db0ac59bab6fbcd2f

View File

@ -1 +0,0 @@
1fce4e9b5c4482bb95e8b275c825d112640d6f1e

View File

@ -0,0 +1 @@
4504d3d993f094ed70585124df56c2be86c2615a

View File

@ -1 +0,0 @@
f104f306fef9d3033db026705043e9cbd145aba5

View File

@ -0,0 +1 @@
15555d41d27bb398b6736be85a5eca4ca224b85d

View File

@ -1 +0,0 @@
40b2034a6aed4c3fe0509016fab4f7bbb37a5fc8

View File

@ -0,0 +1 @@
9d43a338338a6c88e8071a0e3eeb51f4d9d0364a

View File

@ -1 +0,0 @@
e117a87f4338be80b0a052d2ce454d5086aa57f1

View File

@ -0,0 +1 @@
b66c95032c5ca41ce7b85519c64aab4e9a233f78

View File

@ -1 +0,0 @@
703dd91fccdc1c4662c80e412a449097c0578d83

View File

@ -0,0 +1 @@
4f41bacd77ce372f10f2c57ab516b2ce9aa71173