Update to lucene-8.0.0-snapshot-c78429a554 (#36212)

Includes:

* A fix for a bug in Intervals.or() (https://issues.apache.org/jira/browse/LUCENE-8586)
* The ability to disable offset mangling in WordDelimiterGraphFilter (https://issues.apache.org/jira/browse/LUCENE-8509)
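* BM25Similarity no longer multiplies scores by k1 + 1 (a factor of 2.2 with the default k1 = 1.2), which is why expected scores in the docs and tests change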
This commit is contained in:
Alan Woodward 2018-12-05 12:43:56 +00:00 committed by GitHub
parent d2886e1c81
commit 73ceaad03a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
59 changed files with 60 additions and 59 deletions

View File

@ -1,5 +1,5 @@
elasticsearch = 7.0.0
lucene = 8.0.0-snapshot-67cdd21996
lucene = 8.0.0-snapshot-c78429a554
# optional dependencies
spatial4j = 0.7

View File

@ -200,13 +200,13 @@ now returns matches from the new index:
},
"hits": {
"total": 1,
"max_score": 0.2876821,
"max_score": 0.13076457,
"hits": [
{
"_index": "new_index", <1>
"_type": "_doc",
"_id": "1",
"_score": 0.2876821,
"_score": 0.13076457,
"_source": {
"query": {
"match": {
@ -395,13 +395,13 @@ This results in a response like this:
},
"hits": {
"total": 1,
"max_score": 0.2876821,
"max_score": 0.13076457,
"hits": [
{
"_index": "test_index",
"_type": "_doc",
"_id": "1",
"_score": 0.2876821,
"_score": 0.13076457,
"_source": {
"query": {
"match": {
@ -554,13 +554,13 @@ GET /my_queries1/_search
},
"hits": {
"total": 1,
"max_score": 0.41501677,
"max_score": 0.18864399,
"hits": [
{
"_index": "my_queries1",
"_type": "_doc",
"_id": "1",
"_score": 0.41501677,
"_score": 0.18864399,
"_source": {
"query": {
"term": {

View File

@ -90,13 +90,13 @@ The above request will yield the following response:
},
"hits": {
"total": 1,
"max_score": 0.5753642,
"max_score": 0.26152915,
"hits": [
{ <1>
"_index": "my-index",
"_type": "_doc",
"_id": "1",
"_score": 0.5753642,
"_score": 0.26152915,
"_source": {
"query": {
"match": {
@ -238,13 +238,13 @@ GET /my-index/_search
},
"hits": {
"total": 1,
"max_score": 1.5606477,
"max_score": 0.7093853,
"hits": [
{
"_index": "my-index",
"_type": "_doc",
"_id": "1",
"_score": 1.5606477,
"_score": 0.7093853,
"_source": {
"query": {
"match": {
@ -418,13 +418,13 @@ This will yield the following response.
},
"hits": {
"total": 2,
"max_score": 0.5753642,
"max_score": 0.26152915,
"hits": [
{
"_index": "my-index",
"_type": "_doc",
"_id": "3",
"_score": 0.5753642,
"_score": 0.26152915,
"_source": {
"query": {
"match": {
@ -445,7 +445,7 @@ This will yield the following response.
"_index": "my-index",
"_type": "_doc",
"_id": "4",
"_score": 0.5753642,
"_score": 0.26152915,
"_source": {
"query": {
"match": {
@ -523,13 +523,13 @@ The slightly different response:
},
"hits": {
"total": 1,
"max_score": 1.5606477,
"max_score": 0.7093853,
"hits": [
{
"_index": "my-index",
"_type": "_doc",
"_id": "1",
"_score": 1.5606477,
"_score": 0.7093853,
"_source": {
"query": {
"match": {
@ -619,13 +619,13 @@ The above search request returns a response similar to this:
},
"hits": {
"total": 1,
"max_score": 0.5753642,
"max_score": 0.26152915,
"hits": [
{
"_index": "my-index",
"_type": "_doc",
"_id": "1",
"_score": 0.5753642,
"_score": 0.26152915,
"_source": {
"query": {
"match": {

View File

@ -43,7 +43,7 @@ This will yield the following result:
"details":[
{
"value":2.2,
"description":"scaling factor, k1 + 1",
"description":"boost",
"details":[]
},
{

View File

@ -99,7 +99,7 @@ public class WordDelimiterGraphTokenFilterFactory extends AbstractTokenFilterFac
@Override
public TokenStream create(TokenStream tokenStream) {
return new WordDelimiterGraphFilter(tokenStream, charTypeTable, flags, protoWords);
return new WordDelimiterGraphFilter(tokenStream, true, charTypeTable, flags, protoWords);
}
@Override

View File

@ -1 +0,0 @@
65b85d26f4eb4d23b98aaeffc9b1054c23d0227b

View File

@ -0,0 +1 @@
4a1574a3d3fcb950b440e36b3035f90885794bbf

View File

@ -107,7 +107,7 @@ public class PainlessExecuteApiTests extends ESSingleNodeTestCase {
"Math.round((_score + (doc['rank'].value / params.max_rank)) * 100.0) / 100.0", singletonMap("max_rank", 5.0)), "score",
contextSetup);
Response response = innerShardOperation(request, scriptService, indexService);
assertThat(response.getResult(), equalTo(1.09D));
assertThat(response.getResult(), equalTo(0.93D));
}
}

View File

@ -1 +0,0 @@
2c31180c0afaf7ce10244175c68a9189e57b456b

View File

@ -0,0 +1 @@
428b4a9e84b4e903dfadb4dd1e1ef2cdd98cce08

View File

@ -1 +0,0 @@
d39dee7d510aecb9437a1e438ec19cf4398d8792

View File

@ -0,0 +1 @@
d08ee1049d04f672175ea9ba3132f7eaa98d9742

View File

@ -1 +0,0 @@
1f3ce32163fbf344f82d18b61715dc0891c22e00

View File

@ -0,0 +1 @@
841a9bd3a0e12b15b700c0655a76e4035d3128ae

View File

@ -1 +0,0 @@
6d378fb5b5a904cd3e3a1b1f3bab8b7c5cbc9d85

View File

@ -0,0 +1 @@
e9bfd4935d1a5d55154cb99a066a03797174bc33

View File

@ -1 +0,0 @@
df4957389f85da32b553dd901f30767879a507f2

View File

@ -0,0 +1 @@
6a933a5113a708229177463c94d53ea544414a53

View File

@ -1 +0,0 @@
210ea4e9423e03cd3f6ea9b8e81cab727101d3cb

View File

@ -0,0 +1 @@
7709b470601b0c1a77fdcd5dd9ce9f48aba3db78

View File

@ -1 +0,0 @@
3c345959ae03ae458be1590c2ac782b2a621abb2

View File

@ -0,0 +1 @@
00062c609614d7229c5869d7d8988674ffaea350

View File

@ -1 +0,0 @@
1e557f096cd55fd1f20104b1fb4c0d0095e03fd2

View File

@ -0,0 +1 @@
ada03def6399ef5606a77c93ee45514701b98987

View File

@ -1 +0,0 @@
77c1844fd0b17e26fb4facb94f6140e98a6bbd49

View File

@ -0,0 +1 @@
c21b7cb3d2a3f34ea73b915cc15c67f203876ddf

View File

@ -1 +0,0 @@
20b559db91bda12f7b242c516915aad26e654baa

View File

@ -0,0 +1 @@
a6149ea94d695ebad4e5037f2926ca20c768777d

View File

@ -1 +0,0 @@
24e4eb6703be36c910bd0d7e3f060259602131b8

View File

@ -0,0 +1 @@
88de707d0913f9240114091a22bc178627792de3

View File

@ -1 +0,0 @@
1a9acefd0d7a9348f62fb0ea307853fe06cebc63

View File

@ -0,0 +1 @@
9812a19bdccd3646fde3db3ed53ce17c8ecd2c72

View File

@ -1 +0,0 @@
941fa34281837c5d2a62d67657618b4d6e92c6d7

View File

@ -0,0 +1 @@
9877d38f3f966352812888014b9dd0fcd861b418

View File

@ -1 +0,0 @@
eb78318f2a76b2013857ba72e0ddc42141bad36e

View File

@ -0,0 +1 @@
2ae87d38ad6b9f349de1a14c9fa2bc36d1e1126e

View File

@ -1 +0,0 @@
ce90ede863c08726d7ae70f9f15443f122674d89

View File

@ -0,0 +1 @@
cb167b153ee422e222b314fb1aacf07742079b18

View File

@ -1 +0,0 @@
e3b889834b8b43f3c5b718ee0b1b2fd198aa9467

View File

@ -0,0 +1 @@
5461afee0210ce1d2e9336e0a3f94ea7da64e491

View File

@ -1 +0,0 @@
f4c6c02a0834d582a918c895a715a74f40195297

View File

@ -0,0 +1 @@
28fd369ca80e1bee4a9830723348363850f25f91

View File

@ -1 +0,0 @@
7ed65e999af74d9356180c91176bcf0bcdf80b6a

View File

@ -0,0 +1 @@
7139424ecadad80df8127497f06d08d037c5e9cd

View File

@ -1 +0,0 @@
28a64cb272639b610064291e726f2a1792c224f2

View File

@ -0,0 +1 @@
82f9b91f2e288af0b9cee8ccc561655f9d07ed70

View File

@ -1 +0,0 @@
6af61d6e2d22be8cf0d7afb42ea61e73a59e6708

View File

@ -0,0 +1 @@
add9ee3e5c59c0544c3c88a4c92695a630a20693

View File

@ -1 +0,0 @@
7e7d3d4c5b7a3a4a065db5c7e4a22d75c11191ff

View File

@ -0,0 +1 @@
1b926af192edb666840bf23cfb2d8e72fc7373e7

View File

@ -1 +0,0 @@
b2993443ae730960c22a2c9050f58d943fb8797c

View File

@ -0,0 +1 @@
e1926831397ff98ac0c68b3632b5d3365ee5062b

View File

@ -23,7 +23,6 @@ import org.apache.logging.log4j.LogManager;
import org.apache.lucene.search.similarities.AfterEffect;
import org.apache.lucene.search.similarities.AfterEffectB;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModel;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BasicModelIF;
@ -51,6 +50,7 @@ import org.apache.lucene.search.similarities.NormalizationH1;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarities.NormalizationH3;
import org.apache.lucene.search.similarities.NormalizationZ;
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
@ -269,14 +269,14 @@ final class SimilarityProviders {
}
}
public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
public static LegacyBM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) {
assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS);
float k1 = settings.getAsFloat("k1", 1.2f);
float b = settings.getAsFloat("b", 0.75f);
boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true);
BM25Similarity similarity = new BM25Similarity(k1, b);
LegacyBM25Similarity similarity = new LegacyBM25Similarity(k1, b);
similarity.setDiscountOverlaps(discountOverlaps);
return similarity;
}

View File

@ -25,12 +25,12 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.TriFunction;
@ -75,7 +75,7 @@ public final class SimilarityService extends AbstractIndexComponent {
}
});
defaults.put("BM25", version -> {
final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
final LegacyBM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version);
return () -> similarity;
});
defaults.put("boolean", version -> {

View File

@ -19,9 +19,9 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
@ -51,7 +51,7 @@ public class LegacySimilarityTests extends ESSingleNodeTestCase {
assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally "
+ "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity "
+ "instead.");
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class));
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
assertThat(similarityService.getSimilarity("default"), equalTo(null));
}

View File

@ -21,9 +21,9 @@ package org.elasticsearch.index.similarity;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
@ -40,7 +40,7 @@ public class SimilarityServiceTests extends ESTestCase {
Settings settings = Settings.builder().build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
assertThat(service.getDefaultSimilarity(), instanceOf(BM25Similarity.class));
assertThat(service.getDefaultSimilarity(), instanceOf(LegacyBM25Similarity.class));
}
// Tests #16594

View File

@ -20,7 +20,6 @@
package org.elasticsearch.index.similarity;
import org.apache.lucene.search.similarities.AfterEffectL;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.BasicModelG;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.DFISimilarity;
@ -32,6 +31,7 @@ import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.apache.lucene.search.similarity.LegacyBM25Similarity;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
@ -60,7 +60,7 @@ public class SimilarityTests extends ESSingleNodeTestCase {
public void testResolveDefaultSimilarities() {
SimilarityService similarityService = createIndex("foo").similarityService();
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(LegacyBM25Similarity.class));
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
assertThat(similarityService.getSimilarity("default"), equalTo(null));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
@ -94,12 +94,13 @@ public class SimilarityTests extends ESSingleNodeTestCase {
.put("index.similarity.my_similarity.discount_overlaps", false)
.build();
MapperService mapperService = createIndex("foo", indexSettings, "type", mapping).mapperService();
assertThat(mapperService.fullName("field1").similarity().get(), instanceOf(BM25Similarity.class));
assertThat(mapperService.fullName("field1").similarity().get(), instanceOf(LegacyBM25Similarity.class));
BM25Similarity similarity = (BM25Similarity) mapperService.fullName("field1").similarity().get();
LegacyBM25Similarity similarity = (LegacyBM25Similarity) mapperService.fullName("field1").similarity().get();
assertThat(similarity.getK1(), equalTo(2.0f));
assertThat(similarity.getB(), equalTo(0.5f));
assertThat(similarity.getDiscountOverlaps(), equalTo(false));
// TODO: re-enable when we switch back to BM25Similarity
// assertThat(similarity.getDiscountOverlaps(), equalTo(false));
}
public void testResolveSimilaritiesFromMapping_boolean() throws IOException {

View File

@ -1 +0,0 @@
20b559db91bda12f7b242c516915aad26e654baa

View File

@ -0,0 +1 @@
a6149ea94d695ebad4e5037f2926ca20c768777d