Upgrade to lucene-7.0.0-snapshot-92b1783. (#25222)

This snapshot has faster range queries on range fields (LUCENE-7828), more
accurate norms (LUCENE-7730), and the ability to use fake term frequencies
(LUCENE-7854).
Adrien Grand 2017-06-15 09:52:07 +02:00 committed by GitHub
parent 106e373412
commit 0c117145f6
61 changed files with 91 additions and 113 deletions
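
For context, LUCENE-7828 speeds up queries on range fields. Below is a minimal sketch of the kind of query that benefits, using plain Lucene classes; the field name and bounds are made up for illustration, and Elasticsearch's range datatype builds on these APIs rather than exposing them directly.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongRange;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;

public class RangeFieldSketch {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
            Document doc = new Document();
            // index a single-dimension long range [100, 200]
            doc.add(new LongRange("duration", new long[] {100}, new long[] {200}));
            writer.addDocument(doc);
        }
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            // matches any indexed range that intersects [150, 300]
            Query query = LongRange.newIntersectsQuery("duration", new long[] {150}, new long[] {300});
            System.out.println(new IndexSearcher(reader).count(query)); // prints 1
        }
    }
}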

View File

@@ -1,6 +1,6 @@
# When updating elasticsearch, please update 'rest' version in core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy
elasticsearch = 6.0.0-alpha3
-lucene = 7.0.0-snapshot-a0aef2f
+lucene = 7.0.0-snapshot-92b1783
# optional dependencies
spatial4j = 0.6

View File

@@ -0,0 +1 @@
+5bf8d8b7d885e25c343c187d1849580e21ef3fce

View File

@@ -1 +0,0 @@
-5e191674c50c9d99c9838da52cbf67c411998f4e

View File

@@ -0,0 +1 @@
+9696b87e27ea949fabc62606833ab63e6e5473b9

View File

@@ -1 +0,0 @@
-45bc34ab640d5d1a7491b523631b902f20db5384

View File

@@ -0,0 +1 @@
+d65b95dc24ce104e4d815b31f7159c5f6e97831d

View File

@@ -1 +0,0 @@
-b44d86e9077443c3ba4918a85603734461c6b448

View File

@@ -0,0 +1 @@
+3afa0db63adea8ee78b958cc85c5a6cb7750a5aa

View File

@@ -1 +0,0 @@
-409b616d40e2041a02890b2dc477ed845e3121e9

View File

@@ -0,0 +1 @@
+586db5fba5b84d4955e349c3ca77b7df67498a24

View File

@@ -1 +0,0 @@
-cfac105541315e2ca54955f681b410a7aa3bbb9d

View File

@@ -0,0 +1 @@
+8fc234d4474eaa400f5f964e18e9b179d87d86f0

View File

@@ -1 +0,0 @@
-993c1331130dd26c632b964fd8caac259bb9f3fc

View File

@@ -0,0 +1 @@
+3c70558114d053c025d04347b13bd10317c1db69

View File

@@ -1 +0,0 @@
-ec1460a28850410112a6349a7fff27df31242295

View File

@@ -0,0 +1 @@
+bf80c278e4c1c22b6e1382fc88ed016969596b61

View File

@@ -1 +0,0 @@
-57d342dbe68cf05361ccfda6bb76f2410cac900b

View File

@@ -0,0 +1 @@
+fb2313a800903b991d21704ebcdce5f07a602259

View File

@@ -1 +0,0 @@
-5ed10847b6a2353ac66decd5a2ee1a1d34353049

View File

@@ -0,0 +1 @@
+24d1843ffaf4fddbd41c636274a9a8288ccdf964

View File

@@ -1 +0,0 @@
-23ce6c2ea59287d8fe4fe31f466e9a58a1efe7b5

View File

@@ -0,0 +1 @@
+6413231d34b23fcbca9fd17ea6c980b594e420ff

View File

@@ -1 +0,0 @@
-78bda71c8e65428927136f81112a031aa9cd04d4

View File

@@ -0,0 +1 @@
+634187ab976bcde9905b4167ad273d3db6372a20

View File

@@ -1 +0,0 @@
-1e7ea95e6197176015b13551c7496be4867ede45

View File

@@ -0,0 +1 @@
+c65576991cd1d9a75e6ee4e4a81e3d20bd160239

View File

@@ -1 +0,0 @@
-5ae4ecd6c478456395ae9a3f954b8afc13629bb9

View File

@@ -0,0 +1 @@
+85c7a9adc02245b7a19e5cffed83cc20043cda83

View File

@@ -1 +0,0 @@
-d5d1a81fc290b9660a49557f848dc2a3c4f2048b

View File

@@ -0,0 +1 @@
+7ca7464c4b7900d7d514335d98c391851dcd84ce

View File

@@ -1 +0,0 @@
-d77cdd8f2782062a3b4c319c64f0fa4d804aafed

View File

@@ -24,6 +24,7 @@ import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.uhighlight.Snippet;
import org.apache.lucene.search.uhighlight.BoundedBreakIteratorScanner;
import org.apache.lucene.search.uhighlight.CustomPassageFormatter;
+import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
@@ -96,9 +97,7 @@ public class UnifiedHighlighter implements Highlighter {
// breaks the text on, so we don't lose the distinction between the different values of a field and we
// get back a snippet per value
String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
-org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator breakIterator =
-new org.apache.lucene.search.postingshighlight
-.CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
+CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
highlighter =
new CustomUnifiedHighlighter(searcher, analyzer, mapperHighlighterEntry.passageFormatter,
field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue,
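
The three-line fully qualified construction collapses to one line because CustomSeparatorBreakIterator now lives in the same uhighlight package as the other imports. As a reminder of what it does, a rough sketch (the separator char and sample text are invented stand-ins): it yields one boundary per separator, which is how the highlighter keeps one snippet per original field value.

import java.text.BreakIterator;
import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;

char sep = '\u0000'; // stand-in for MULTIVAL_SEP_CHAR
BreakIterator bi = new CustomSeparatorBreakIterator(sep);
bi.setText("first value" + sep + "second value");
int start = bi.first();
for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
    // each [start, end) slice corresponds to one original field value
    System.out.println("passage: [" + start + ", " + end + ")");
}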

View File

@@ -31,7 +31,7 @@ grant codeBase "${codebase.securesm-1.1.jar}" {
//// Very special jar permissions:
//// These are dangerous permissions that we don't want to grant to everything.
-grant codeBase "${codebase.lucene-core-7.0.0-snapshot-a0aef2f.jar}" {
+grant codeBase "${codebase.lucene-core-7.0.0-snapshot-92b1783.jar}" {
// needed to allow MMapDirectory's "unmap hack" (die unmap hack, die)
// java 8 package
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
@@ -42,7 +42,7 @@ grant codeBase "${codebase.lucene-core-7.0.0-snapshot-a0aef2f.jar}" {
permission java.lang.RuntimePermission "accessDeclaredMembers";
};
-grant codeBase "${codebase.lucene-misc-7.0.0-snapshot-a0aef2f.jar}" {
+grant codeBase "${codebase.lucene-misc-7.0.0-snapshot-92b1783.jar}" {
// needed to allow shard shrinking to use hard-links if possible via lucenes HardlinkCopyDirectoryWrapper
permission java.nio.file.LinkPermission "hard";
};

View File

@@ -33,7 +33,7 @@ grant codeBase "${codebase.securemock-1.2.jar}" {
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
};
-grant codeBase "${codebase.lucene-test-framework-7.0.0-snapshot-a0aef2f.jar}" {
+grant codeBase "${codebase.lucene-test-framework-7.0.0-snapshot-92b1783.jar}" {
// needed by RamUsageTester
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
// needed for testing hardlinks in StoreRecoveryTests since we install MockFS

View File

@@ -26,7 +26,9 @@ import org.elasticsearch.test.ESIntegTestCase;
import org.hamcrest.core.IsNull;
import java.io.IOException;
+import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
@@ -280,14 +282,10 @@ public class AnalyzeActionIT extends ESIntegTestCase {
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubled"));
String[] expectedAttributesKey = {
"bytes",
+"termFrequency",
"positionLength"};
-assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
-Object extendedAttribute;
-for (String key : expectedAttributesKey) {
-extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
-assertThat(extendedAttribute, notNullValue());
-}
+assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().keySet(),
+equalTo(new HashSet<>(Arrays.asList(expectedAttributesKey))));
}
public void testDetailAnalyzeWithMultiValues() throws Exception {
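
The extra "termFrequency" key appears because tokens can now expose a frequency through Lucene's TermFrequencyAttribute (LUCENE-7854). A rough sketch of how a consumer reads it off a token stream; the analyzer and sample text are placeholders:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;

try (StandardAnalyzer analyzer = new StandardAnalyzer();
     TokenStream ts = analyzer.tokenStream("field", "troubled text")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TermFrequencyAttribute freq = ts.addAttribute(TermFrequencyAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        // getTermFrequency() is 1 unless a filter explicitly set it
        System.out.println(term + " -> " + freq.getTermFrequency());
    }
    ts.end();
}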

View File

@@ -126,8 +126,10 @@ public class TransportTwoNodesSearchIT extends ESIntegTestCase {
// to produce the same 8-bit norm for all docs here, so that
// the tf is basically the entire score (assuming idf is fixed, which
// it should be if dfs is working correctly)
-for (int i = 1024; i < 1124; i++) {
-index(Integer.toString(i - 1024), "test", i);
+// With the current way of encoding norms, every length between 1048 and 1176
+// is encoded into the same byte
+for (int i = 1048; i < 1148; i++) {
+index(Integer.toString(i - 1048), "test", i);
}
refresh();
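
The shifted loop bounds follow from the new norm encoding (LUCENE-7730): field lengths are compressed through SmallFloat, so whole ranges of lengths share one encoded byte. A small sketch, assuming the intToByte4/byte4ToInt helpers that LUCENE-7730 added to SmallFloat, of why every length the loop produces gets the same norm:

import org.apache.lucene.util.SmallFloat;

// if the comment above is right, 1048 and 1147 encode to the same byte
byte lo = SmallFloat.intToByte4(1048);
byte hi = SmallFloat.intToByte4(1147);
System.out.println(lo == hi); // expected: true
System.out.println(SmallFloat.byte4ToInt(lo)); // the shared decoded length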

View File

@@ -273,14 +273,14 @@ public class DecayFunctionScoreIT extends ESIntegTestCase {
.setId("1")
.setIndex("test")
.setSource(
-jsonBuilder().startObject().field("test", "value").startObject("loc").field("lat", 11).field("lon", 21).endObject()
-.endObject()));
+jsonBuilder().startObject().field("test", "value value").startObject("loc").field("lat", 11).field("lon", 21)
+.endObject().endObject()));
indexBuilders.add(client().prepareIndex()
.setType("type1")
.setId("2")
.setIndex("test")
.setSource(
-jsonBuilder().startObject().field("test", "value value").startObject("loc").field("lat", 11).field("lon", 20)
+jsonBuilder().startObject().field("test", "value").startObject("loc").field("lat", 11).field("lon", 20)
.endObject().endObject()));
.endObject().endObject()));
indexRandom(true, false, indexBuilders); // force no dummy docs
@@ -297,10 +297,19 @@ public class DecayFunctionScoreIT extends ESIntegTestCase {
SearchResponse sr = response.actionGet();
SearchHits sh = sr.getHits();
assertThat(sh.getTotalHits(), equalTo((long) (2)));
-assertThat(sh.getAt(0).getId(), isOneOf("1"));
+assertThat(sh.getAt(0).getId(), equalTo("1"));
assertThat(sh.getAt(1).getId(), equalTo("2"));
// Test Exp
+response = client().search(
+searchRequest().searchType(SearchType.QUERY_THEN_FETCH).source(
+searchSource().query(termQuery("test", "value"))));
+sr = response.actionGet();
+sh = sr.getHits();
+assertThat(sh.getTotalHits(), equalTo((long) (2)));
+assertThat(sh.getAt(0).getId(), equalTo("1"));
+assertThat(sh.getAt(1).getId(), equalTo("2"));
response = client().search(
searchRequest().searchType(SearchType.QUERY_THEN_FETCH).source(
searchSource().query(

View File

@@ -294,8 +294,8 @@ public class QueryRescorerIT extends ESIntegTestCase {
assertThat(searchResponse.getHits().getHits().length, equalTo(4));
assertHitCount(searchResponse, 4);
assertThat(searchResponse.getHits().getMaxScore(), equalTo(searchResponse.getHits().getHits()[0].getScore()));
-assertFirstHit(searchResponse, hasId("6"));
-assertSecondHit(searchResponse, hasId("1"));
+assertFirstHit(searchResponse, hasId("1"));
+assertSecondHit(searchResponse, hasId("6"));
assertThirdHit(searchResponse, hasId("3"));
assertFourthHit(searchResponse, hasId("2"));
}
@@ -392,29 +392,6 @@
}
}
-private static void assertEquivalentOrSubstringMatch(String query, SearchResponse plain, SearchResponse rescored) {
-assertNoFailures(plain);
-assertNoFailures(rescored);
-SearchHits leftHits = plain.getHits();
-SearchHits rightHits = rescored.getHits();
-assertThat(leftHits.getTotalHits(), equalTo(rightHits.getTotalHits()));
-assertThat(leftHits.getHits().length, equalTo(rightHits.getHits().length));
-SearchHit[] hits = leftHits.getHits();
-SearchHit[] otherHits = rightHits.getHits();
-if (!hits[0].getId().equals(otherHits[0].getId())) {
-assertThat(((String) otherHits[0].getSourceAsMap().get("field1")).contains(query), equalTo(true));
-} else {
-Arrays.sort(hits, searchHitsComparator);
-Arrays.sort(otherHits, searchHitsComparator);
-for (int i = 0; i < hits.length; i++) {
-if (hits[i].getScore() == hits[hits.length-1].getScore()) {
-return; // we need to cut off here since this is the tail of the queue and we might not have fetched enough docs
-}
-assertThat(query, hits[i].getId(), equalTo(rightHits.getHits()[i].getId()));
-}
-}
-}
// forces QUERY_THEN_FETCH because of https://github.com/elastic/elasticsearch/issues/4829
public void testEquivalence() throws Exception {
// no dummy docs since merges can change scores while we run queries.
@@ -461,18 +438,6 @@
.actionGet();
// check equivalence
assertEquivalent(query, plain, rescored);
-rescored = client()
-.prepareSearch()
-.setSearchType(SearchType.QUERY_THEN_FETCH)
-.setPreference("test") // ensure we hit the same shards for tie-breaking
-.setQuery(QueryBuilders.matchQuery("field1", query).operator(Operator.OR))
-.setFrom(0)
-.setSize(resultSize)
-.setRescorer(queryRescorer(matchPhraseQuery("field1", intToEnglish).slop(0))
-.setQueryWeight(1.0f).setRescoreQueryWeight(1.0f), 2 * rescoreWindow).execute().actionGet();
-// check equivalence or if the first match differs we check if the phrase is a substring of the top doc
-assertEquivalentOrSubstringMatch(intToEnglish, plain, rescored);
}
}

View File

@@ -241,13 +241,13 @@ The output from the above is:
},
"hits": {
"total": 1,
-"max_score": 0.2824934,
+"max_score": 0.2876821,
"hits": [
{
"_index": "my_index",
"_type": "my_type",
"_id": "1",
-"_score": 0.2824934,
+"_score": 0.2876821,
"_source": {
"text": "The fooBarBaz method"
},

View File

@@ -300,13 +300,13 @@ GET my_index/_search
},
"hits": {
"total": 1,
-"max_score": 0.51623213,
+"max_score": 0.5753642,
"hits": [
{
"_index": "my_index",
"_type": "doc",
"_id": "1",
-"_score": 0.51623213,
+"_score": 0.5753642,
"_source": {
"title": "Quick Foxes"
}

View File

@@ -88,13 +88,13 @@ GET index/_search
},
"hits": {
"total": 2,
-"max_score": 0.25811607,
+"max_score": 0.2876821,
"hits": [
{
"_index": "index",
"_type": "type",
"_id": "2",
-"_score": 0.25811607,
+"_score": 0.2876821,
"_source": {
"body": "A pair of skis"
}
@@ -103,7 +103,7 @@ GET index/_search
"_index": "index",
"_type": "type",
"_id": "1",
-"_score": 0.25811607,
+"_score": 0.2876821,
"_source": {
"body": "Ski resort"
}
@@ -145,13 +145,13 @@ GET index/_search
},
"hits": {
"total": 1,
-"max_score": 0.25811607,
+"max_score": 0.2876821,
"hits": [
{
"_index": "index",
"_type": "type",
"_id": "1",
-"_score": 0.25811607,
+"_score": 0.2876821,
"_source": {
"body": "Ski resort"
}
@@ -201,13 +201,13 @@ GET index/_search
},
"hits": {
"total": 1,
-"max_score": 0.25811607,
+"max_score": 0.2876821,
"hits": [
{
"_index": "index",
"_type": "type",
"_id": "1",
-"_score": 0.25811607,
+"_score": 0.2876821,
"_source": {
"body": "Ski resort"
}

View File

@@ -90,13 +90,13 @@ The above request will yield the following response:
},
"hits": {
"total": 1,
-"max_score": 0.5716521,
+"max_score": 0.5753642,
"hits": [
{ <1>
"_index": "my-index",
"_type": "doc",
"_id": "1",
-"_score": 0.5716521,
+"_score": 0.5753642,
"_source": {
"query": {
"match": {
@@ -291,13 +291,13 @@ This will yield the following response.
},
"hits": {
"total": 2,
-"max_score": 0.5446649,
+"max_score": 0.5753642,
"hits": [
{
"_index": "my-index",
"_type": "doc",
"_id": "4",
-"_score": 0.5446649,
+"_score": 0.5753642,
"_source": {
"query": {
"match": {
@@ -315,7 +315,7 @@ This will yield the following response.
"_index": "my-index",
"_type": "doc",
"_id": "3",
-"_score": 0.5446649,
+"_score": 0.5753642,
"_source": {
"query": {
"match": {

View File

@@ -35,11 +35,11 @@ This will yield the following result:
"_id": "0",
"matched": true,
"explanation": {
-"value": 1.55077,
+"value": 1.6943599,
"description": "weight(message:elasticsearch in 0) [PerFieldSimilarity], result of:",
"details": [
{
-"value": 1.55077,
+"value": 1.6943599,
"description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
"details": [
{
@@ -59,7 +59,7 @@ This will yield the following result:
]
},
{
-"value": 1.1186441,
+"value": 1.2222223,
"description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
"details": [
{
@@ -83,7 +83,7 @@ This will yield the following result:
"details": []
},
{
-"value": 4.0,
+"value": 3.0,
"description": "fieldLength",
"details": []
}
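
As a sanity check on these numbers: with the BM25 defaults k1 = 1.2 and b = 0.75, freq = 1, and the avgFieldLength of 5.4 implied by the old output, the new fieldLength of 3.0 gives tfNorm = (1 * 2.2) / (1 + 1.2 * (0.25 + 0.75 * 3 / 5.4)) = 2.2 / 1.8 ≈ 1.2222223, matching the updated explanation (the old fieldLength of 4.0 yields 2.2 / 1.9667 ≈ 1.1186441 the same way).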

View File

@@ -457,13 +457,13 @@ Response:
...
"hits": {
"total": 1,
-"max_score": 1.4818809,
+"max_score": 1.601195,
"hits": [
{
"_index": "twitter",
"_type": "tweet",
"_id": "1",
-"_score": 1.4818809,
+"_score": 1.601195,
"_source": {
"user": "test",
"message": "some message with the number 1",
@@ -513,13 +513,13 @@ Response:
...
"hits": {
"total": 1,
-"max_score": 1.4818809,
+"max_score": 1.601195,
"hits": [
{
"_index": "twitter",
"_type": "tweet",
"_id": "1",
-"_score": 1.4818809,
+"_score": 1.601195,
"_source": {
"user": "test",
"message": "some message with the number 1",

View File

@@ -137,26 +137,26 @@ An example of a response snippet that could be generated from the above search r
...,
"hits": {
"total": 1,
-"max_score": 0.9651416,
+"max_score": 1.0444683,
"hits": [
{
"_index": "test",
"_type": "doc",
"_id": "1",
-"_score": 0.9651416,
+"_score": 1.0444683,
"_source": ...,
"inner_hits": {
"comments": { <1>
"hits": {
"total": 1,
-"max_score": 0.9651416,
+"max_score": 1.0444683,
"hits": [
{
"_nested": {
"field": "comments",
"offset": 1
},
-"_score": 0.9651416,
+"_score": 1.0444683,
"_source": {
"author": "nik9000",
"text": "words words words"
@@ -263,26 +263,26 @@ Response not included in text but tested for completeness sake.
...,
"hits": {
"total": 1,
-"max_score": 0.9651416,
+"max_score": 1.0444683,
"hits": [
{
"_index": "test",
"_type": "doc",
"_id": "1",
-"_score": 0.9651416,
+"_score": 1.0444683,
"_source": ...,
"inner_hits": {
"comments": { <1>
"hits": {
"total": 1,
-"max_score": 0.9651416,
+"max_score": 1.0444683,
"hits": [
{
"_nested": {
"field": "comments",
"offset": 1
},
-"_score": 0.9651416,
+"_score": 1.0444683,
"fields": {
"comments.text": [
"words words words"

View File

@@ -0,0 +1 @@
+bcd4b2c3308a284f4d93400a47cb324a3c729aed

View File

@@ -1 +0,0 @@
-e7bfe234a793f8a1f0556def4e526d040ed636c8

View File

@@ -0,0 +1 @@
+4e74b475f888a6b488fa1f30362f2a537330d911

View File

@@ -1 +0,0 @@
-18e2a8a8096b13e191882aa77134e27c68e60372

View File

@@ -0,0 +1 @@
+d51c247bd2a0e053db07eaec25464eae2f7f4360

View File

@@ -1 +0,0 @@
-236924d9d6da7e4f36535e957e9a506b4e737302

View File

@@ -0,0 +1 @@
+b01fe0b5d64e2c6dbeba51bfcc38c20b86f7f71a

View File

@@ -1 +0,0 @@
-f8b0087d03c65253122cbc3b3419f346204e80fe

View File

@@ -0,0 +1 @@
+c38eb6f68ca095314568176f8d183b284f1fcc17

View File

@@ -1 +0,0 @@
-3e5102270f6c10a3b33e402ed5f8722ec2a1a338

View File

@@ -0,0 +1 @@
+e9f595188eb3d977e242ab02692c1845c69efdaf

View File

@@ -1 +0,0 @@
-6d9730ec654bdcf943a4018a5695e7954159ceda

View File

@@ -0,0 +1 @@
+dab621b2c6b28c322a90668c2d43d14a354997ae

View File

@@ -1 +0,0 @@
-26d01ae0d15243b30874b2cb609be5d041890459

View File

@@ -262,6 +262,9 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("daterecognizer", Void.class)
// for token filters that generate bad offsets, which are now rejected since Lucene 7
.put("fixbrokenoffsets", Void.class)
+// should we expose it, or maybe think about higher level integration of the
+// fake term frequency feature (LUCENE-7854)
+.put("delimitedtermfrequency", Void.class)
.immutableMap();
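
The delimitedtermfrequency filter being parked here is Lucene's DelimitedTermFrequencyTokenFilter (LUCENE-7854), which parses a per-token frequency off the token text. A hypothetical sketch of what exposing it would provide (the input text and delimiter usage are illustrative):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;

WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader("lucene|5 search|2"));
try (TokenStream ts = new DelimitedTermFrequencyTokenFilter(tokenizer)) { // '|' is the default delimiter
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TermFrequencyAttribute freq = ts.addAttribute(TermFrequencyAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(term + " freq=" + freq.getTermFrequency()); // lucene freq=5, search freq=2
    }
    ts.end();
}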

View File

@@ -56,7 +56,8 @@ public class ESTestCaseTests extends ESTestCase {
});
fail("expected assertion error");
} catch (AssertionFailedError assertFailed) {
-assertEquals("Unexpected exception type, expected IllegalArgumentException", assertFailed.getMessage());
+assertEquals("Unexpected exception type, expected IllegalArgumentException but got java.lang.IllegalStateException: bad state",
+assertFailed.getMessage());
assertNotNull(assertFailed.getCause());
assertEquals("bad state", assertFailed.getCause().getMessage());
}
@@ -66,7 +67,8 @@
fail("expected assertion error");
} catch (AssertionFailedError assertFailed) {
assertNull(assertFailed.getCause());
-assertEquals("Expected exception IllegalArgumentException", assertFailed.getMessage());
+assertEquals("Expected exception IllegalArgumentException but no exception was thrown",
+assertFailed.getMessage());
}
}