From f8998fece5f492244d2feb1c7d64e93582f67d5c Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 4 Jan 2017 19:03:52 +0100
Subject: [PATCH] Upgrade to lucene-6.4.0-snapshot-084f7a0. (#22413)

---
 buildSrc/version.properties                  |   2 +-
 ...ers-common-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ers-common-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ard-codecs-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ard-codecs-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ucene-core-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ucene-core-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...e-grouping-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...e-grouping-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ighlighter-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ighlighter-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ucene-join-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ucene-join-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ene-memory-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ene-memory-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ucene-misc-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ucene-misc-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ne-queries-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ne-queries-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ueryparser-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ueryparser-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ne-sandbox-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ne-sandbox-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ne-spatial-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ne-spatial-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ial-extras-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ial-extras-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...-spatial3d-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...-spatial3d-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...ne-suggest-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...ne-suggest-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 .../GraphTokenStreamFiniteStrings.java        | 291 ---------
 .../analysis/synonym/SynonymGraphFilter.java  | 588 ------------------
 .../org/apache/lucene/search/GraphQuery.java  | 115 ----
 .../index/analysis/AnalysisRegistry.java      |   4 +-
 ...va => SynonymGraphTokenFilterFactory.java} |   4 +-
 .../index/mapper/NumberFieldMapper.java       |  86 +--
 .../index/search/MatchQuery.java              | 146 -----
 .../elasticsearch/bootstrap/security.policy   |   4 +-
 .../bootstrap/test-framework.policy           |   2 +-
 .../index/search/MatchQueryIT.java            |   7 +-
 ...xpressions-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...xpressions-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...lyzers-icu-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...lyzers-icu-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...s-kuromoji-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...s-kuromoji-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...s-phonetic-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...s-phonetic-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...rs-smartcn-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...rs-smartcn-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...rs-stempel-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...rs-stempel-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 ...morfologik-6.4.0-snapshot-084f7a0.jar.sha1 |   1 +
 ...morfologik-6.4.0-snapshot-ec38570.jar.sha1 |   1 -
 .../AnalysisFactoryTestCase.java              |   4 +
 56 files changed, 44 insertions(+), 1253 deletions(-)
 create mode 100644 core/licenses/lucene-analyzers-common-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-analyzers-common-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-backward-codecs-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-backward-codecs-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-core-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-core-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-grouping-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-grouping-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-highlighter-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-highlighter-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-join-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-join-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-memory-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-memory-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-misc-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-misc-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-queries-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-queries-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-queryparser-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-queryparser-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-sandbox-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-sandbox-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-spatial-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-spatial-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-spatial-extras-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-spatial-extras-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-spatial3d-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-spatial3d-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 core/licenses/lucene-suggest-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 core/licenses/lucene-suggest-6.4.0-snapshot-ec38570.jar.sha1
 delete mode 100644 core/src/main/java/org/apache/lucene/analysis/synonym/GraphTokenStreamFiniteStrings.java
 delete mode 100644 core/src/main/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java
 delete mode 100644 core/src/main/java/org/apache/lucene/search/GraphQuery.java
 rename core/src/main/java/org/elasticsearch/index/analysis/{SynonymGraphFilterFactory.java => SynonymGraphTokenFilterFactory.java} (88%)
 create mode 100644 modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644 plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-ec38570.jar.sha1
 create mode 100644 plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-084f7a0.jar.sha1
 delete mode 100644
plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-ec38570.jar.sha1 create mode 100644 plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-084f7a0.jar.sha1 delete mode 100644 plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-ec38570.jar.sha1 create mode 100644 plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-084f7a0.jar.sha1 delete mode 100644 plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-ec38570.jar.sha1 diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 44835f7227c..4fd4f26bd71 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -1,5 +1,5 @@ elasticsearch = 6.0.0-alpha1 -lucene = 6.4.0-snapshot-ec38570 +lucene = 6.4.0-snapshot-084f7a0 # optional dependencies spatial4j = 0.6 diff --git a/core/licenses/lucene-analyzers-common-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-analyzers-common-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..ffa2b42fb90 --- /dev/null +++ b/core/licenses/lucene-analyzers-common-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +ad1553dd2eed3a7cd5778bc7520821ac926b56df \ No newline at end of file diff --git a/core/licenses/lucene-analyzers-common-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-analyzers-common-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 5cab7b2fef1..00000000000 --- a/core/licenses/lucene-analyzers-common-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -770114e0188dd8b4f30e5878b4f6c8677cecf1be \ No newline at end of file diff --git a/core/licenses/lucene-backward-codecs-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-backward-codecs-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..58587dc58b8 --- /dev/null +++ b/core/licenses/lucene-backward-codecs-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +dde630b1d09ff928a1f358951747cfad5c46b334 \ No newline at end of file diff --git a/core/licenses/lucene-backward-codecs-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-backward-codecs-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 02677cb1ff8..00000000000 --- a/core/licenses/lucene-backward-codecs-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -f4eb0257e8419beaa9f84da6a51375fda4e491f2 \ No newline at end of file diff --git a/core/licenses/lucene-core-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-core-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..66a9a3208e6 --- /dev/null +++ b/core/licenses/lucene-core-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +1789bff323a0c013b126f4e51f1f269ebc631277 \ No newline at end of file diff --git a/core/licenses/lucene-core-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-core-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index ea81fbaeb56..00000000000 --- a/core/licenses/lucene-core-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c80ad16cd36c41012abb8a8bb1c7328c6d680b4a \ No newline at end of file diff --git a/core/licenses/lucene-grouping-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-grouping-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..74441065e0d --- /dev/null +++ b/core/licenses/lucene-grouping-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +8cb17916d0e63705f1f715fe0d03ed32916a077a \ No newline at end of file diff --git a/core/licenses/lucene-grouping-6.4.0-snapshot-ec38570.jar.sha1 
b/core/licenses/lucene-grouping-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index d4442ded938..00000000000 --- a/core/licenses/lucene-grouping-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -070d4e370f4fe0b8a04b2bce5b4381201b0c783f \ No newline at end of file diff --git a/core/licenses/lucene-highlighter-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-highlighter-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..9aaa848b476 --- /dev/null +++ b/core/licenses/lucene-highlighter-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +79d6ba8fa629a52ad3eb829d085836f5fd2f7a87 \ No newline at end of file diff --git a/core/licenses/lucene-highlighter-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-highlighter-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index e6fc043a287..00000000000 --- a/core/licenses/lucene-highlighter-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -131d9a86f5943675493a85def0e692842f396458 \ No newline at end of file diff --git a/core/licenses/lucene-join-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-join-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..4ea4443a650 --- /dev/null +++ b/core/licenses/lucene-join-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +19794d8f15402c991d9533bfcd67e2e7a34677ef \ No newline at end of file diff --git a/core/licenses/lucene-join-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-join-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 6c90673f498..00000000000 --- a/core/licenses/lucene-join-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -385b2202036b50a764e4d2b032e21496b74a1c8e \ No newline at end of file diff --git a/core/licenses/lucene-memory-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-memory-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..8128c115c13 --- /dev/null +++ b/core/licenses/lucene-memory-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +33e42d3019e072752258bd778912c8d4365470a1 \ No newline at end of file diff --git a/core/licenses/lucene-memory-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-memory-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index bdb3a168612..00000000000 --- a/core/licenses/lucene-memory-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e8742a44ef4849a17d5e59ef36e9a52a8f2370c2 \ No newline at end of file diff --git a/core/licenses/lucene-misc-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-misc-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..d55fa646119 --- /dev/null +++ b/core/licenses/lucene-misc-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +a1b3271b3800da349c8b98f7b1a25b2b6192252a \ No newline at end of file diff --git a/core/licenses/lucene-misc-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-misc-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index e29fc5f139c..00000000000 --- a/core/licenses/lucene-misc-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7ce2e4948fb66393a34f4200a6131cfde43e47bd \ No newline at end of file diff --git a/core/licenses/lucene-queries-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-queries-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..99948c1260d --- /dev/null +++ b/core/licenses/lucene-queries-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +792716d805fcc5091931874c2f2f86f35da8b401 \ No newline at end of file diff --git a/core/licenses/lucene-queries-6.4.0-snapshot-ec38570.jar.sha1 
b/core/licenses/lucene-queries-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 4998ff5b2e4..00000000000 --- a/core/licenses/lucene-queries-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6c1c385a597ce797b0049d9b2281b09593e1488a \ No newline at end of file diff --git a/core/licenses/lucene-queryparser-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-queryparser-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..06cade53075 --- /dev/null +++ b/core/licenses/lucene-queryparser-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +c3f8bbc6ebe8d31da41fcdb1fa73f13d8170ee62 \ No newline at end of file diff --git a/core/licenses/lucene-queryparser-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-queryparser-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 9ba51f22f25..00000000000 --- a/core/licenses/lucene-queryparser-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -fafaa22906c067e6894f9f2b18ad03ded98e2f38 \ No newline at end of file diff --git a/core/licenses/lucene-sandbox-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-sandbox-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..33dc3fac466 --- /dev/null +++ b/core/licenses/lucene-sandbox-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +263901a19686c6cce7dd5c32a4934c42c62454dc \ No newline at end of file diff --git a/core/licenses/lucene-sandbox-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-sandbox-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index cce2045942b..00000000000 --- a/core/licenses/lucene-sandbox-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -19c64a84617f42bb4c11b1e266df4009cd37fdd0 \ No newline at end of file diff --git a/core/licenses/lucene-spatial-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-spatial-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..8bcd0086722 --- /dev/null +++ b/core/licenses/lucene-spatial-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +85426164fcc264a7e3bacc1a70602513540a261a \ No newline at end of file diff --git a/core/licenses/lucene-spatial-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-spatial-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 8169bea2fae..00000000000 --- a/core/licenses/lucene-spatial-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -bc8613fb61c0ae95dd3680b0f65e3380c3fd0d6c \ No newline at end of file diff --git a/core/licenses/lucene-spatial-extras-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-spatial-extras-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..d2041b9a4dd --- /dev/null +++ b/core/licenses/lucene-spatial-extras-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +332cbfaa6b1ee0bf4d820018872988e15cd413d2 \ No newline at end of file diff --git a/core/licenses/lucene-spatial-extras-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-spatial-extras-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 2614704c057..00000000000 --- a/core/licenses/lucene-spatial-extras-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -0fa2c3e722294e863f3c70a15e97a18397391fb4 \ No newline at end of file diff --git a/core/licenses/lucene-spatial3d-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-spatial3d-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..b699c89a6d3 --- /dev/null +++ b/core/licenses/lucene-spatial3d-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +3fe3e902b971f4aa2b4a3a417ba5dcf83e968428 \ No newline at end of 
file diff --git a/core/licenses/lucene-spatial3d-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-spatial3d-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 9b1c45581a1..00000000000 --- a/core/licenses/lucene-spatial3d-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -db74c6313965ffdd10d9b19be2eed4ae2c76d2e3 \ No newline at end of file diff --git a/core/licenses/lucene-suggest-6.4.0-snapshot-084f7a0.jar.sha1 b/core/licenses/lucene-suggest-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..69bb10621f1 --- /dev/null +++ b/core/licenses/lucene-suggest-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ +c4863fe45853163abfbe5c8b8bd7bdcf9a9c7b40 \ No newline at end of file diff --git a/core/licenses/lucene-suggest-6.4.0-snapshot-ec38570.jar.sha1 b/core/licenses/lucene-suggest-6.4.0-snapshot-ec38570.jar.sha1 deleted file mode 100644 index 91841f474ef..00000000000 --- a/core/licenses/lucene-suggest-6.4.0-snapshot-ec38570.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b85ae1121b5fd56df985615a3cdd7b3879e9b92d \ No newline at end of file diff --git a/core/src/main/java/org/apache/lucene/analysis/synonym/GraphTokenStreamFiniteStrings.java b/core/src/main/java/org/apache/lucene/analysis/synonym/GraphTokenStreamFiniteStrings.java deleted file mode 100644 index 3d806588eca..00000000000 --- a/core/src/main/java/org/apache/lucene/analysis/synonym/GraphTokenStreamFiniteStrings.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.analysis.synonym; - -import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.automaton.Automaton; -import org.apache.lucene.util.automaton.FiniteStringsIterator; -import org.apache.lucene.util.automaton.Operations; -import org.apache.lucene.util.automaton.Transition; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Creates a list of {@link TokenStream} where each stream is the tokens that make up a finite string in graph token stream. 
To do this, - * the graph token stream is converted to an {@link Automaton} and from there we use a {@link FiniteStringsIterator} to collect the various - * token streams for each finite string. - */ -public class GraphTokenStreamFiniteStrings { - private final Automaton.Builder builder; - Automaton det; - private final Map termToID = new HashMap<>(); - private final Map idToTerm = new HashMap<>(); - private int anyTermID = -1; - - public GraphTokenStreamFiniteStrings() { - this.builder = new Automaton.Builder(); - } - - private static class BytesRefArrayTokenStream extends TokenStream { - private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class); - private final BytesRef[] terms; - private int offset; - - BytesRefArrayTokenStream(BytesRef[] terms) { - this.terms = terms; - offset = 0; - } - - @Override - public boolean incrementToken() throws IOException { - if (offset < terms.length) { - clearAttributes(); - termAtt.setBytesRef(terms[offset]); - offset = offset + 1; - return true; - } - - return false; - } - } - - /** - * Gets - */ - public List getTokenStreams(final TokenStream in) throws IOException { - // build automation - build(in); - - List tokenStreams = new ArrayList<>(); - final FiniteStringsIterator finiteStrings = new FiniteStringsIterator(det); - for (IntsRef string; (string = finiteStrings.next()) != null; ) { - final BytesRef[] tokens = new BytesRef[string.length]; - for (int idx = string.offset, len = string.offset + string.length; idx < len; idx++) { - tokens[idx - string.offset] = idToTerm.get(string.ints[idx]); - } - - tokenStreams.add(new BytesRefArrayTokenStream(tokens)); - } - - return tokenStreams; - } - - private void build(final TokenStream in) throws IOException { - if (det != null) { - throw new IllegalStateException("Automation already built"); - } - - final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class); - final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class); - final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class); - final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class); - - in.reset(); - - int pos = -1; - int lastPos = 0; - int maxOffset = 0; - int maxPos = -1; - int state = -1; - while (in.incrementToken()) { - int posInc = posIncAtt.getPositionIncrement(); - assert pos > -1 || posInc > 0; - - if (posInc > 1) { - throw new IllegalArgumentException("cannot handle holes; to accept any term, use '*' term"); - } - - if (posInc > 0) { - // New node: - pos += posInc; - } - - int endPos = pos + posLengthAtt.getPositionLength(); - while (state < endPos) { - state = createState(); - } - - BytesRef term = termBytesAtt.getBytesRef(); - //System.out.println(pos + "-" + endPos + ": " + term.utf8ToString() + ": posInc=" + posInc); - if (term.length == 1 && term.bytes[term.offset] == (byte) '*') { - addAnyTransition(pos, endPos); - } else { - addTransition(pos, endPos, term); - } - - maxOffset = Math.max(maxOffset, offsetAtt.endOffset()); - maxPos = Math.max(maxPos, endPos); - } - - in.end(); - - // TODO: look at endOffset? ts2a did... - - // TODO: this (setting "last" state as the only accept state) may be too simplistic? - setAccept(state, true); - finish(); - } - - /** - * Returns a new state; state 0 is always the initial state. - */ - private int createState() { - return builder.createState(); - } - - /** - * Marks the specified state as accept or not. 
- */ - private void setAccept(int state, boolean accept) { - builder.setAccept(state, accept); - } - - /** - * Adds a transition to the automaton. - */ - private void addTransition(int source, int dest, String term) { - addTransition(source, dest, new BytesRef(term)); - } - - /** - * Adds a transition to the automaton. - */ - private void addTransition(int source, int dest, BytesRef term) { - if (term == null) { - throw new NullPointerException("term should not be null"); - } - builder.addTransition(source, dest, getTermID(term)); - } - - /** - * Adds a transition matching any term. - */ - private void addAnyTransition(int source, int dest) { - builder.addTransition(source, dest, getTermID(null)); - } - - /** - * Call this once you are done adding states/transitions. - */ - private void finish() { - finish(DEFAULT_MAX_DETERMINIZED_STATES); - } - - /** - * Call this once you are done adding states/transitions. - * - * @param maxDeterminizedStates Maximum number of states created when determinizing the automaton. Higher numbers allow this operation - * to consume more memory but allow more complex automatons. - */ - private void finish(int maxDeterminizedStates) { - Automaton automaton = builder.finish(); - - // System.out.println("before det:\n" + automaton.toDot()); - - Transition t = new Transition(); - - // TODO: should we add "eps back to initial node" for all states, - // and det that? then we don't need to revisit initial node at - // every position? but automaton could blow up? And, this makes it - // harder to skip useless positions at search time? - - if (anyTermID != -1) { - - // Make sure there are no leading or trailing ANY: - int count = automaton.initTransition(0, t); - for (int i = 0; i < count; i++) { - automaton.getNextTransition(t); - if (anyTermID >= t.min && anyTermID <= t.max) { - throw new IllegalStateException("automaton cannot lead with an ANY transition"); - } - } - - int numStates = automaton.getNumStates(); - for (int i = 0; i < numStates; i++) { - count = automaton.initTransition(i, t); - for (int j = 0; j < count; j++) { - automaton.getNextTransition(t); - if (automaton.isAccept(t.dest) && anyTermID >= t.min && anyTermID <= t.max) { - throw new IllegalStateException("automaton cannot end with an ANY transition"); - } - } - } - - int termCount = termToID.size(); - - // We have to carefully translate these transitions so automaton - // realizes they also match all other terms: - Automaton newAutomaton = new Automaton(); - for (int i = 0; i < numStates; i++) { - newAutomaton.createState(); - newAutomaton.setAccept(i, automaton.isAccept(i)); - } - - for (int i = 0; i < numStates; i++) { - count = automaton.initTransition(i, t); - for (int j = 0; j < count; j++) { - automaton.getNextTransition(t); - int min, max; - if (t.min <= anyTermID && anyTermID <= t.max) { - // Match any term - min = 0; - max = termCount - 1; - } else { - min = t.min; - max = t.max; - } - newAutomaton.addTransition(t.source, t.dest, min, max); - } - } - newAutomaton.finishState(); - automaton = newAutomaton; - } - - det = Operations.removeDeadStates(Operations.determinize(automaton, maxDeterminizedStates)); - } - - private int getTermID(BytesRef term) { - Integer id = termToID.get(term); - if (id == null) { - id = termToID.size(); - if (term != null) { - term = BytesRef.deepCopyOf(term); - } - termToID.put(term, id); - idToTerm.put(id, term); - if (term == null) { - anyTermID = id; - } - } - - return id; - } -} diff --git 
a/core/src/main/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java b/core/src/main/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java deleted file mode 100644 index f2c27679ab6..00000000000 --- a/core/src/main/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java +++ /dev/null @@ -1,588 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.analysis.synonym; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.util.RollingBuffer; -import org.apache.lucene.util.fst.FST; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -// TODO: maybe we should resolve token -> wordID then run -// FST on wordIDs, for better perf? - -// TODO: a more efficient approach would be Aho/Corasick's -// algorithm -// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm -// It improves over the current approach here -// because it does not fully re-start matching at every -// token. For example if one pattern is "a b c x" -// and another is "b c d" and the input is "a b c d", on -// trying to parse "a b c x" but failing when you got to x, -// rather than starting over again your really should -// immediately recognize that "b c d" matches at the next -// input. I suspect this won't matter that much in -// practice, but it's possible on some set of synonyms it -// will. We'd have to modify Aho/Corasick to enforce our -// conflict resolving (eg greedy matching) because that algo -// finds all matches. This really amounts to adding a .* -// closure to the FST and then determinizing it. -// -// Another possible solution is described at http://www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps - -/** - * Applies single- or multi-token synonyms from a {@link SynonymMap} - * to an incoming {@link TokenStream}, producing a fully correct graph - * output. This is a replacement for {@link SynonymFilter}, which produces - * incorrect graphs for multi-token synonyms. - * - * NOTE: this cannot consume an incoming graph; results will - * be undefined. 
- */ -public final class SynonymGraphFilter extends TokenFilter { - - public static final String TYPE_SYNONYM = "SYNONYM"; - public static final int GRAPH_FLAG = 8; - - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); - private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); - - private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - - private final SynonymMap synonyms; - private final boolean ignoreCase; - - private final FST fst; - - private final FST.BytesReader fstReader; - private final FST.Arc scratchArc; - private final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); - private final BytesRef scratchBytes = new BytesRef(); - private final CharsRefBuilder scratchChars = new CharsRefBuilder(); - private final LinkedList outputBuffer = new LinkedList<>(); - - private int nextNodeOut; - private int lastNodeOut; - private int maxLookaheadUsed; - - // For testing: - private int captureCount; - - private boolean liveToken; - - // Start/end offset of the current match: - private int matchStartOffset; - private int matchEndOffset; - - // True once the input TokenStream is exhausted: - private boolean finished; - - private int lookaheadNextRead; - private int lookaheadNextWrite; - - private RollingBuffer lookahead = new RollingBuffer() { - @Override - protected BufferedInputToken newInstance() { - return new BufferedInputToken(); - } - }; - - static class BufferedInputToken implements RollingBuffer.Resettable { - final CharsRefBuilder term = new CharsRefBuilder(); - AttributeSource.State state; - int startOffset = -1; - int endOffset = -1; - - @Override - public void reset() { - state = null; - term.clear(); - - // Intentionally invalid to ferret out bugs: - startOffset = -1; - endOffset = -1; - } - } - - static class BufferedOutputToken { - final String term; - - // Non-null if this was an incoming token: - final State state; - - final int startNode; - final int endNode; - - public BufferedOutputToken(State state, String term, int startNode, int endNode) { - this.state = state; - this.term = term; - this.startNode = startNode; - this.endNode = endNode; - } - } - - public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) { - super(input); - this.synonyms = synonyms; - this.fst = synonyms.fst; - if (fst == null) { - throw new IllegalArgumentException("fst must be non-null"); - } - this.fstReader = fst.getBytesReader(); - scratchArc = new FST.Arc<>(); - this.ignoreCase = ignoreCase; - } - - @Override - public boolean incrementToken() throws IOException { - //System.out.println("\nS: incrToken lastNodeOut=" + lastNodeOut + " nextNodeOut=" + nextNodeOut); - - assert lastNodeOut <= nextNodeOut; - - if (outputBuffer.isEmpty() == false) { - // We still have pending outputs from a prior synonym match: - releaseBufferedToken(); - //System.out.println(" syn: ret buffered=" + this); - assert liveToken == false; - return true; - } - - // Try to parse a new synonym match at the current token: - - if (parse()) { - // A new match was found: - releaseBufferedToken(); - //System.out.println(" syn: after parse, ret buffered=" + this); - assert liveToken == false; - return true; - } - - if (lookaheadNextRead == 
lookaheadNextWrite) { - - // Fast path: parse pulled one token, but it didn't match - // the start for any synonym, so we now return it "live" w/o having - // cloned all of its atts: - if (finished) { - //System.out.println(" syn: ret END"); - return false; - } - - assert liveToken; - liveToken = false; - - // NOTE: no need to change posInc since it's relative, i.e. whatever - // node our output is upto will just increase by the incoming posInc. - // We also don't need to change posLen, but only because we cannot - // consume a graph, so the incoming token can never span a future - // synonym match. - - } else { - // We still have buffered lookahead tokens from a previous - // parse attempt that required lookahead; just replay them now: - //System.out.println(" restore buffer"); - assert lookaheadNextRead < lookaheadNextWrite : "read=" + lookaheadNextRead + " write=" + lookaheadNextWrite; - BufferedInputToken token = lookahead.get(lookaheadNextRead); - lookaheadNextRead++; - - restoreState(token.state); - - lookahead.freeBefore(lookaheadNextRead); - - //System.out.println(" after restore offset=" + offsetAtt.startOffset() + "-" + offsetAtt.endOffset()); - assert liveToken == false; - } - - lastNodeOut += posIncrAtt.getPositionIncrement(); - nextNodeOut = lastNodeOut + posLenAtt.getPositionLength(); - - //System.out.println(" syn: ret lookahead=" + this); - - return true; - } - - private void releaseBufferedToken() throws IOException { - //System.out.println(" releaseBufferedToken"); - - BufferedOutputToken token = outputBuffer.pollFirst(); - - if (token.state != null) { - // This is an original input token (keepOrig=true case): - //System.out.println(" hasState"); - restoreState(token.state); - //System.out.println(" startOffset=" + offsetAtt.startOffset() + " endOffset=" + offsetAtt.endOffset()); - } else { - clearAttributes(); - //System.out.println(" no state"); - termAtt.append(token.term); - - // We better have a match already: - assert matchStartOffset != -1; - - offsetAtt.setOffset(matchStartOffset, matchEndOffset); - //System.out.println(" startOffset=" + matchStartOffset + " endOffset=" + matchEndOffset); - typeAtt.setType(TYPE_SYNONYM); - } - - //System.out.println(" lastNodeOut=" + lastNodeOut); - //System.out.println(" term=" + termAtt); - - posIncrAtt.setPositionIncrement(token.startNode - lastNodeOut); - lastNodeOut = token.startNode; - posLenAtt.setPositionLength(token.endNode - token.startNode); - flagsAtt.setFlags(flagsAtt.getFlags() | GRAPH_FLAG); // set the graph flag - } - - /** - * Scans the next input token(s) to see if a synonym matches. Returns true - * if a match was found. 
- */ - private boolean parse() throws IOException { - // System.out.println(Thread.currentThread().getName() + ": S: parse: " + System.identityHashCode(this)); - - // Holds the longest match we've seen so far: - BytesRef matchOutput = null; - int matchInputLength = 0; - - BytesRef pendingOutput = fst.outputs.getNoOutput(); - fst.getFirstArc(scratchArc); - - assert scratchArc.output == fst.outputs.getNoOutput(); - - // How many tokens in the current match - int matchLength = 0; - boolean doFinalCapture = false; - - int lookaheadUpto = lookaheadNextRead; - matchStartOffset = -1; - - byToken: - while (true) { - //System.out.println(" cycle lookaheadUpto=" + lookaheadUpto + " maxPos=" + lookahead.getMaxPos()); - - // Pull next token's chars: - final char[] buffer; - final int bufferLen; - final int inputEndOffset; - - if (lookaheadUpto <= lookahead.getMaxPos()) { - // Still in our lookahead buffer - BufferedInputToken token = lookahead.get(lookaheadUpto); - lookaheadUpto++; - buffer = token.term.chars(); - bufferLen = token.term.length(); - inputEndOffset = token.endOffset; - //System.out.println(" use buffer now max=" + lookahead.getMaxPos()); - if (matchStartOffset == -1) { - matchStartOffset = token.startOffset; - } - } else { - - // We used up our lookahead buffer of input tokens - // -- pull next real input token: - - assert finished || liveToken == false; - - if (finished) { - //System.out.println(" break: finished"); - break; - } else if (input.incrementToken()) { - //System.out.println(" input.incrToken"); - liveToken = true; - buffer = termAtt.buffer(); - bufferLen = termAtt.length(); - if (matchStartOffset == -1) { - matchStartOffset = offsetAtt.startOffset(); - } - inputEndOffset = offsetAtt.endOffset(); - - lookaheadUpto++; - } else { - // No more input tokens - finished = true; - //System.out.println(" break: now set finished"); - break; - } - } - - matchLength++; - //System.out.println(" cycle term=" + new String(buffer, 0, bufferLen)); - - // Run each char in this token through the FST: - int bufUpto = 0; - while (bufUpto < bufferLen) { - final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen); - if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == - null) { - break byToken; - } - - // Accum the output - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); - bufUpto += Character.charCount(codePoint); - } - - assert bufUpto == bufferLen; - - // OK, entire token matched; now see if this is a final - // state in the FST (a match): - if (scratchArc.isFinal()) { - matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); - matchInputLength = matchLength; - matchEndOffset = inputEndOffset; - //System.out.println(" ** match"); - } - - // See if the FST can continue matching (ie, needs to - // see the next input token): - if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null) { - // No further rules can match here; we're done - // searching for matching rules starting at the - // current input position. 
- break; - } else { - // More matching is possible -- accum the output (if - // any) of the WORD_SEP arc: - pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); - doFinalCapture = true; - if (liveToken) { - capture(); - } - } - } - - if (doFinalCapture && liveToken && finished == false) { - // Must capture the final token if we captured any prior tokens: - capture(); - } - - if (matchOutput != null) { - - if (liveToken) { - // Single input token synonym; we must buffer it now: - capture(); - } - - // There is a match! - bufferOutputTokens(matchOutput, matchInputLength); - lookaheadNextRead += matchInputLength; - //System.out.println(" precmatch; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos()); - lookahead.freeBefore(lookaheadNextRead); - //System.out.println(" match; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos()); - return true; - } else { - //System.out.println(" no match; lookaheadNextRead=" + lookaheadNextRead); - return false; - } - - //System.out.println(" parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite); - } - - /** - * Expands the output graph into the necessary tokens, adding - * synonyms as side paths parallel to the input tokens, and - * buffers them in the output token buffer. - */ - private void bufferOutputTokens(BytesRef bytes, int matchInputLength) { - bytesReader.reset(bytes.bytes, bytes.offset, bytes.length); - - final int code = bytesReader.readVInt(); - final boolean keepOrig = (code & 0x1) == 0; - //System.out.println(" buffer: keepOrig=" + keepOrig + " matchInputLength=" + matchInputLength); - - // How many nodes along all paths; we need this to assign the - // node ID for the final end node where all paths merge back: - int totalPathNodes; - if (keepOrig) { - assert matchInputLength > 0; - totalPathNodes = matchInputLength - 1; - } else { - totalPathNodes = 0; - } - - // How many synonyms we will insert over this match: - final int count = code >>> 1; - - // TODO: we could encode this instead into the FST: - - // 1st pass: count how many new nodes we need - List> paths = new ArrayList<>(); - for (int outputIDX = 0; outputIDX < count; outputIDX++) { - int wordID = bytesReader.readVInt(); - synonyms.words.get(wordID, scratchBytes); - scratchChars.copyUTF8Bytes(scratchBytes); - int lastStart = 0; - - List path = new ArrayList<>(); - paths.add(path); - int chEnd = scratchChars.length(); - for (int chUpto = 0; chUpto <= chEnd; chUpto++) { - if (chUpto == chEnd || scratchChars.charAt(chUpto) == SynonymMap.WORD_SEPARATOR) { - path.add(new String(scratchChars.chars(), lastStart, chUpto - lastStart)); - lastStart = 1 + chUpto; - } - } - - assert path.size() > 0; - totalPathNodes += path.size() - 1; - } - //System.out.println(" totalPathNodes=" + totalPathNodes); - - // 2nd pass: buffer tokens for the graph fragment - - // NOTE: totalPathNodes will be 0 in the case where the matched - // input is a single token and all outputs are also a single token - - // We "spawn" a side-path for each of the outputs for this matched - // synonym, all ending back at this end node: - - int startNode = nextNodeOut; - - int endNode = startNode + totalPathNodes + 1; - //System.out.println(" " + paths.size() + " new side-paths"); - - // First, fanout all tokens departing start node for these new side paths: - int newNodeCount = 0; - for (List path : paths) { - int pathEndNode; - //System.out.println(" path size=" + path.size()); - if (path.size() == 1) { - // 
Single token output, so there are no intermediate nodes: - pathEndNode = endNode; - } else { - pathEndNode = nextNodeOut + newNodeCount + 1; - newNodeCount += path.size() - 1; - } - outputBuffer.add(new BufferedOutputToken(null, path.get(0), startNode, pathEndNode)); - } - - // We must do the original tokens last, else the offsets "go backwards": - if (keepOrig) { - BufferedInputToken token = lookahead.get(lookaheadNextRead); - int inputEndNode; - if (matchInputLength == 1) { - // Single token matched input, so there are no intermediate nodes: - inputEndNode = endNode; - } else { - inputEndNode = nextNodeOut + newNodeCount + 1; - } - - //System.out.println(" keepOrig first token: " + token.term); - - outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), startNode, inputEndNode)); - } - - nextNodeOut = endNode; - - // Do full side-path for each syn output: - for (int pathID = 0; pathID < paths.size(); pathID++) { - List path = paths.get(pathID); - if (path.size() > 1) { - int lastNode = outputBuffer.get(pathID).endNode; - for (int i = 1; i < path.size() - 1; i++) { - outputBuffer.add(new BufferedOutputToken(null, path.get(i), lastNode, lastNode + 1)); - lastNode++; - } - outputBuffer.add(new BufferedOutputToken(null, path.get(path.size() - 1), lastNode, endNode)); - } - } - - if (keepOrig && matchInputLength > 1) { - // Do full "side path" with the original tokens: - int lastNode = outputBuffer.get(paths.size()).endNode; - for (int i = 1; i < matchInputLength - 1; i++) { - BufferedInputToken token = lookahead.get(lookaheadNextRead + i); - outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), lastNode, lastNode + 1)); - lastNode++; - } - BufferedInputToken token = lookahead.get(lookaheadNextRead + matchInputLength - 1); - outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), lastNode, endNode)); - } - - /* - System.out.println(" after buffer: " + outputBuffer.size() + " tokens:"); - for(BufferedOutputToken token : outputBuffer) { - System.out.println(" tok: " + token.term + " startNode=" + token.startNode + " endNode=" + token.endNode); - } - */ - } - - /** - * Buffers the current input token into lookahead buffer. 
- */ - private void capture() { - assert liveToken; - liveToken = false; - BufferedInputToken token = lookahead.get(lookaheadNextWrite); - lookaheadNextWrite++; - - token.state = captureState(); - token.startOffset = offsetAtt.startOffset(); - token.endOffset = offsetAtt.endOffset(); - assert token.term.length() == 0; - token.term.append(termAtt); - - captureCount++; - maxLookaheadUsed = Math.max(maxLookaheadUsed, lookahead.getBufferSize()); - //System.out.println(" maxLookaheadUsed=" + maxLookaheadUsed); - } - - @Override - public void reset() throws IOException { - super.reset(); - lookahead.reset(); - lookaheadNextWrite = 0; - lookaheadNextRead = 0; - captureCount = 0; - lastNodeOut = -1; - nextNodeOut = 0; - matchStartOffset = -1; - matchEndOffset = -1; - finished = false; - liveToken = false; - outputBuffer.clear(); - maxLookaheadUsed = 0; - //System.out.println("S: reset"); - } - - // for testing - int getCaptureCount() { - return captureCount; - } - - // for testing - int getMaxLookaheadUsed() { - return maxLookaheadUsed; - } -} diff --git a/core/src/main/java/org/apache/lucene/search/GraphQuery.java b/core/src/main/java/org/apache/lucene/search/GraphQuery.java deleted file mode 100644 index cad316d701c..00000000000 --- a/core/src/main/java/org/apache/lucene/search/GraphQuery.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.search; - -import org.apache.lucene.index.IndexReader; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Objects; - -/** - * A query that wraps multiple sub-queries generated from a graph token stream. - */ -public final class GraphQuery extends Query { - private final Query[] queries; - private final boolean hasBoolean; - - /** - * Constructor sets the queries and checks if any of them are - * a boolean query. - * - * @param queries the non-null array of queries - */ - public GraphQuery(Query... queries) { - this.queries = Objects.requireNonNull(queries).clone(); - for (Query query : queries) { - if (query instanceof BooleanQuery) { - hasBoolean = true; - return; - } - } - hasBoolean = false; - } - - /** - * Gets the queries - * - * @return unmodifiable list of Query - */ - public List getQueries() { - return Collections.unmodifiableList(Arrays.asList(queries)); - } - - /** - * If there is at least one boolean query or not. - * - * @return true if there is a boolean, false if not - */ - public boolean hasBoolean() { - return hasBoolean; - } - - /** - * Rewrites to a single query or a boolean query where each query is a SHOULD clause. 
- */ - @Override - public Query rewrite(IndexReader reader) throws IOException { - if (queries.length == 0) { - return new BooleanQuery.Builder().build(); - } - - if (queries.length == 1) { - return queries[0]; - } - - BooleanQuery.Builder q = new BooleanQuery.Builder(); - q.setDisableCoord(true); - for (Query clause : queries) { - q.add(clause, BooleanClause.Occur.SHOULD); - } - - return q.build(); - } - - @Override - public String toString(String field) { - StringBuilder builder = new StringBuilder("Graph("); - for (int i = 0; i < queries.length; i++) { - if (i != 0) { - builder.append(", "); - } - builder.append(Objects.toString(queries[i])); - } - builder.append(")"); - return builder.toString(); - } - - @Override - public boolean equals(Object other) { - return sameClassAs(other) && - Arrays.equals(queries, ((GraphQuery) other).queries); - } - - @Override - public int hashCode() { - return 31 * classHash() + Arrays.hashCode(queries); - } -} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 25ef5d1885f..f2f13479c9d 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -167,7 +167,7 @@ public final class AnalysisRegistry implements Closeable { * hide internal data-structures as much as possible. */ tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); - tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings))); + tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories); } @@ -231,7 +231,7 @@ public final class AnalysisRegistry implements Closeable { if ("synonym".equals(typeName)) { return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)); } else if ("synonym_graph".equals(typeName)) { - return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings)); + return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)); } else { return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName); } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java similarity index 88% rename from core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphFilterFactory.java rename to core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java index da9b11b9785..cfb37f0b075 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphFilterFactory.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java @@ -27,8 +27,8 @@ import org.elasticsearch.index.IndexSettings; import java.io.IOException; -public class SynonymGraphFilterFactory extends SynonymTokenFilterFactory { - public SynonymGraphFilterFactory(IndexSettings 
indexSettings, Environment env, AnalysisRegistry analysisRegistry, +public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory { + public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, String name, Settings settings) throws IOException { super(indexSettings, env, analysisRegistry, name, settings); } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index ac96fe8199e..c922cd8b54c 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -188,30 +188,6 @@ public class NumberFieldMapper extends FieldMapper { return HalfFloatPoint.newSetQuery(field, v); } - private float nextDown(float f) { - // HalfFloatPoint.nextDown considers that -0 is the same as +0 - // while point ranges are consistent with Float.compare, so - // they consider that -0 < +0, so we explicitly make sure - // that nextDown(+0) returns -0 - if (Float.floatToIntBits(f) == Float.floatToIntBits(0f)) { - return -0f; - } else { - return HalfFloatPoint.nextDown(f); - } - } - - private float nextUp(float f) { - // HalfFloatPoint.nextUp considers that -0 is the same as +0 - // while point ranges are consistent with Float.compare, so - // they consider that -0 < +0, so we explicitly make sure - // that nextUp(-0) returns +0 - if (Float.floatToIntBits(f) == Float.floatToIntBits(-0f)) { - return +0f; - } else { - return HalfFloatPoint.nextUp(f); - } - } - @Override Query rangeQuery(String field, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper) { @@ -220,16 +196,16 @@ public class NumberFieldMapper extends FieldMapper { if (lowerTerm != null) { l = parse(lowerTerm, false); if (includeLower) { - l = nextDown(l); + l = HalfFloatPoint.nextDown(l); } l = HalfFloatPoint.nextUp(l); } if (upperTerm != null) { u = parse(upperTerm, false); if (includeUpper) { - u = nextUp(u); + u = HalfFloatPoint.nextUp(u); } - u = nextDown(u); + u = HalfFloatPoint.nextDown(u); } return HalfFloatPoint.newRangeQuery(field, l, u); } @@ -302,30 +278,6 @@ public class NumberFieldMapper extends FieldMapper { return FloatPoint.newSetQuery(field, v); } - private float nextDown(float f) { - // Math.nextDown considers that -0 is the same as +0 - // while point ranges are consistent with Float.compare, so - // they consider that -0 < +0, so we explicitly make sure - // that nextDown(+0) returns -0 - if (Float.floatToIntBits(f) == Float.floatToIntBits(0f)) { - return -0f; - } else { - return Math.nextDown(f); - } - } - - private float nextUp(float f) { - // Math.nextUp considers that -0 is the same as +0 - // while point ranges are consistent with Float.compare, so - // they consider that -0 < +0, so we explicitly make sure - // that nextUp(-0) returns +0 - if (Float.floatToIntBits(f) == Float.floatToIntBits(-0f)) { - return +0f; - } else { - return Math.nextUp(f); - } - } - @Override Query rangeQuery(String field, Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper) { @@ -334,13 +286,13 @@ public class NumberFieldMapper extends FieldMapper { if (lowerTerm != null) { l = parse(lowerTerm, false); if (includeLower == false) { - l = nextUp(l); + l = FloatPoint.nextUp(l); } } if (upperTerm != null) { u = parse(upperTerm, false); if (includeUpper == false) { - u = nextDown(u); + u = FloatPoint.nextDown(u); } } return 
@@ -414,30 +366,6 @@ public class NumberFieldMapper extends FieldMapper {
             return DoublePoint.newSetQuery(field, v);
         }
 
-        private double nextDown(double d) {
-            // Math.nextDown considers that -0 is the same as +0
-            // while point ranges are consistent with Double.compare, so
-            // they consider that -0 < +0, so we explicitly make sure
-            // that nextDown(+0) returns -0
-            if (Double.doubleToLongBits(d) == Double.doubleToLongBits(0d)) {
-                return -0d;
-            } else {
-                return Math.nextDown(d);
-            }
-        }
-
-        private double nextUp(double d) {
-            // Math.nextUp considers that -0 is the same as +0
-            // while point ranges are consistent with Double.compare, so
-            // they consider that -0 < +0, so we explicitly make sure
-            // that nextUp(-0) returns +0
-            if (Double.doubleToLongBits(d) == Double.doubleToLongBits(-0d)) {
-                return +0d;
-            } else {
-                return Math.nextUp(d);
-            }
-        }
-
         @Override
         Query rangeQuery(String field, Object lowerTerm, Object upperTerm,
                          boolean includeLower, boolean includeUpper) {
@@ -446,13 +374,13 @@ public class NumberFieldMapper extends FieldMapper {
             if (lowerTerm != null) {
                 l = parse(lowerTerm, false);
                 if (includeLower == false) {
-                    l = nextUp(l);
+                    l = DoublePoint.nextUp(l);
                 }
             }
             if (upperTerm != null) {
                 u = parse(upperTerm, false);
                 if (includeUpper == false) {
-                    u = nextDown(u);
+                    u = DoublePoint.nextDown(u);
                 }
             }
             return DoublePoint.newRangeQuery(field, l, u);
diff --git a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
index c9664265d3a..1fbeb81febc 100644
--- a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
+++ b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
@@ -19,16 +19,7 @@
 
 package org.elasticsearch.index.search;
 
-import static org.apache.lucene.analysis.synonym.SynonymGraphFilter.GRAPH_FLAG;
-
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CachingTokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.synonym.GraphTokenStreamFiniteStrings;
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.ExtendedCommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
@@ -36,7 +27,6 @@ import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.FuzzyQuery;
-import org.apache.lucene.search.GraphQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.PhraseQuery;
@@ -58,8 +48,6 @@ import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.support.QueryParsers;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 
 public class MatchQuery {
 
@@ -316,116 +304,6 @@ public class MatchQuery {
             this.mapper = mapper;
         }
 
-        /**
-         * Creates a query from the analysis chain. Overrides original so all it does is create the token stream and pass that into the
-         * new {@link #createFieldQuery(TokenStream, Occur, String, boolean, int)} method which has all the original query generation logic.
-         *
-         * @param analyzer   analyzer used for this query
-         * @param operator   default boolean operator used for this query
-         * @param field      field to create queries against
-         * @param queryText  text to be passed to the analysis chain
-         * @param quoted     true if phrases should be generated when terms occur at more than one position
-         * @param phraseSlop slop factor for phrase/multiphrase queries
-         */
-        @Override
-        protected final Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText,
-                                               boolean quoted, int phraseSlop) {
-            assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
-
-            // Use the analyzer to get all the tokens, and then build an appropriate
-            // query based on the analysis chain.
-            try (TokenStream source = analyzer.tokenStream(field, queryText)) {
-                return createFieldQuery(source, operator, field, quoted, phraseSlop);
-            } catch (IOException e) {
-                throw new RuntimeException("Error analyzing query text", e);
-            }
-        }
-
-        /**
-         * Creates a query from a token stream. Same logic as {@link #createFieldQuery(Analyzer, Occur, String, String, boolean, int)}
-         * with additional graph token stream detection.
-         *
-         * @param source     the token stream to create the query from
-         * @param operator   default boolean operator used for this query
-         * @param field      field to create queries against
-         * @param quoted     true if phrases should be generated when terms occur at more than one position
-         * @param phraseSlop slop factor for phrase/multiphrase queries
-         */
-        protected final Query createFieldQuery(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted,
-                                               int phraseSlop) {
-            assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
-
-            // Build an appropriate query based on the analysis chain.
-            try (CachingTokenFilter stream = new CachingTokenFilter(source)) {
-
-                TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
-                PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
-                PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class);
-                FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
-
-                if (termAtt == null) {
-                    return null;
-                }
-
-                // phase 1: read through the stream and assess the situation:
-                // counting the number of tokens/positions and marking if we have any synonyms.
-
-                int numTokens = 0;
-                int positionCount = 0;
-                boolean hasSynonyms = false;
-                boolean isGraph = false;
-
-                stream.reset();
-                while (stream.incrementToken()) {
-                    numTokens++;
-                    int positionIncrement = posIncAtt.getPositionIncrement();
-                    if (positionIncrement != 0) {
-                        positionCount += positionIncrement;
-                    } else {
-                        hasSynonyms = true;
-                    }
-
-                    int positionLength = posLenAtt.getPositionLength();
-                    if (!isGraph && positionLength > 1 && ((flagsAtt.getFlags() & GRAPH_FLAG) == GRAPH_FLAG)) {
-                        isGraph = true;
-                    }
-                }
-
-                // phase 2: based on token count, presence of synonyms, and options
-                // formulate a single term, boolean, or phrase.
-
-                if (numTokens == 0) {
-                    return null;
-                } else if (numTokens == 1) {
-                    // single term
-                    return analyzeTerm(field, stream);
-                } else if (isGraph) {
-                    // graph
-                    return analyzeGraph(stream, operator, field, quoted, phraseSlop);
-                } else if (quoted && positionCount > 1) {
-                    // phrase
-                    if (hasSynonyms) {
-                        // complex phrase with synonyms
-                        return analyzeMultiPhrase(field, stream, phraseSlop);
-                    } else {
-                        // simple phrase
-                        return analyzePhrase(field, stream, phraseSlop);
-                    }
-                } else {
-                    // boolean
-                    if (positionCount == 1) {
-                        // only one position, with synonyms
-                        return analyzeBoolean(field, stream);
-                    } else {
-                        // complex case: multiple positions
-                        return analyzeMultiBoolean(field, stream, operator);
-                    }
-                }
-            } catch (IOException e) {
-                throw new RuntimeException("Error analyzing query text", e);
-            }
-        }
-
         @Override
         protected Query newTermQuery(Term term) {
             return blendTermQuery(term, mapper);
@@ -492,30 +370,6 @@ public class MatchQuery {
 
             return booleanQuery;
         }
-
-        /**
-         * Creates a query from a graph token stream by extracting all the finite strings from the graph and using them to create the query.
-         */
-        protected Query analyzeGraph(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted, int phraseSlop)
-            throws IOException {
-            source.reset();
-            GraphTokenStreamFiniteStrings graphTokenStreams = new GraphTokenStreamFiniteStrings();
-            List<TokenStream> tokenStreams = graphTokenStreams.getTokenStreams(source);
-
-            if (tokenStreams.isEmpty()) {
-                return null;
-            }
-
-            List<Query> queries = new ArrayList<>(tokenStreams.size());
-            for (TokenStream ts : tokenStreams) {
-                Query query = createFieldQuery(ts, operator, field, quoted, phraseSlop);
-                if (query != null) {
-                    queries.add(query);
-                }
-            }
-
-            return new GraphQuery(queries.toArray(new Query[0]));
-        }
     }
 
     protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) {
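Editorial aside: the deleted createFieldQuery/analyzeGraph pair is now Lucene's responsibility, but its "phase 1" bookkeeping is a useful pattern on its own: walk the token stream once, count positions, and flag stacked tokens (a position increment of 0 means a synonym sharing the previous position, a position length above 1 means the token spans positions, i.e. a graph). A sketch using only stable analysis APIs; the analyzer choice, field name, and query text are illustrative.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;

class StreamStatsSketch {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream stream = analyzer.tokenStream("field", "say what the fudge")) {
            PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);
            PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
            int numTokens = 0, positionCount = 0;
            boolean hasSynonyms = false, spansPositions = false;
            stream.reset();
            while (stream.incrementToken()) {
                numTokens++;
                if (posInc.getPositionIncrement() == 0) {
                    hasSynonyms = true;        // token stacked on the previous position
                } else {
                    positionCount += posInc.getPositionIncrement();
                }
                if (posLen.getPositionLength() > 1) {
                    spansPositions = true;     // token covers several positions: a graph
                }
            }
            stream.end();
            System.out.println(numTokens + " tokens over " + positionCount + " positions"
                    + ", synonyms=" + hasSynonyms + ", graph=" + spansPositions);
        }
    }
}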
-grant codeBase "${codebase.lucene-core-6.4.0-snapshot-ec38570.jar}" { +grant codeBase "${codebase.lucene-core-6.4.0-snapshot-084f7a0.jar}" { // needed to allow MMapDirectory's "unmap hack" (die unmap hack, die) // java 8 package permission java.lang.RuntimePermission "accessClassInPackage.sun.misc"; @@ -42,7 +42,7 @@ grant codeBase "${codebase.lucene-core-6.4.0-snapshot-ec38570.jar}" { permission java.lang.RuntimePermission "accessDeclaredMembers"; }; -grant codeBase "${codebase.lucene-misc-6.4.0-snapshot-ec38570.jar}" { +grant codeBase "${codebase.lucene-misc-6.4.0-snapshot-084f7a0.jar}" { // needed to allow shard shrinking to use hard-links if possible via lucenes HardlinkCopyDirectoryWrapper permission java.nio.file.LinkPermission "hard"; }; diff --git a/core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy b/core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy index 6f44c37d233..43fbe43f220 100644 --- a/core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy +++ b/core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy @@ -33,7 +33,7 @@ grant codeBase "${codebase.securemock-1.2.jar}" { permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; }; -grant codeBase "${codebase.lucene-test-framework-6.4.0-snapshot-ec38570.jar}" { +grant codeBase "${codebase.lucene-test-framework-6.4.0-snapshot-084f7a0.jar}" { // needed by RamUsageTester permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; // needed for testing hardlinks in StoreRecoveryTests since we install MockFS diff --git a/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java b/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java index f6fbc3410ac..0cd185bc03a 100644 --- a/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java +++ b/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.search; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; @@ -120,9 +119,9 @@ public class MatchQueryIT extends ESIntegTestCase { SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") .operator(Operator.AND).analyzer("lower_syns")).get(); - // 0 = say, 1 = OR(wtf, what), 2 = the, 3 = fudge - // "the" and "fudge" are required here, even though they were part of the synonym which is also expanded - assertNoSearchHits(searchResponse); + // Old synonyms work fine in that case, but it is coincidental + assertHitCount(searchResponse, 1L); + assertSearchHits(searchResponse, "1"); // same query using graph should find correct result searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") diff --git a/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-084f7a0.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-084f7a0.jar.sha1 new file mode 100644 index 00000000000..67db46515f2 --- /dev/null +++ b/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-084f7a0.jar.sha1 @@ -0,0 +1 @@ 
diff --git a/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-084f7a0.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-084f7a0.jar.sha1
new file mode 100644
index 00000000000..67db46515f2
--- /dev/null
+++ b/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-084f7a0.jar.sha1
@@ -0,0 +1 @@
+1f0be480db66169f45a9f0982fbad9f549b88b55
\ No newline at end of file
diff --git a/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-ec38570.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-ec38570.jar.sha1
deleted file mode 100644
index b68a4d5cbd7..00000000000
--- a/modules/lang-expression/licenses/lucene-expressions-6.4.0-snapshot-ec38570.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-031d34e0a604a7cbb5c8ba816d49d9f622adaa3f
\ No newline at end of file
diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-084f7a0.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-084f7a0.jar.sha1
new file mode 100644
index 00000000000..28179b5a015
--- /dev/null
+++ b/plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-084f7a0.jar.sha1
@@ -0,0 +1 @@
+8e6047ca31d0284e1ccac1ac2d5cbf1b8e3e1b04
\ No newline at end of file
diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-ec38570.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-ec38570.jar.sha1
deleted file mode 100644
index 7ee6c4f0787..00000000000
--- a/plugins/analysis-icu/licenses/lucene-analyzers-icu-6.4.0-snapshot-ec38570.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-0850319baf063c5ee54aecabeaddb95efde8711b
\ No newline at end of file
diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-084f7a0.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-084f7a0.jar.sha1
new file mode 100644
index 00000000000..3a94f405278
--- /dev/null
+++ b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-084f7a0.jar.sha1
@@ -0,0 +1 @@
+dca59de9397d9bd33ad0714cd9896fc1bb8f13ef
\ No newline at end of file
diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-ec38570.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-ec38570.jar.sha1
deleted file mode 100644
index c66710ea344..00000000000
--- a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-6.4.0-snapshot-ec38570.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-3a2af1d2e80b9901b3e950f5ac1b6cd1eb408fd3
\ No newline at end of file
diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-084f7a0.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-084f7a0.jar.sha1
new file mode 100644
index 00000000000..b2495d0595b
--- /dev/null
+++ b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-084f7a0.jar.sha1
@@ -0,0 +1 @@
+0459b8e596e91ed00d5b36bc61adad53372c6491
\ No newline at end of file
diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-ec38570.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-ec38570.jar.sha1
deleted file mode 100644
index 06702f8d87e..00000000000
--- a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-6.4.0-snapshot-ec38570.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-7e9243da1482f88a91bd5239316b571259d24341
\ No newline at end of file
diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-084f7a0.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-084f7a0.jar.sha1
new file mode 100644
index 00000000000..4860dad3ad2
--- /dev/null
+++ b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-084f7a0.jar.sha1
@@ -0,0 +1 @@
+4ad39a97e64f0a477a58934318b7f129f8c33c55
\ No newline at end of file
diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-ec38570.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-ec38570.jar.sha1
deleted file mode 100644
index 561a46f2a2c..00000000000
--- a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-6.4.0-snapshot-ec38570.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-2ead714733bb3cc90e9792d76021497946d5af09
\ No newline at end of file
diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-084f7a0.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-084f7a0.jar.sha1
new file mode 100644
index 00000000000..805e3dddf69
--- /dev/null
+++ b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-084f7a0.jar.sha1
@@ -0,0 +1 @@
+1ae21b511636da5abd5f498d20fb087fa07fc7c2
\ No newline at end of file
diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-ec38570.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-ec38570.jar.sha1
deleted file mode 100644
index c3b55dc76a1..00000000000
--- a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-6.4.0-snapshot-ec38570.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-9a8f3b58e6c672276331f54b5c3be49c8014ec5c
\ No newline at end of file
diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-084f7a0.jar.sha1 b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-084f7a0.jar.sha1
new file mode 100644
index 00000000000..d89554321a1
--- /dev/null
+++ b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-084f7a0.jar.sha1
@@ -0,0 +1 @@
+9be4966458f88699fb09fb0f6a3a71017e7678e7
\ No newline at end of file
diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-ec38570.jar.sha1 b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-ec38570.jar.sha1
deleted file mode 100644
index 3fbc82d91c1..00000000000
--- a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-6.4.0-snapshot-ec38570.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-763b3144b9bc53328e923242a3c6614903ee2d7e
\ No newline at end of file
.put("trim", TrimTokenFilterFactory.class) .put("truncate", TruncateTokenFilterFactory.class) .put("turkishlowercase", LowerCaseTokenFilterFactory.class) @@ -275,6 +277,8 @@ public class AnalysisFactoryTestCase extends ESTestCase { .put("fingerprint", Void.class) // for tee-sinks .put("daterecognizer", Void.class) + // to flatten graphs created by the synonym graph filter + .put("flattengraph", Void.class) .immutableMap();