Upgrade master to lucene 5.0 snapshot
This has a lot of improvements in Lucene, particularly around memory usage, merging, safety, compressed bitsets, etc.

On the Elasticsearch side, a summary of the larger changes:

- API changes: the postings API became a "pull" rather than "push" model, the collector API became per-segment, etc. (see the collector sketch after the squashed commit log below).
- Packaging changes: add lucene-backward-codecs.jar as a dependency.
- Improvements to boolean filtering, especially ensuring it will not be slow for SparseBitSet.
- Use the generic BitSet API in plumbing so that the concrete bitset type is an implementation detail.
- Use the generic BitDocIdSetFilter API for the dedicated bitset cache, so there is type safety.
- Changes to support atomic commits.
- Implement Accountable.getChildResources (the detailed memory usage API) for fielddata, etc.
- Change the handling of IndexFormatTooOld/New, since they no longer extend CorruptIndexException.

Closes #8347.

Squashed commit of the following:

commit d90d53f5f21b876efc1e09cbd6d63c538a16cd89 Author: Simon Willnauer <simonw@apache.org> Date: Wed Nov 5 21:35:28 2014 +0100 Make default codec/postings/docvalues format constants commit cb66c22c71cd304a36e7371b199a8c279908ae37 Merge: d4e2f6dad4ff43
Author: Robert Muir <rmuir@apache.org> Date: Wed Nov 5 11:41:13 2014 -0500 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit d4e2f6dfe767a5128c9b9ae9e75036378de08f47 Merge: 4e5445c4111d93
Author: Robert Muir <rmuir@apache.org> Date: Wed Nov 5 06:26:32 2014 -0500 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit 4e5445c775f580730eb01360244e9330c0dc3958 Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 16:19:19 2014 -0500 FixedBitSet -> BitSet commit 9887ea73e8b857eeda7f851ef3722ef580c92acf Merge: 1bf8894fc84666
Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 15:26:25 2014 -0500 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit 1bf8894430de3e566d0dc5623b0cc28b0d674ebb Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 15:22:51 2014 -0500 remove nocommit commit a9c2a2259ff79c69bae7806b64e92d5f472c18c8 Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 13:48:43 2014 -0500 turn jenkins red again commit 067baaaa4d52fce772c81654dcdb5051ea79139f Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 13:18:21 2014 -0500 unzip from stream commit 82b6fba33d362aca2313cc0ca495f28f5ebb9260 Merge: b2214bb6523cd9
Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 13:10:59 2014 -0500 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit b2214bb093ec2f759003c488c3c403c8931db914 Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 13:09:53 2014 -0500 go back to my URL until we can figure out what is up with jenkins commit e7d614172240175a51f580aeaefb6460d21cede9 Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 10:52:54 2014 -0500 try this jenkins commit 337a3c7704efa7c9809bf373152d711ee55f876c Author: Simon Willnauer <simonw@apache.org> Date: Tue Nov 4 16:17:49 2014 +0100 Rename temp-files under lock to prevent metadata reads while renaming commit 77d5ba80d0a76efa549dd753b9f114b2f2d2d29c Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 10:07:11 2014 -0500 continue to treat too-old/too-new as corruption for now commit 98d0fd2f4851bc50e505a94ca592a694d502c51c Author: Robert Muir <rmuir@apache.org> Date: Tue Nov 4 09:24:21 2014 -0500 fix last nocommit commit 643fceed66c8caf22b97fc489d67b4a2a90a1a1c Author: Simon Willnauer <simonw@apache.org> Date: Tue Nov 4 14:46:17 2014 +0100 remove NoSuchDirectoryException commit 2e43c4feba05cfaf451df70f946c0930cbcc4557 Merge: 93826e48163107
Author: Simon Willnauer <simonw@apache.org> Date: Tue Nov 4 14:38:00 2014 +0100 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit 93826e4d56a6a97c2074669014af77ff519bde63 Merge: 7f1012944e24d3
Author: Simon Willnauer <simonw@apache.org> Date: Tue Nov 4 12:54:27 2014 +0100 Merge branch 'master' into enhancement/lucene_5_0_upgrade Conflicts: src/main/java/org/elasticsearch/index/store/DistributorDirectory.java src/main/java/org/elasticsearch/index/store/Store.java src/main/java/org/elasticsearch/indices/recovery/RecoveryStatus.java src/test/java/org/elasticsearch/index/store/DistributorDirectoryTest.java src/test/java/org/elasticsearch/index/store/StoreTest.java src/test/java/org/elasticsearch/indices/recovery/RecoveryStatusTests.java commit 7f10129364623620575c109df725cf54488b3abb Author: Adrien Grand <jpountz@gmail.com> Date: Tue Nov 4 11:32:24 2014 +0100 Fix TopHitsAggregator to not ignore the top-level/leaf collector split. commit 042fadc8603b997bdfdc45ca44fec70dc86774a6 Author: Adrien Grand <jpountz@gmail.com> Date: Tue Nov 4 11:31:20 2014 +0100 Remove MatchDocIdSet in favor of DocValuesDocIdSet. commit 7d877581ff5db585a674c95ac391ac78a0282826 Author: Adrien Grand <jpountz@gmail.com> Date: Tue Nov 4 11:10:08 2014 +0100 Make the and filter use the cost API. Lucene 5 ensured that cost() can safely be used, and this will have the benefit that the order in which filters are specified is not important anymore (only for slow random-access filters in practice). commit 78f1718aa2cd82184db7c3a8393e6215f43eb4a8 Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 23:55:17 2014 -0500 fix previous eclipse import braindamage commit 186c40e9258ce32f22a9a714ab442a310b6376e0 Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 22:32:34 2014 -0500 allow child queries to exhaust iterators again commit b0b1271305e1b6d0c4c4da51a3c54df1aa5c0605 Author: Ryan Ernst <ryan@iernst.net> Date: Mon Nov 3 14:50:44 2014 -0800 Fix nocommit for mapping output. index_options will not be printed if the field is not indexed. commit ba223eb85e399c9620a347a983e29bf703953e7a Author: Ryan Ernst <ryan@iernst.net> Date: Mon Nov 3 14:07:26 2014 -0800 Remove no commit for chinese analyzer provider. We should have a separate issue to address not using this provider on new indexes. commit ca554b03c4471797682b2fb724f25205cf040c4a Author: Ryan Ernst <ryan@iernst.net> Date: Mon Nov 3 13:41:59 2014 -0800 Fix stop tests commit de67c4653ec47dee9c671390536110749d2bb05f Author: Ryan Ernst <ryan@iernst.net> Date: Mon Nov 3 12:51:17 2014 -0800 Remove analysis nocommits, switching over to Lucene43*Filters for backcompat commit 50cae9bec72c25c33a1ab8a8931bccb3355171e2 Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 15:32:25 2014 -0500 add ram accounting and TODO lazy-loading (its no worse than master, can be a followup improvement) for suggesters commit 7a7f0122f138684b312d0f0b03dc2a9c16c15f9c Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 15:11:26 2014 -0500 bump lucene version commit cd0cae5c35e7a9e049f49ae45431f658fb86676b Merge: 446bc093c72073
Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 14:49:05 2014 -0500 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit 446bc09b4e8bf4602d3c252b53ddaa0da65cce2f Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 14:46:30 2014 -0500 remove hack commit a19d85a968d82e6d00292b49630ef6ff2dbf2f32 Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 12:53:11 2014 -0500 dont create exceptions with circular references on corruption (will open a PR for this) commit 0beefb9e821d97c37e90ec556d81ac7b00369b8a Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 11:47:14 2014 -0500 temporarily add craptastic detector for this horrible bug commit e9f2d298bff75f3d1591f8622441e459c3ce7ac3 Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 10:56:01 2014 -0500 add nocommit commit e97f1d50a91a7129650b8effc7a9ecf74ca0569a Merge: c57a3c8f1f50ac
Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 10:12:12 2014 -0500 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit c57a3c8341ed61dca62eaf77fad6b8b48aeb6940 Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 10:11:46 2014 -0500 fix nocommit commit dd0e77e4ec07c7011ab5f6b60b2ead33dc2333d2 Author: Robert Muir <rmuir@apache.org> Date: Mon Nov 3 09:54:09 2014 -0500 nocommit -> TODO, this is in much more places in the codebase, bigger issue commit 3cc3bf56d72d642059f8fe220d6f2fed608363e9 Author: Ryan Ernst <ryan@iernst.net> Date: Sat Nov 1 23:59:17 2014 -0700 Remove nocommit and awaitsfix for edge ngram filter test. commit 89f115245155511c0fbc0d5ee62e63141c3700c1 Author: Ryan Ernst <ryan@iernst.net> Date: Sat Nov 1 23:57:44 2014 -0700 Fix EdgeNGramTokenFilter logic for version <= 4.3, and fixed instanceof checks in corresponding tests to correctly check for reverse filter when applicable. commit 112df869cd199e36aab0e1a7a288bb1fdb2ebf1c Author: Robert Muir <rmuir@apache.org> Date: Sun Nov 2 00:08:30 2014 -0400 execute geo disjoint query/filter as intersects commit e5061273cc685f1252e9a3a9ae4877ec9bce7752 Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 22:58:59 2014 -0400 remove chinese analyzer from docs commit ea1af11b8978fcc551f198e24fe21d52806993ef Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 22:29:00 2014 -0400 fix ram accounting bug commit 53c0a42c6aa81aa6bf81d3aa77b95efd513e0f81 Merge: e3bcd3c6011a18
Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 22:16:29 2014 -0400 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit e3bcd3cc07a4957e12c7b3affc462c31290a9186 Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 22:15:01 2014 -0400 fix url-email back compat (thanks ryan) commit 91d6b096a96c357755abee167098607223be1aad Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 22:11:26 2014 -0400 bump lucene version commit d2bb9568df72b37ec7050d25940160b8517394bc Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 20:33:07 2014 -0400 remove nocommit commit 1d049c471e19e5c457262c7399c5bad9e023b2e3 Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 20:28:58 2014 -0400 fix eclipse to group org/com imports together: without this, its madness commit 09d8c1585ee99b6e63be032732c04ef6fed84ed2 Author: Robert Muir <rmuir@apache.org> Date: Sat Nov 1 14:27:41 2014 -0400 remove nocommit, if you dont liek it, print assembly and tell me how it can be better commit 8a6a294313fdf33b50c7126ec20c07867ecd637c Author: Adrien Grand <jpountz@gmail.com> Date: Fri Oct 31 20:01:55 2014 +0100 Remove deprecated usage of DocIdSets.newDocIDSet. commit 601bee60543610558403298124a84b1b3bbd1045 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 14:13:18 2014 -0400 maybe one of these zillions of annotations will stop thread leaks commit 9d3f69abc7267c5e455aefa26db95cb554b02d62 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 14:05:39 2014 -0400 fix some analysis nocommits commit 312e3a29c77214b8142d21c33a6b2c2b151acf9a Author: Adrien Grand <jpountz@gmail.com> Date: Fri Oct 31 18:28:45 2014 +0100 Remove XConstantScoreQuery/XFilteredQuery/ApplyAcceptedDocsFilter. commit 5a0cb9f8e167215df7f1b1fad11eec6e6c74940f Author: Adrien Grand <jpountz@gmail.com> Date: Fri Oct 31 17:06:45 2014 +0100 Fix misleading documentation of DocIdSets.toCacheable. commit 8b4ef2b5b476fff4c79c0c2a0e4769ead26cf82b Author: Adrien Grand <jpountz@gmail.com> Date: Fri Oct 31 17:05:59 2014 +0100 Fix CustomRandomAccessFilterStrategy to override the right method. commit d7a9a407a615987cfffc651f724fbd8795c9c671 Author: Adrien Grand <jpountz@gmail.com> Date: Fri Oct 31 16:21:35 2014 +0100 Better handle the special case when there is a single SHOULD clause. commit 648ad389f07e92dfc451f345549c9841ba5e4c9a Author: Adrien Grand <jpountz@gmail.com> Date: Fri Oct 31 15:53:38 2014 +0100 Cut over XBooleanFilter to BitDocIdSet.Builder. The idea is similar to what happened to Lucene's BooleanFilter. Yet XBooleanFilter is a bit more sophisticated and I had to slightly change the way it is implemented in order to make it work. The main difference with before is that slow filters are now applied lazily, so eg. if you have 3 MUST clauses, two with a fast iterator and the third with a slow iterator, the previous implementation used to apply the fast iterators first and then only check the slow filter for bits which were set in the bit set. Now we are computing a bit set based on the fast must clauses and then basically returning a BitsFilteredDocIdSet.wrap(bitset, slowClause). Other than that, BooleanFilter still uses the bitset optimizations when or-ing and and-ind filters. Another improvement is that BooleanFilter is now aware of the cost API. 
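A minimal sketch (not part of this change) of the lazy slow-clause idea described in the XBooleanFilter commit above, written against the Lucene DocIdSet/BitsFilteredDocIdSet APIs; the helper class and method names are made up for illustration:

    import java.io.IOException;

    import org.apache.lucene.search.BitsFilteredDocIdSet;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.util.Bits;

    // Illustration only: fast MUST clauses are materialized into a DocIdSet up
    // front, while a slow clause is consulted lazily, i.e. only for documents
    // that are actually visited during iteration.
    final class LazySlowClauseExample {
        static DocIdSet apply(DocIdSet fastClauses, DocIdSet slowClause) throws IOException {
            Bits slowBits = slowClause.bits(); // random-access view, may be null
            if (slowBits == null) {
                return fastClauses; // no cheap random access available
            }
            // Same spirit as the BitsFilteredDocIdSet.wrap(bitset, slowClause) call
            // mentioned above: slowBits is checked only when documents are consumed.
            return BitsFilteredDocIdSet.wrap(fastClauses, slowBits);
        }
    }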
commit b2dad312b4bc9f931dc3a25415dd81c0d9deee08 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 10:18:53 2014 -0400 clear nocommit commit 4851d2091e744294336dfade33906c75fbe695cd Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 15:15:16 2014 +0100 cut over to RoaringDocIdSet commit ca6aec24a901073e65ce4dd6b70964fd3612409e Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 14:57:30 2014 +0100 make nocommit more explicit commit d0742ee2cb7a6c48b0bbb31580b7fbcebdb6ec40 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 09:55:24 2014 -0400 fix standardtokenizer nocommit commit 7d6faccafff22a86af62af0384838391d46695ca Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 14:54:08 2014 +0100 fix compilation commit a038a405c1ff6458ad294e6b5bc469e622f699d0 Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 14:53:43 2014 +0100 fix compilation commit 30c9e307b1f5d80e2deca3392c0298682241207f Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 14:52:35 2014 +0100 fix compilation commit e5139bc5a0a9abd2bdc6ba0dfbcb7e3c2e7b8481 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 09:52:16 2014 -0400 clear nocommit here commit 85dd2cedf7a7994bed871ac421cfda06aaf5c0a5 Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 14:46:17 2014 +0100 fix CompletionPostingsFormatTest commit c0f3781f616c9b0ee3b5c4d0998810f595868649 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 09:38:00 2014 -0400 add tests for these analyzers commit 51f9999b4ad079c283ae762c862fd0e22d00445f Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 14:10:26 2014 +0100 remove nocommit - this is not an issue commit fd1388fa03e622b0738601c8aeb2dbf7949a6dd2 Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Fri Oct 31 14:07:01 2014 +0100 Remove redundant null check commit 3d6dd51b0927337ba941a235446b22e8cd500dc3 Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Fri Oct 31 14:01:37 2014 +0100 Removed the work around to prevent p/c error when invoking #iterator() twice, because the custom query filter wrapper now doesn't transform the result to a cache doc id set any more. I think the transforming to a cachable doc id set in CustomQueryWrappingFilter isn't needed at all, because we use the DocIdSet only once and because of that is just slowed things down. commit 821832a537e00cd1216064b379df3e01d2911d3a Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 13:54:33 2014 +0100 one more nocommit commit 77eb9ea4c4ea50afb2680c29682ddcb3851a9d4f Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Fri Oct 31 13:52:29 2014 +0100 Remove cast commit a400573c034ed602221f801b20a58a9186a06eae Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 13:49:24 2014 +0100 fix stop filter commit 51746087cf8ec34c4d20aa05ba8dbff7b3b43eec Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 13:21:36 2014 +0100 fix changed semantics of FBS.nextSetBit to check for NO_MORE_DOCS commit 8d0a4e2511310f1293860823fe3ba80ac771bbe3 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 08:13:44 2014 -0400 do the bogus cast differently commit 46a5cc5732dea096c0c80ae5ce42911c9c51e44e Author: Simon Willnauer <simonw@apache.org> Date: Fri Oct 31 13:00:16 2014 +0100 I hate it but P/C now passes commit 580c0c2f82bbeacf217e594f22312b11d1bdb839 Merge: a9d3c001645434
Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 31 06:54:31 2014 -0400 fix nocommit/classcast commit a9d3c004d62fe04989f49a897e6ff84973c06eb9 Author: Adrien Grand <jpountz@gmail.com> Date: Fri Oct 31 08:49:31 2014 +0100 Update TODO. commit aa75af0b407792aeef32017f03a6f442ed970baa Author: Robert Muir <rmuir@apache.org> Date: Thu Oct 30 19:18:25 2014 -0400 clear obselete nocommits from lucene bump commit d438534cf41fcbe2d88070e2f27c994625e082c2 Author: Robert Muir <rmuir@apache.org> Date: Thu Oct 30 18:53:20 2014 -0400 throw classcastexception when ES abuses regular filtercache for nested docs commit 2c751f3a8feda43ec127c34769b069de21f3d16f Author: Robert Muir <rmuir@apache.org> Date: Thu Oct 30 18:31:34 2014 -0400 bump lucene revision, fix tests commit d6ef7f6304ae262bf6228a7d661b2a452df332be Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 22:37:58 2014 +0100 fix merge problems commit de9d361f88a9ce6bb3fba85285de41f223c95767 Merge: 41f6aabf6b37a3
Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 22:28:59 2014 +0100 Merge branch 'master' into enhancement/lucene_5_0_upgrade Conflicts: pom.xml src/main/java/org/elasticsearch/Version.java src/main/java/org/elasticsearch/gateway/local/state/meta/MetaDataStateFormat.java commit 41f6aab388aa80c40b08a2facab2617576203a0d Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 17:48:46 2014 +0100 fix potiential NPE commit c4428b12e1ae838b91e847df8b4a8be7f49e10f4 Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 17:38:46 2014 +0100 don't advance iterator in a match(doc) method commit 28ab948e99e3ea4497c9b1e468384806ba7e1790 Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 17:34:58 2014 +0100 don't advance iterator in a match(doc) method commit eb0f33f6634fadfcf4b2bf7327400e568f0427bb Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 16:55:54 2014 +0100 fix GeoUtilsTest commit 7f711fe3eaf73b6c2268cf42d5a41132a61ad831 Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 16:43:16 2014 +0100 Use a dedicated default index option if field type is not indexed by default commit 78e3f37ab779e3e1b25b45a742cc86ab5f975149 Author: Robert Muir <rmuir@apache.org> Date: Thu Oct 30 10:56:14 2014 -0400 disable this test with AwaitsFix to reduce noise commit 9a590f563c8e03a99ecf0505c92d12d7ab20d11d Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 09:38:49 2014 +0100 fix lucene version commit abe3ca1d8bb6b5101b545198f59aec44bacfa741 Author: Simon Willnauer <simonw@apache.org> Date: Thu Oct 30 09:35:05 2014 +0100 fix AnalyzingCompletionLookupProvider to wrok with new codec API commit 464293b245852d60bde050c6d3feb5907dcfbf5f Author: Robert Muir <rmuir@apache.org> Date: Thu Oct 30 00:26:00 2014 -0400 don't try to write stuff to tests class directory commit 031cc6c19f4fe4423a034b515f77e5a0e282a124 Author: Robert Muir <rmuir@apache.org> Date: Thu Oct 30 00:12:36 2014 -0400 AwaitsFix these known issues to reduce noise commit 4600d51891e35847f2d344247d6f915a0605c0d1 Author: Robert Muir <rmuir@apache.org> Date: Thu Oct 30 00:06:53 2014 -0400 openbitset lives on commit 8492bae056249e2555d24acd55f1046b66a667c4 Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 23:42:54 2014 -0400 fixes for filter tests commit 31f24ce4efeda31f97eafdb122346c7047a53bf2 Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 23:12:38 2014 -0400 don't use fieldcache commit 8480789942fdff14a6d2b2cd8134502fe62f20c8 Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 23:04:29 2014 -0400 ancient index no longer supported commit 02e78dc7ebdd827533009f542582e8db44309c57 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 23:37:02 2014 +0100 fix more tests commit ff746c6df23c50b3f3ec24922413b962c8983080 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 23:08:19 2014 +0100 fix all mapper commit e4fb84b517107b25cb064c66f83c9aa814a311b2 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 22:55:54 2014 +0100 fix distributor tests and cut over to FileStore API commit 20c850e2cfe3210cd1fb9e232afed8d4ac045857 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 22:42:18 2014 +0100 use DOCS_ONLY if index=true and current options == null commit 44169c108418413cfe51f5ce23ab82047463e4c2 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 22:33:36 2014 +0100 Fix index=yes|no settings in mappers commit a3c5f77987461a18121156ed345d42ded301c566 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 21:51:41 2014 +0100 fix several field 
mappers conversion from setIndexed to indexOptions commit df84d736908e88a031d710f98e222be68ae96af1 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 21:33:35 2014 +0100 fix SourceFieldMapper to be not indexed commit b2bf01d12a8271a31fb2df601162d0e89924c8f5 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 21:23:08 2014 +0100 Cut over to .liv files in store and corruption tests commit 619004df436f9ef05d24bef1b6a7f084c6b0ad75 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 17:05:52 2014 +0100 fix more tests commit b7ed653a8b464de446e00456bce0a89e47627c38 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 16:19:08 2014 +0100 [STORE] Add dedicated method to write temporary files Recovery writes temporary files which might not end up in the right distributor directories today. This commit adds a dedicated API that allows specifying the target file name in order to create the tempoary file in the correct directory. commit 7d574659f6ae04adc2b857146ad0d8d56ca66f12 Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 10:28:49 2014 -0400 add some leniency to temporary bogus method commit f97022ea7c2259f7a5cf97d924c59ed75ab65b32 Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 10:24:17 2014 -0400 fix MultiCollector bug commit b760533128c2b4eb10ad76e9689ef714293dd819 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 14:56:08 2014 +0100 CheckIndex is now closeable we need to close it commit 9dae9fb6d63546a6c2427be2a2d5c8358f5b1934 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 14:45:11 2014 +0100 s/Lucene51/Lucene50 commit 7aea9b86856a8c1b06a08e7c312ede1168af1287 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 14:42:30 2014 +0100 fix BloomFilterPostingsFormat commit 16fea6fe842e88665d59cc091e8224e8dc6ce08c Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 14:41:16 2014 +0100 fix some codec format issues commit 3d77aa97dd2c4012b63befef3f2ba2525965e8a6 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 14:30:43 2014 +0100 fix CodecTests commit 6ef823b1fde25657438ace1aabd9d552d6ae215e Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 14:26:47 2014 +0100 make it compile commit 9991eee1fe99435118d4dd42b297ffc83fce5ec5 Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 09:12:43 2014 -0400 add an ugly hack for TopHitsAggregator for now commit 03e768a01fcae6b1f4cb50bcceec7d42977ac3e6 Author: Simon Willnauer <simonw@apache.org> Date: Wed Oct 29 14:01:02 2014 +0100 cut over ES090PostingsFormat commit 463d281faadb794fdde3b469326bdaada25af048 Merge: 0f8740a8eac79c
Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 08:30:36 2014 -0400 Merge branch 'master' into enhancement/lucene_5_0_upgrade commit 0f8740a782455a63524a5a82169f6bbbfc613518 Author: Robert Muir <rmuir@apache.org> Date: Wed Oct 29 01:00:15 2014 -0400 fix/hack remaining filter and analysis issues commit df534488569da13b31d66e581456dfd4b55156b9 Author: Robert Muir <rmuir@apache.org> Date: Tue Oct 28 23:11:47 2014 -0400 fix ngrams / openbitset usage commit 11f5dc3b9887f4da80a0fa1818e1350b30599329 Author: Robert Muir <rmuir@apache.org> Date: Tue Oct 28 22:42:44 2014 -0400 hack over sort comparators commit 4ebdc754350f512596f6a02770d223e9f5f7975a Author: Robert Muir <rmuir@apache.org> Date: Tue Oct 28 21:27:07 2014 -0400 compiler errors < 100 commit 2d60c9e29de48ccb0347dd87f7201f47b67b83a0 Author: Robert Muir <rmuir@apache.org> Date: Tue Oct 28 03:13:08 2014 -0400 clear some nocommits around ram usage commit aaf47fe6c0aabcfb2581dd456fc50edf871da758 Author: Robert Muir <rmuir@apache.org> Date: Mon Oct 27 12:27:34 2014 -0400 migrate fieldinfo handling commit ef6ed6d15d8def71cd880d97249678136cd29fe3 Author: Robert Muir <rmuir@apache.org> Date: Mon Oct 27 12:07:13 2014 -0400 more simple fixes commit f475e1048ae697dd9da5bd9da445102b0b7bc5b3 Author: Robert Muir <rmuir@apache.org> Date: Mon Oct 27 11:58:21 2014 -0400 more fielddata ram accounting fixes commit 16b4239eaa9b4262df258257df4f31d39f28a3a2 Author: Simon Willnauer <simonw@apache.org> Date: Mon Oct 27 16:47:32 2014 +0100 add missing file commit 5b542fa2a6da81e36a0c35b8e891a1d8bc58f663 Author: Simon Willnauer <simonw@apache.org> Date: Mon Oct 27 16:43:29 2014 +0100 cut over completion posting formats - still some nocommits commit ecdea49404c4ec4e1b78fb54575825f21b4e096e Author: Robert Muir <rmuir@apache.org> Date: Mon Oct 27 11:21:09 2014 -0400 fielddata accountable fixes commit d43da265718917e20c8264abd43342069198fe9c Author: Simon Willnauer <simonw@apache.org> Date: Mon Oct 27 16:19:53 2014 +0100 cut over BloomFilterPostings to new API commit 29b192ba621c14820175775d01242162b88bd364 Author: Robert Muir <rmuir@apache.org> Date: Mon Oct 27 10:22:51 2014 -0400 fix more analyzers commit 74b4a0c5283e323a7d02490df469497c722780d2 Author: Robert Muir <rmuir@apache.org> Date: Mon Oct 27 09:54:25 2014 -0400 fix tests commit 554084ccb4779dd6b1c65fa7212ad1f64f3a6968 Author: Simon Willnauer <simonw@apache.org> Date: Mon Oct 27 14:51:48 2014 +0100 maintain supressed exceptions on CorruptIndexException commit cf882d9112c5e8ef1e9f2b0f800f7aa59001a4f2 Author: Simon Willnauer <simonw@apache.org> Date: Mon Oct 27 14:47:17 2014 +0100 commitOnClose=false commit ebb2a9189ab2f459b7c6c9985be610fd90dfe410 Author: Simon Willnauer <simonw@apache.org> Date: Mon Oct 27 14:46:06 2014 +0100 cut over indexwriter closeing in InternalEngine commit cd21b3d4706f0b562bd37792d077d60832aff65f Author: Simon Willnauer <simonw@apache.org> Date: Mon Oct 27 14:38:10 2014 +0100 fix constant commit f93f900c4a1c90af3a21a4af5735a7536423fe28 Author: Robert Muir <rmuir@apache.org> Date: Mon Oct 27 09:50:49 2014 -0400 fix test commit a9a752940b1ab4699a6a08ba8b34afca82b843fe Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Mon Oct 27 09:26:18 2014 +0100 Be explicit about the index options commit d9ee815babd030fa2ceaec9f467c105ee755bf6b Author: Simon Willnauer <simonw@apache.org> Date: Sun Oct 26 20:03:44 2014 +0100 cut over store and directory commit b3f5c8e39039dd8f5caac0c4dd1fc3b1116e64ca Author: Robert Muir <rmuir@apache.org> Date: Sun Oct 26 13:08:39 2014 -0400 
more test fixes commit 8842f2684e3606aae0860c27f7a4c53e273d47fb Author: Robert Muir <rmuir@apache.org> Date: Sun Oct 26 12:14:52 2014 -0400 tests manual labor commit c43de5aec337919a3fdc3638406dff17fc80bc98 Author: Robert Muir <rmuir@apache.org> Date: Sun Oct 26 11:04:13 2014 -0400 BytesRef -> BytesRefBuilder commit 020c0d087a2f37566a1db390b0e044ebab030138 Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Sun Oct 26 15:53:37 2014 +0100 Moved over to BitSetFilter commit 48dd1b909e6c52cef733961c9ecebfe4f67109fe Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Sun Oct 26 15:53:11 2014 +0100 Left over Collector api change in ScanContext commit 6ec248ef63f262bcda400181b838fd9244752625 Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Sun Oct 26 15:47:40 2014 +0100 Moved indexed() over to indexOptions != null or indexOptions == null commit 9937aebfd8546ae4bb652cd976b3b43ac5ab7a63 Author: Martijn van Groningen <martijn.v.groningen@gmail.com> Date: Sun Oct 26 13:26:31 2014 +0100 Fixed many compile errors. Mainly around the breaking Collector api change in 5.0. commit fec32c4abc0e3309cf34260c8816305a6f820c9e Author: Robert Muir <rmuir@apache.org> Date: Sat Oct 25 11:22:17 2014 -0400 more easy fixes commit dab22531d801800d17a65dc7c9464148ce8ebffd Author: Robert Muir <rmuir@apache.org> Date: Sat Oct 25 09:33:41 2014 -0400 more progress commit 414767e9a955010076b0497cc4f6d0c1850b48d3 Author: Robert Muir <rmuir@apache.org> Date: Sat Oct 25 06:33:17 2014 -0400 more progress commit ad9d969fddf139a8830254d3eb36a908ba87cc12 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 24 14:28:01 2014 -0400 current state of fun commit 464475eecb0be15d7d084135ed16051f76a7e521 Author: Robert Muir <rmuir@apache.org> Date: Fri Oct 24 11:42:41 2014 -0400 bump to 5.0 snapshot
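The summary above notes that the collector API became per-segment in Lucene 5. As a point of reference, here is a minimal sketch (not taken from this change set) of a counting collector written against the Lucene 5 Collector/LeafCollector split; the class name is invented for the example:

    import java.io.IOException;

    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.LeafCollector;
    import org.apache.lucene.search.Scorer;

    // Illustration only: Lucene 5 asks the Collector for one LeafCollector per
    // segment instead of re-using a single collector via setNextReader().
    public class CountingCollector implements Collector {
        private int count;

        @Override
        public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
            return new LeafCollector() {
                @Override
                public void setScorer(Scorer scorer) throws IOException {
                    // scores are not needed for counting
                }

                @Override
                public void collect(int doc) throws IOException {
                    count++; // doc ids passed here are segment-relative
                }
            };
        }

        @Override
        public boolean needsScores() {
            return false; // pure counting, no scores required
        }

        public int getCount() {
            return count;
        }
    }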
parent ad4ff43f6f
commit 610ce078fb
File diff suppressed because one or more lines are too long
@@ -26,10 +26,6 @@ org.apache.lucene.index.IndexReader#tryIncRef()
@defaultMessage QueryWrapperFilter is cachable by default - use Queries#wrap instead
org.apache.lucene.search.QueryWrapperFilter#<init>(org.apache.lucene.search.Query)

@defaultMessage Because the filtercache doesn't take deletes into account FilteredQuery can't be used - use XFilteredQuery instead
org.apache.lucene.search.FilteredQuery#<init>(org.apache.lucene.search.Query,org.apache.lucene.search.Filter)
org.apache.lucene.search.FilteredQuery#<init>(org.apache.lucene.search.Query,org.apache.lucene.search.Filter,org.apache.lucene.search.FilteredQuery$FilterStrategy)

@defaultMessage Pass the precision step from the mappings explicitly instead
org.apache.lucene.search.NumericRangeQuery#newDoubleRange(java.lang.String,java.lang.Double,java.lang.Double,boolean,boolean)
org.apache.lucene.search.NumericRangeQuery#newFloatRange(java.lang.String,java.lang.Float,java.lang.Float,boolean,boolean)

@@ -9,7 +9,6 @@ following types are supported:
<<brazilian-analyzer,`brazilian`>>,
<<bulgarian-analyzer,`bulgarian`>>,
<<catalan-analyzer,`catalan`>>,
<<chinese-analyzer,`chinese`>>,
<<cjk-analyzer,`cjk`>>,
<<czech-analyzer,`czech`>>,
<<danish-analyzer,`danish`>>,

@@ -339,15 +338,6 @@ The `catalan` analyzer could be reimplemented as a `custom` analyzer as follows:
<2> This filter should be removed unless there are words which should
be excluded from stemming.

[[chinese-analyzer]]
===== `chinese` analyzer

The `chinese` analyzer cannot be reimplemented as a `custom` analyzer
because it depends on the ChineseTokenizer and ChineseFilter classes,
which are not exposed in Elasticsearch. These classes are
deprecated in Lucene 4 and the `chinese` analyzer will be replaced
with the <<analysis-standard-analyzer>> in Lucene 5.

[[cjk-analyzer]]
===== `cjk` analyzer

@@ -144,8 +144,9 @@ The `strategy` parameter accepts the following options:

`random_access_${threshold}`::

If the filter supports random access and if there is at least one matching
document among the first `threshold` ones, then apply the filter first.
If the filter supports random access and if the number of documents in the
index divided by the cardinality of the filter is greater than ${threshold},
then apply the filter first.
Otherwise fall back to `leap_frog_query_first`. `${threshold}` must be
greater than or equal to `1`.

pom.xml

@@ -31,8 +31,8 @@
</parent>

<properties>
<lucene.version>4.10.2</lucene.version>
<lucene.maven.version>4.10.2</lucene.maven.version>
<lucene.version>5.0.0</lucene.version>
<lucene.maven.version>5.0.0-snapshot-1636426</lucene.maven.version>
<tests.jvms>auto</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>

@@ -50,6 +50,10 @@
<id>Codehaus Snapshots</id>
<url>http://repository.codehaus.org/</url>
</repository>
<repository>
<id>Lucene snapshots</id>
<url>https://download.elasticsearch.org/lucenesnapshots/maven/</url>
</repository>
</repositories>

<dependencies>

@@ -84,6 +88,12 @@
<version>${lucene.maven.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-backward-codecs</artifactId>
<version>${lucene.maven.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.elasticsearch.ElasticsearchIllegalArgumentException;

import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.Iterator;

@@ -65,8 +64,8 @@ public class PrefixAnalyzer extends Analyzer {
}

@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
TokenStreamComponents createComponents = analyzer.createComponents(fieldName, reader);
protected TokenStreamComponents createComponents(String fieldName) {
TokenStreamComponents createComponents = analyzer.createComponents(fieldName);
TokenStream stream = new PrefixTokenFilter(createComponents.getTokenStream(), separator, prefix);
TokenStreamComponents tsc = new TokenStreamComponents(createComponents.getTokenizer(), stream);
return tsc;

@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

import java.io.IOException;

@@ -37,8 +36,7 @@ public class UniqueTokenFilter extends TokenFilter {
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);

// use a fixed version, as we don't care about case sensitivity.
private final CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
private final CharArraySet previous = new CharArraySet(8, false);
private final boolean onlyOnSamePosition;

public UniqueTokenFilter(TokenStream in) {

@@ -162,7 +162,7 @@ public abstract class BlendedTermQuery extends Query {
return termContext;
}
TermContext newTermContext = new TermContext(termContext.topReaderContext);
List<AtomicReaderContext> leaves = termContext.topReaderContext.leaves();
List<LeafReaderContext> leaves = termContext.topReaderContext.leaves();
final int len;
if (leaves == null) {
len = 1;

@@ -19,8 +19,8 @@

package org.apache.lucene.queryparser.classic;

import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.lucene.search.XConstantScoreQuery;
import org.elasticsearch.index.query.ExistsFilterParser;
import org.elasticsearch.index.query.QueryParseContext;

@@ -33,6 +33,6 @@ public class ExistsFieldQueryExtension implements FieldQueryExtension {

@Override
public Query query(QueryParseContext parseContext, String queryText) {
return new XConstantScoreQuery(ExistsFilterParser.newFilter(parseContext, queryText, null));
return new ConstantScoreQuery(ExistsFilterParser.newFilter(parseContext, queryText, null));
}
}

@@ -25,12 +25,16 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.MatchNoDocsQuery;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperService;

@@ -79,12 +83,12 @@ public class MapperQueryParser extends QueryParser {
private String quoteFieldSuffix;

public MapperQueryParser(QueryParseContext parseContext) {
super(Lucene.QUERYPARSER_VERSION, null, null);
super(null, null);
this.parseContext = parseContext;
}

public MapperQueryParser(QueryParserSettings settings, QueryParseContext parseContext) {
super(Lucene.QUERYPARSER_VERSION, settings.defaultField(), settings.defaultAnalyzer());
super(settings.defaultField(), settings.defaultAnalyzer());
this.parseContext = parseContext;
reset(settings);
}

@@ -855,8 +859,8 @@ public class MapperQueryParser extends QueryParser {
}

private void applySlop(Query q, int slop) {
if (q instanceof XFilteredQuery) {
applySlop(((XFilteredQuery)q).getQuery(), slop);
if (q instanceof FilteredQuery) {
applySlop(((FilteredQuery)q).getQuery(), slop);
}
if (q instanceof PhraseQuery) {
((PhraseQuery) q).setSlop(slop);

@@ -19,8 +19,8 @@

package org.apache.lucene.queryparser.classic;

import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.lucene.search.XConstantScoreQuery;
import org.elasticsearch.index.query.MissingFilterParser;
import org.elasticsearch.index.query.QueryParseContext;

@@ -33,7 +33,7 @@ public class MissingFieldQueryExtension implements FieldQueryExtension {

@Override
public Query query(QueryParseContext parseContext, String queryText) {
return new XConstantScoreQuery(MissingFilterParser.newFilter(parseContext, queryText,
return new ConstantScoreQuery(MissingFilterParser.newFilter(parseContext, queryText,
MissingFilterParser.DEFAULT_EXISTENCE_VALUE, MissingFilterParser.DEFAULT_NULL_VALUE, null));
}
}

@@ -58,7 +58,7 @@ public class QueryParserSettings {
private Analyzer forcedAnalyzer = null;
private Analyzer forcedQuoteAnalyzer = null;
private String quoteFieldSuffix = null;
private MultiTermQuery.RewriteMethod rewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
private MultiTermQuery.RewriteMethod rewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
private String minimumShouldMatch;
private boolean lenient;
private Locale locale;

@@ -0,0 +1,108 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lucene.search;

import java.io.IOException;

// this is just one possible solution for "early termination" !

/**
 * Abstract decorator class of a DocIdSetIterator
 * implementation that provides on-demand filter/validation
 * mechanism on an underlying DocIdSetIterator. See {@link
 * FilteredDocIdSet}.
 */
public abstract class XFilteredDocIdSetIterator extends DocIdSetIterator {
  protected DocIdSetIterator _innerIter;
  private int doc;

  /**
   * Constructor.
   * @param innerIter Underlying DocIdSetIterator.
   */
  public XFilteredDocIdSetIterator(DocIdSetIterator innerIter) {
    if (innerIter == null) {
      throw new IllegalArgumentException("null iterator");
    }
    _innerIter = innerIter;
    doc = -1;
  }

  /** Return the wrapped {@link DocIdSetIterator}. */
  public DocIdSetIterator getDelegate() {
    return _innerIter;
  }

  /**
   * Validation method to determine whether a docid should be in the result set.
   * @param doc docid to be tested
   * @return true if input docid should be in the result set, false otherwise.
   * @see #FilteredDocIdSetIterator(DocIdSetIterator)
   * @throws CollectionTerminatedException if the underlying iterator is exhausted.
   */
  protected abstract boolean match(int doc);

  @Override
  public int docID() {
    return doc;
  }

  @Override
  public int nextDoc() throws IOException {
    try {
      while ((doc = _innerIter.nextDoc()) != NO_MORE_DOCS) {
        if (match(doc)) {
          return doc;
        }
      }
    } catch (CollectionTerminatedException e) {
      return doc = NO_MORE_DOCS;
    }
    return doc;
  }

  @Override
  public int advance(int target) throws IOException {
    doc = _innerIter.advance(target);
    try {
      if (doc != NO_MORE_DOCS) {
        if (match(doc)) {
          return doc;
        } else {
          while ((doc = _innerIter.nextDoc()) != NO_MORE_DOCS) {
            if (match(doc)) {
              return doc;
            }
          }
          return doc;
        }
      }
    } catch (CollectionTerminatedException e) {
      return doc = NO_MORE_DOCS;
    }
    return doc;
  }

  @Override
  public long cost() {
    return _innerIter.cost();
  }
}

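A short usage sketch (not part of the diff) for the XFilteredDocIdSetIterator class added above: callers subclass it and implement match(int) to filter an existing iterator. The even-docs predicate below is only an example:

    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.search.XFilteredDocIdSetIterator;

    // Example only: expose just the even doc ids of a delegate iterator.
    final class EvenDocsIterator extends XFilteredDocIdSetIterator {
        EvenDocsIterator(DocIdSetIterator delegate) {
            super(delegate);
        }

        @Override
        protected boolean match(int doc) {
            return (doc & 1) == 0;
        }
    }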
@@ -18,7 +18,7 @@

package org.apache.lucene.search.postingshighlight;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.IndexSearcher;

@@ -94,7 +94,7 @@ public final class CustomPostingsHighlighter extends XPostingsHighlighter {
public Snippet[] highlightDoc(String field, BytesRef[] terms, IndexSearcher searcher, int docId, int maxPassages) throws IOException {
IndexReader reader = searcher.getIndexReader();
IndexReaderContext readerContext = reader.getContext();
List<AtomicReaderContext> leaves = readerContext.leaves();
List<LeafReaderContext> leaves = readerContext.leaves();

String[] contents = new String[]{loadCurrentFieldValue()};
Map<Integer, Object> snippetsMap = highlightField(field, contents, getBreakIterator(field), terms, new int[]{docId}, leaves, maxPassages);

@@ -289,7 +289,7 @@ public class XPostingsHighlighter {
query.extractTerms(queryTerms);

IndexReaderContext readerContext = reader.getContext();
List<AtomicReaderContext> leaves = readerContext.leaves();
List<LeafReaderContext> leaves = readerContext.leaves();

// Make our own copies because we sort in-place:
int[] docids = new int[docidsIn.length];

@@ -384,8 +384,8 @@ public class XPostingsHighlighter {
}

//BEGIN EDIT: made protected so that we can call from our subclass and pass in the terms by ourselves
protected Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext> leaves, int maxPassages) throws IOException {
//private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<AtomicReaderContext > leaves, int maxPassages) throws IOException {
protected Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<LeafReaderContext> leaves, int maxPassages) throws IOException {
//private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<LeafReaderContext > leaves, int maxPassages) throws IOException {
//END EDIT

Map<Integer,Object> highlights = new HashMap<>();

@@ -408,8 +408,8 @@ public class XPostingsHighlighter {
bi.setText(content);
int doc = docids[i];
int leaf = ReaderUtil.subIndex(doc, leaves);
AtomicReaderContext subContext = leaves.get(leaf);
AtomicReader r = subContext.reader();
LeafReaderContext subContext = leaves.get(leaf);
LeafReader r = subContext.reader();
Terms t = r.terms(field);
if (t == null) {
continue; // nothing to do

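The two highlighter hunks above are part of the mechanical AtomicReader*/Leaf* rename in Lucene 5. A minimal sketch (not from this diff) of walking per-segment leaves with the renamed types, with an invented helper name:

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.LeafReaderContext;

    final class LeafWalkExample {
        // Sum maxDoc() over all segments; there is one LeafReaderContext per segment.
        static long totalMaxDoc(IndexReader reader) {
            long sum = 0;
            for (LeafReaderContext ctx : reader.leaves()) {
                LeafReader leaf = ctx.reader();
                sum += leaf.maxDoc();
            }
            return sum;
        }
    }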
@@ -36,10 +36,11 @@ import org.apache.lucene.util.fst.Util.Result;
import org.apache.lucene.util.fst.Util.TopResults;
import org.elasticsearch.common.collect.HppcMaps;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

/**

@@ -444,9 +445,9 @@ public class XAnalyzingSuggester extends Lookup {
@Override
public void build(InputIterator iterator) throws IOException {
String prefix = getClass().getSimpleName();
File directory = OfflineSorter.defaultTempDir();
File tempInput = File.createTempFile(prefix, ".input", directory);
File tempSorted = File.createTempFile(prefix, ".sorted", directory);
Path directory = OfflineSorter.defaultTempDir();
Path tempInput = Files.createTempFile(directory, prefix, ".input");
Path tempSorted = Files.createTempFile(directory, prefix, ".sorted");

hasPayloads = iterator.hasPayloads();

@@ -530,7 +531,7 @@ public class XAnalyzingSuggester extends Lookup {
new OfflineSorter(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);

// Free disk space:
tempInput.delete();
Files.delete(tempInput);

reader = new OfflineSorter.ByteSequencesReader(tempSorted);

@@ -625,14 +626,13 @@ public class XAnalyzingSuggester extends Lookup {

success = true;
} finally {
if (success) {
IOUtils.close(reader, writer);
} else {
IOUtils.closeWhileHandlingException(reader, writer);
}
IOUtils.closeWhileHandlingException(reader, writer);

tempInput.delete();
tempSorted.delete();
if (success) {
IOUtils.deleteFilesIfExist(tempInput, tempSorted);
} else {
IOUtils.deleteFilesIgnoringExceptions(tempInput, tempSorted);
}
}
}

@@ -24,11 +24,18 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.queries.TermFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.lucene.search.XBooleanFilter;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;

@@ -81,9 +88,6 @@ public class CustomFieldQuery extends FieldQuery {
} else if (sourceQuery instanceof FilteredQuery) {
flatten(((FilteredQuery) sourceQuery).getQuery(), reader, flatQueries);
flatten(((FilteredQuery) sourceQuery).getFilter(), reader, flatQueries);
} else if (sourceQuery instanceof XFilteredQuery) {
flatten(((XFilteredQuery) sourceQuery).getQuery(), reader, flatQueries);
flatten(((XFilteredQuery) sourceQuery).getFilter(), reader, flatQueries);
} else if (sourceQuery instanceof MultiPhrasePrefixQuery) {
flatten(sourceQuery.rewrite(reader), reader, flatQueries);
} else if (sourceQuery instanceof FiltersFunctionScoreQuery) {

@@ -18,8 +18,6 @@
*/
package org.elasticsearch;

import org.apache.lucene.index.CorruptIndexException;

import java.io.IOException;

/**

@@ -39,14 +37,20 @@ public class ElasticsearchCorruptionException extends IOException {
/**
* Creates a new {@link ElasticsearchCorruptionException} with the given exceptions stacktrace.
* This constructor copies the stacktrace as well as the message from the given
* {@link org.apache.lucene.index.CorruptIndexException} into this exception.
* {@code Throwable} into this exception.
*
* @param ex the exception cause
*/
public ElasticsearchCorruptionException(CorruptIndexException ex) {
public ElasticsearchCorruptionException(Throwable ex) {
this(ex.getMessage());
if (ex != null) {
this.setStackTrace(ex.getStackTrace());
}
Throwable[] suppressed = ex.getSuppressed();
if (suppressed != null) {
for (Throwable supressedExc : suppressed) {
addSuppressed(supressedExc);
}
}
}
}

@@ -19,11 +19,15 @@

package org.elasticsearch;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.rest.RestStatus;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.List;

@@ -161,12 +165,19 @@ public final class ExceptionsHelper {
return first;
}

public static IOException unwrapCorruption(Throwable t) {
return (IOException) unwrap(t, CorruptIndexException.class,
IndexFormatTooOldException.class,
IndexFormatTooNewException.class);
}

public static <T extends Throwable> T unwrap(Throwable t, Class<T> clazz) {
public static Throwable unwrap(Throwable t, Class<?>... clazzes) {
if (t != null) {
do {
if (clazz.isInstance(t)) {
return clazz.cast(t);
for (Class<?> clazz : clazzes) {
if (clazz.isInstance(t)) {
return t;
}
}
} while ((t = t.getCause()) != null);
}

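A hedged usage sketch for the new ExceptionsHelper.unwrapCorruption above: since IndexFormatTooOldException and IndexFormatTooNewException no longer extend CorruptIndexException in Lucene 5, callers unwrap all three types and, as the commit log notes, continue to treat too-old/too-new as corruption for now. The surrounding class and method are invented for the example:

    import java.io.IOException;

    import org.elasticsearch.ExceptionsHelper;

    final class CorruptionCheckExample {
        // Illustration only: rethrow anything that unwraps to a corruption-like cause.
        static void rethrowIfCorruption(Throwable t) throws IOException {
            IOException corruption = ExceptionsHelper.unwrapCorruption(t);
            if (corruption != null) {
                throw corruption;
            }
        }
    }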
@@ -41,81 +41,84 @@ public class Version implements Serializable {
// AA values below 50 are beta builds, and below 99 are RC builds, with 99 indicating a release
// the (internal) format of the id is there so we can easily do after/before checks on the id

// NOTE: indexes created with 3.6 use this constant for e.g. analysis chain emulation (imperfect)
public static final org.apache.lucene.util.Version LUCENE_3_EMULATION_VERSION = org.apache.lucene.util.Version.LUCENE_4_0_0;

public static final int V_0_18_0_ID = /*00*/180099;
public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_0 = new Version(V_0_18_0_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_1_ID = /*00*/180199;
public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_1 = new Version(V_0_18_1_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_2_ID = /*00*/180299;
public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_2 = new Version(V_0_18_2_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_3_ID = /*00*/180399;
public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_3 = new Version(V_0_18_3_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_4_ID = /*00*/180499;
public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_4 = new Version(V_0_18_4_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_5_ID = /*00*/180599;
public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_5 = new Version(V_0_18_5_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_6_ID = /*00*/180699;
public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_6 = new Version(V_0_18_6_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_7_ID = /*00*/180799;
public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_7 = new Version(V_0_18_7_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_18_8_ID = /*00*/180899;
public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_18_8 = new Version(V_0_18_8_ID, false, LUCENE_3_EMULATION_VERSION);

public static final int V_0_19_0_RC1_ID = /*00*/190051;
public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_0_RC1 = new Version(V_0_19_0_RC1_ID, false, LUCENE_3_EMULATION_VERSION);

public static final int V_0_19_0_RC2_ID = /*00*/190052;
public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_0_RC2 = new Version(V_0_19_0_RC2_ID, false, LUCENE_3_EMULATION_VERSION);

public static final int V_0_19_0_RC3_ID = /*00*/190053;
public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_0_RC3 = new Version(V_0_19_0_RC3_ID, false, LUCENE_3_EMULATION_VERSION);

public static final int V_0_19_0_ID = /*00*/190099;
public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_0 = new Version(V_0_19_0_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_1_ID = /*00*/190199;
public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_1 = new Version(V_0_19_1_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_2_ID = /*00*/190299;
public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_2 = new Version(V_0_19_2_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_3_ID = /*00*/190399;
public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_3 = new Version(V_0_19_3_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_4_ID = /*00*/190499;
public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_4 = new Version(V_0_19_4_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_5_ID = /*00*/190599;
public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_5 = new Version(V_0_19_5_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_6_ID = /*00*/190699;
public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_6 = new Version(V_0_19_6_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_7_ID = /*00*/190799;
public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_7 = new Version(V_0_19_7_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_8_ID = /*00*/190899;
public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_8 = new Version(V_0_19_8_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_9_ID = /*00*/190999;
public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_9 = new Version(V_0_19_9_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_10_ID = /*00*/191099;
public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_10 = new Version(V_0_19_10_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_11_ID = /*00*/191199;
public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_11 = new Version(V_0_19_11_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_12_ID = /*00*/191299;
public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_12 = new Version(V_0_19_12_ID, false, LUCENE_3_EMULATION_VERSION);
public static final int V_0_19_13_ID = /*00*/191399;
public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
public static final Version V_0_19_13 = new Version(V_0_19_13_ID, false, LUCENE_3_EMULATION_VERSION);

public static final int V_0_20_0_RC1_ID = /*00*/200051;
public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_0_RC1 = new Version(V_0_20_0_RC1_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_0_ID = /*00*/200099;
|
||||
public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_0 = new Version(V_0_20_0_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_1_ID = /*00*/200199;
|
||||
public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_1 = new Version(V_0_20_1_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_2_ID = /*00*/200299;
|
||||
public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_2 = new Version(V_0_20_2_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_3_ID = /*00*/200399;
|
||||
public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_3 = new Version(V_0_20_3_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_4_ID = /*00*/200499;
|
||||
public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_4 = new Version(V_0_20_4_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_5_ID = /*00*/200599;
|
||||
public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_5 = new Version(V_0_20_5_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_6_ID = /*00*/200699;
|
||||
public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_6 = new Version(V_0_20_6_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
public static final int V_0_20_7_ID = /*00*/200799;
|
||||
public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false, org.apache.lucene.util.Version.LUCENE_3_6);
|
||||
public static final Version V_0_20_7 = new Version(V_0_20_7_ID, false, LUCENE_3_EMULATION_VERSION);
|
||||
|
||||
public static final int V_0_90_0_Beta1_ID = /*00*/900001;
|
||||
public static final Version V_0_90_0_Beta1 = new Version(V_0_90_0_Beta1_ID, false, org.apache.lucene.util.Version.LUCENE_4_1);
|
||||
|
@ -213,7 +216,7 @@ public class Version implements Serializable {
|
|||
public static final int V_1_5_0_ID = /*00*/1050099;
|
||||
public static final Version V_1_5_0 = new Version(V_1_5_0_ID, false, org.apache.lucene.util.Version.LUCENE_4_10_2);
|
||||
public static final int V_2_0_0_ID = /*00*/2000099;
|
||||
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_4_10_2);
|
||||
public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_5_0_0);
|
||||
|
||||
public static final Version CURRENT = V_2_0_0;
|
||||
|
||||
|
|
|
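The pre-1.0 constants above now reference LUCENE_3_EMULATION_VERSION because org.apache.lucene.util.Version.LUCENE_3_6 no longer exists in Lucene 5.0. The constant's definition is not part of this excerpt; a minimal sketch, assuming all 3.x-era releases are mapped onto a single 4.x version that the backwards codecs can still handle, could look like:

    // Assumed definition (not shown in this diff): one Lucene version stands in for every
    // pre-1.0 Elasticsearch release, since Version.LUCENE_3_6 was removed in Lucene 5.0.
    private static final org.apache.lucene.util.Version LUCENE_3_EMULATION_VERSION =
            org.apache.lucene.util.Version.LUCENE_4_0_0;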
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.action.mlt;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
|
@ -284,7 +285,7 @@ public class TransportMoreLikeThisAction extends HandledTransportAction<MoreLike
|
|||
docMapper.parse(SourceToParse.source(getResponse.getSourceAsBytesRef()).type(request.type()).id(request.id()), new DocumentMapper.ParseListenerAdapter() {
|
||||
@Override
|
||||
public boolean beforeFieldAdded(FieldMapper fieldMapper, Field field, Object parseContext) {
|
||||
if (!field.fieldType().indexed()) {
|
||||
if (field.fieldType().indexOptions() == IndexOptions.NONE) {
|
||||
return false;
|
||||
}
|
||||
if (fieldMapper instanceof InternalMapper) {
|
||||
|
|
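For context, FieldType.indexed() is gone in Lucene 5.0; whether a field is indexed is now derived from its IndexOptions, which is what the replacement above relies on. A small sketch of the equivalent check, assuming a Lucene Field named field as in the listener:

    // A field is unindexed in Lucene 5.0 when its index options are NONE.
    if (field.fieldType().indexOptions() == IndexOptions.NONE) {
        return false; // nothing to extract from a field that is not indexed
    }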
|
@ -294,11 +294,6 @@ public final class TermVectorFields extends Fields {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef text) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
@ -345,11 +340,6 @@ public final class TermVectorFields extends Fields {
|
|||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() throws IOException {
|
||||
return numTerms;
|
||||
|
|
|
@ -86,8 +86,8 @@ public class FsBlobContainer extends AbstractBlobContainer {
|
|||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
IOUtils.fsync(file, false);
|
||||
IOUtils.fsync(path, true);
|
||||
IOUtils.fsync(file.toPath(), false);
|
||||
IOUtils.fsync(path.toPath(), true);
|
||||
}
|
||||
}, blobStore.bufferSizeInBytes());
|
||||
}
|
||||
|
|
|
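Both fsync call sites change because Lucene 5's IOUtils.fsync now takes a java.nio.file.Path rather than a java.io.File. A hedged sketch of the conversion, assuming the existing file and path fields:

    // Convert the legacy File handles before syncing the blob file and its parent directory.
    IOUtils.fsync(file.toPath(), false);   // fsync the file contents
    IOUtils.fsync(path.toPath(), true);    // fsync the directory entry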
@ -154,7 +154,7 @@ public class FileSystemUtils {
|
|||
* because not all file systems and operating systems allow fsyncing a directory)
|
||||
*/
|
||||
public static void syncFile(File fileToSync, boolean isDir) throws IOException {
|
||||
IOUtils.fsync(fileToSync, isDir);
|
||||
IOUtils.fsync(fileToSync.toPath(), isDir);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,7 +21,10 @@ package org.elasticsearch.common.lucene;
|
|||
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.DocValuesFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -56,8 +59,18 @@ public class Lucene {
|
|||
public static final Version VERSION = Version.LATEST;
|
||||
public static final Version ANALYZER_VERSION = VERSION;
|
||||
public static final Version QUERYPARSER_VERSION = VERSION;
|
||||
public static final String LATEST_DOC_VALUES_FORMAT = "Lucene50";
|
||||
public static final String LATEST_POSTINGS_FORMAT = "Lucene50";
|
||||
public static final String LATEST_CODEC = Codec.getDefault().getName();
|
||||
|
||||
public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer(ANALYZER_VERSION));
|
||||
static {
Deprecated annotation = PostingsFormat.forName(LATEST_POSTINGS_FORMAT).getClass().getAnnotation(Deprecated.class);
assert annotation == null : "PostingsFormat " + LATEST_POSTINGS_FORMAT + " is deprecated";
annotation = DocValuesFormat.forName(LATEST_DOC_VALUES_FORMAT).getClass().getAnnotation(Deprecated.class);
assert annotation == null : "DocValuesFormat " + LATEST_DOC_VALUES_FORMAT + " is deprecated";
}
|
||||
public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer());
|
||||
public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());
|
||||
|
||||
public static final ScoreDoc[] EMPTY_SCORE_DOCS = new ScoreDoc[0];
|
||||
|
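The new LATEST_* constants make the default formats explicit. An illustrative, non-authoritative usage sketch, resolving the formats by name the same way the static block above does:

    PostingsFormat postings = PostingsFormat.forName(Lucene.LATEST_POSTINGS_FORMAT);      // "Lucene50"
    DocValuesFormat docValues = DocValuesFormat.forName(Lucene.LATEST_DOC_VALUES_FORMAT); // "Lucene50"
    String codecName = Lucene.LATEST_CODEC;                                               // Codec.getDefault().getName()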
@ -81,18 +94,14 @@ public class Lucene {
|
|||
* Reads the segments infos, failing if it fails to load
|
||||
*/
|
||||
public static SegmentInfos readSegmentInfos(Directory directory) throws IOException {
|
||||
final SegmentInfos sis = new SegmentInfos();
|
||||
sis.read(directory);
|
||||
return sis;
|
||||
return SegmentInfos.readLatestCommit(directory);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the segments infos from the given commit, failing if it fails to load
|
||||
*/
|
||||
public static SegmentInfos readSegmentInfos(IndexCommit commit, Directory directory) throws IOException {
|
||||
final SegmentInfos sis = new SegmentInfos();
|
||||
sis.read(directory, commit.getSegmentsFileName());
|
||||
return sis;
|
||||
return SegmentInfos.readCommit(directory, commit.getSegmentsFileName());
|
||||
}
|
||||
|
||||
public static void checkSegmentInfoIntegrity(final Directory directory) throws IOException {
|
||||
|
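Both readSegmentInfos variants now delegate to the static factory methods Lucene 5.0 provides instead of instantiating SegmentInfos and calling read(...). A minimal usage sketch, assuming a Directory and an IndexCommit are in scope:

    SegmentInfos latest = SegmentInfos.readLatestCommit(directory);                          // newest segments_N
    SegmentInfos atCommit = SegmentInfos.readCommit(directory, commit.getSegmentsFileName());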
@ -483,11 +492,13 @@ public class Lucene {
|
|||
* A collector that terminates early by throwing {@link org.elasticsearch.common.lucene.Lucene.EarlyTerminationException}
|
||||
* when the count of matched documents has reached <code>maxCountHits</code>
|
||||
*/
|
||||
public final static class EarlyTerminatingCollector extends Collector {
|
||||
public final static class EarlyTerminatingCollector extends SimpleCollector {
|
||||
|
||||
private final int maxCountHits;
|
||||
private final Collector delegate;
|
||||
|
||||
private int count = 0;
|
||||
private LeafCollector leafCollector;
|
||||
|
||||
EarlyTerminatingCollector(int maxCountHits) {
|
||||
this.maxCountHits = maxCountHits;
|
||||
|
@ -512,12 +523,12 @@ public class Lucene {
|
|||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
delegate.setScorer(scorer);
|
||||
leafCollector.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
delegate.collect(doc);
|
||||
leafCollector.collect(doc);
|
||||
|
||||
if (++count >= maxCountHits) {
|
||||
throw new EarlyTerminationException("early termination [CountBased]");
|
||||
|
@ -525,13 +536,13 @@ public class Lucene {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException {
|
||||
delegate.setNextReader(atomicReaderContext);
|
||||
public void doSetNextReader(LeafReaderContext atomicReaderContext) throws IOException {
|
||||
leafCollector = delegate.getLeafCollector(atomicReaderContext);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return delegate.acceptsDocsOutOfOrder();
|
||||
return leafCollector.acceptsDocsOutOfOrder();
|
||||
}
|
||||
}
|
||||
|
||||
|
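EarlyTerminatingCollector is representative of the collector migration in this change: Lucene 5's collectors are per-segment, so a wrapping collector asks its delegate for a LeafCollector in doSetNextReader(...) and forwards setScorer/collect to that leaf. A self-contained sketch of the pattern (not the exact Elasticsearch class), using the snapshot API as it appears in this diff:

    import java.io.IOException;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.LeafCollector;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.search.SimpleCollector;

    // Forwards every call to the per-segment LeafCollector obtained from the delegate.
    final class ForwardingCollector extends SimpleCollector {
        private final Collector delegate;
        private LeafCollector leafCollector;

        ForwardingCollector(Collector delegate) {
            this.delegate = delegate;
        }

        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
            leafCollector = delegate.getLeafCollector(context);  // fresh leaf for each segment
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            leafCollector.setScorer(scorer);
        }

        @Override
        public void collect(int doc) throws IOException {
            leafCollector.collect(doc);                          // doc is segment-relative
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return leafCollector.acceptsDocsOutOfOrder();
        }
    }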
@ -545,10 +556,11 @@ public class Lucene {
|
|||
|
||||
/**
|
||||
* Returns <tt>true</tt> iff the given exception or
* one of its causes is an instance of {@link CorruptIndexException} otherwise <tt>false</tt>.
* one of its causes is an instance of {@link CorruptIndexException},
* {@link IndexFormatTooOldException}, or {@link IndexFormatTooNewException} otherwise <tt>false</tt>.
|
||||
*/
|
||||
public static boolean isCorruptionException(Throwable t) {
|
||||
return ExceptionsHelper.unwrap(t, CorruptIndexException.class) != null;
|
||||
return ExceptionsHelper.unwrapCorruption(t) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -19,23 +19,21 @@
|
|||
|
||||
package org.elasticsearch.common.lucene;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.ScoreCachingWrappingScorer;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.*;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class MinimumScoreCollector extends Collector {
|
||||
public class MinimumScoreCollector extends SimpleCollector {
|
||||
|
||||
private final Collector collector;
|
||||
|
||||
private final float minimumScore;
|
||||
|
||||
private Scorer scorer;
|
||||
private LeafCollector leafCollector;
|
||||
|
||||
public MinimumScoreCollector(Collector collector, float minimumScore) {
|
||||
this.collector = collector;
|
||||
|
@ -48,23 +46,23 @@ public class MinimumScoreCollector extends Collector {
|
|||
scorer = new ScoreCachingWrappingScorer(scorer);
|
||||
}
|
||||
this.scorer = scorer;
|
||||
collector.setScorer(scorer);
|
||||
leafCollector.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
if (scorer.score() >= minimumScore) {
|
||||
collector.collect(doc);
|
||||
leafCollector.collect(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
collector.setNextReader(context);
|
||||
public void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
leafCollector = collector.getLeafCollector(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return collector.acceptsDocsOutOfOrder();
|
||||
return leafCollector.acceptsDocsOutOfOrder();
|
||||
}
|
||||
}
|
|
@ -19,10 +19,8 @@
|
|||
|
||||
package org.elasticsearch.common.lucene;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.ScoreCachingWrappingScorer;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.elasticsearch.common.lucene.search.XCollector;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -30,15 +28,19 @@ import java.io.IOException;
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public class MultiCollector extends XCollector {
|
||||
public class MultiCollector extends SimpleCollector implements XCollector {
|
||||
|
||||
private final Collector collector;
|
||||
|
||||
private final Collector[] collectors;
|
||||
|
||||
private LeafCollector leafCollector;
|
||||
private final LeafCollector[] leafCollectors;
|
||||
|
||||
|
||||
public MultiCollector(Collector collector, Collector[] collectors) {
|
||||
this.collector = collector;
|
||||
this.collectors = collectors;
|
||||
this.leafCollectors = new LeafCollector[collectors.length];
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -47,35 +49,35 @@ public class MultiCollector extends XCollector {
|
|||
if (!(scorer instanceof ScoreCachingWrappingScorer)) {
|
||||
scorer = new ScoreCachingWrappingScorer(scorer);
|
||||
}
|
||||
collector.setScorer(scorer);
|
||||
for (Collector collector : collectors) {
|
||||
collector.setScorer(scorer);
|
||||
leafCollector.setScorer(scorer);
|
||||
for (LeafCollector leafCollector : leafCollectors) {
|
||||
leafCollector.setScorer(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
collector.collect(doc);
|
||||
for (Collector collector : collectors) {
|
||||
collector.collect(doc);
|
||||
leafCollector.collect(doc);
|
||||
for (LeafCollector leafCollector : leafCollectors) {
|
||||
leafCollector.collect(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
collector.setNextReader(context);
|
||||
for (Collector collector : collectors) {
|
||||
collector.setNextReader(context);
|
||||
public void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
leafCollector = collector.getLeafCollector(context);
|
||||
for (int i = 0; i < collectors.length; i++) {
|
||||
leafCollectors[i] = collectors[i].getLeafCollector(context);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
if (!collector.acceptsDocsOutOfOrder()) {
|
||||
if (!leafCollector.acceptsDocsOutOfOrder()) {
|
||||
return false;
|
||||
}
|
||||
for (Collector collector : collectors) {
|
||||
if (!collector.acceptsDocsOutOfOrder()) {
|
||||
for (LeafCollector leafCollector : leafCollectors) {
|
||||
if (!leafCollector.acceptsDocsOutOfOrder()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,12 +18,12 @@
|
|||
*/
|
||||
package org.elasticsearch.common.lucene;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface ReaderContextAware {
|
||||
|
||||
public void setNextReader(AtomicReaderContext reader);
|
||||
public void setNextReader(LeafReaderContext reader);
|
||||
}
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
*/
|
||||
package org.elasticsearch.common.lucene;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.FilterAtomicReader;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.SegmentReader;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
@ -31,7 +31,7 @@ public class SegmentReaderUtils {
|
|||
* If no SegmentReader can be extracted, an {@link org.elasticsearch.ElasticsearchIllegalStateException} is thrown.
|
||||
*/
|
||||
@Nullable
|
||||
public static SegmentReader segmentReader(AtomicReader reader) {
|
||||
public static SegmentReader segmentReader(LeafReader reader) {
|
||||
return internalSegmentReader(reader, true);
|
||||
}
|
||||
|
||||
|
@ -40,24 +40,24 @@ public class SegmentReaderUtils {
|
|||
* is returned
|
||||
*/
|
||||
@Nullable
|
||||
public static SegmentReader segmentReaderOrNull(AtomicReader reader) {
|
||||
public static SegmentReader segmentReaderOrNull(LeafReader reader) {
|
||||
return internalSegmentReader(reader, false);
|
||||
}
|
||||
|
||||
public static boolean registerCoreListener(AtomicReader reader, SegmentReader.CoreClosedListener listener) {
|
||||
public static boolean registerCoreListener(LeafReader reader, SegmentReader.CoreClosedListener listener) {
|
||||
reader.addCoreClosedListener(listener);
|
||||
return true;
|
||||
}
|
||||
|
||||
private static SegmentReader internalSegmentReader(AtomicReader reader, boolean fail) {
|
||||
private static SegmentReader internalSegmentReader(LeafReader reader, boolean fail) {
|
||||
if (reader == null) {
|
||||
return null;
|
||||
}
|
||||
if (reader instanceof SegmentReader) {
|
||||
return (SegmentReader) reader;
|
||||
} else if (reader instanceof FilterAtomicReader) {
|
||||
final FilterAtomicReader fReader = (FilterAtomicReader) reader;
|
||||
return segmentReader(FilterAtomicReader.unwrap(fReader));
|
||||
} else if (reader instanceof FilterLeafReader) {
|
||||
final FilterLeafReader fReader = (FilterLeafReader) reader;
|
||||
return segmentReader(FilterLeafReader.unwrap(fReader));
|
||||
}
|
||||
if (fail) {
|
||||
// hard fail - we can't get a SegmentReader
|
||||
|
|
|
@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
|
||||
import java.io.IOException;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
package org.elasticsearch.common.lucene.all;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.ComplexExplanation;
|
||||
|
@ -62,7 +62,7 @@ public class AllTermQuery extends SpanTermQuery {
|
|||
}
|
||||
|
||||
@Override
|
||||
public AllTermSpanScorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
public AllTermSpanScorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
if (this.stats == null) {
|
||||
return null;
|
||||
}
|
||||
|
@ -145,7 +145,7 @@ public class AllTermQuery extends SpanTermQuery {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(AtomicReaderContext context, int doc) throws IOException{
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException{
|
||||
AllTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
|
||||
if (scorer != null) {
|
||||
int newDoc = scorer.advance(doc);
|
||||
|
|
|
@ -21,11 +21,15 @@ package org.elasticsearch.common.lucene.docset;
|
|||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
@ -89,7 +93,7 @@ public class AndDocIdSet extends DocIdSet {
|
|||
}
|
||||
}
|
||||
if (bits.isEmpty()) {
|
||||
return IteratorBasedIterator.newDocIdSetIterator(iterators.toArray(new DocIdSet[iterators.size()]));
|
||||
return IteratorBasedIterator.newDocIdSetIterator(iterators);
|
||||
}
|
||||
if (iterators.isEmpty()) {
|
||||
return new BitsDocIdSetIterator(new AndBits(bits.toArray(new Bits[bits.size()])));
|
||||
|
@ -97,16 +101,17 @@ public class AndDocIdSet extends DocIdSet {
|
|||
// combination of both..., first iterating over the "fast" ones, and then checking on the more
|
||||
// expensive ones
|
||||
return new BitsDocIdSetIterator.FilteredIterator(
|
||||
IteratorBasedIterator.newDocIdSetIterator(iterators.toArray(new DocIdSet[iterators.size()])),
|
||||
IteratorBasedIterator.newDocIdSetIterator(iterators),
|
||||
new AndBits(bits.toArray(new Bits[bits.size()]))
|
||||
);
|
||||
}
|
||||
|
||||
static class AndBits implements Bits {
|
||||
/** A conjunction between several {@link Bits} instances with short-circuit logic. */
|
||||
public static class AndBits implements Bits {
|
||||
|
||||
private final Bits[] bits;
|
||||
|
||||
AndBits(Bits[] bits) {
|
||||
public AndBits(Bits[] bits) {
|
||||
this.bits = bits;
|
||||
}
|
||||
|
||||
|
@ -127,18 +132,17 @@ public class AndDocIdSet extends DocIdSet {
|
|||
}
|
||||
|
||||
static class IteratorBasedIterator extends DocIdSetIterator {
|
||||
private int lastReturn = -1;
|
||||
private final DocIdSetIterator[] iterators;
|
||||
private final long cost;
|
||||
private int doc = -1;
|
||||
private final DocIdSetIterator lead;
|
||||
private final DocIdSetIterator[] otherIterators;
|
||||
|
||||
|
||||
public static DocIdSetIterator newDocIdSetIterator(DocIdSet[] sets) throws IOException {
|
||||
if (sets.length == 0) {
|
||||
public static DocIdSetIterator newDocIdSetIterator(Collection<DocIdSet> sets) throws IOException {
|
||||
if (sets.isEmpty()) {
|
||||
return DocIdSetIterator.empty();
|
||||
}
|
||||
final DocIdSetIterator[] iterators = new DocIdSetIterator[sets.length];
|
||||
final DocIdSetIterator[] iterators = new DocIdSetIterator[sets.size()];
|
||||
int j = 0;
|
||||
long cost = Integer.MAX_VALUE;
|
||||
for (DocIdSet set : sets) {
|
||||
if (set == null) {
|
||||
return DocIdSetIterator.empty();
|
||||
|
@ -148,94 +152,74 @@ public class AndDocIdSet extends DocIdSet {
|
|||
return DocIdSetIterator.empty();// non matching
|
||||
}
|
||||
iterators[j++] = docIdSetIterator;
|
||||
cost = Math.min(cost, docIdSetIterator.cost());
|
||||
}
|
||||
}
|
||||
if (sets.length == 1) {
|
||||
if (sets.size() == 1) {
|
||||
// shortcut if there is only one valid iterator.
|
||||
return iterators[0];
|
||||
}
|
||||
return new IteratorBasedIterator(iterators, cost);
|
||||
return new IteratorBasedIterator(iterators);
|
||||
}
|
||||
|
||||
private IteratorBasedIterator(DocIdSetIterator[] iterators, long cost) throws IOException {
|
||||
this.iterators = iterators;
|
||||
this.cost = cost;
|
||||
private IteratorBasedIterator(DocIdSetIterator[] iterators) throws IOException {
|
||||
final DocIdSetIterator[] sortedIterators = Arrays.copyOf(iterators, iterators.length);
|
||||
new InPlaceMergeSorter() {
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return Long.compare(sortedIterators[i].cost(), sortedIterators[j].cost());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
ArrayUtil.swap(sortedIterators, i, j);
|
||||
}
|
||||
|
||||
}.sort(0, sortedIterators.length);
|
||||
lead = sortedIterators[0];
|
||||
this.otherIterators = Arrays.copyOfRange(sortedIterators, 1, sortedIterators.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int docID() {
|
||||
return lastReturn;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int nextDoc() throws IOException {
|
||||
|
||||
if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assert false : "Illegal State - DocIdSetIterator is already exhausted";
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
DocIdSetIterator dcit = iterators[0];
|
||||
int target = dcit.nextDoc();
|
||||
int size = iterators.length;
|
||||
int skip = 0;
|
||||
int i = 1;
|
||||
while (i < size) {
|
||||
if (i != skip) {
|
||||
dcit = iterators[i];
|
||||
int docid = dcit.advance(target);
|
||||
if (docid > target) {
|
||||
target = docid;
|
||||
if (i != 0) {
|
||||
skip = i;
|
||||
i = 0;
|
||||
continue;
|
||||
} else
|
||||
skip = 0;
|
||||
}
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return (lastReturn = target);
|
||||
doc = lead.nextDoc();
|
||||
return doNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int advance(int target) throws IOException {
|
||||
doc = lead.advance(target);
|
||||
return doNext();
|
||||
}
|
||||
|
||||
if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assert false : "Illegal State - DocIdSetIterator is already exhausted";
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
DocIdSetIterator dcit = iterators[0];
|
||||
target = dcit.advance(target);
|
||||
int size = iterators.length;
|
||||
int skip = 0;
|
||||
int i = 1;
|
||||
while (i < size) {
|
||||
if (i != skip) {
|
||||
dcit = iterators[i];
|
||||
int docid = dcit.advance(target);
|
||||
if (docid > target) {
|
||||
target = docid;
|
||||
if (i != 0) {
|
||||
skip = i;
|
||||
i = 0;
|
||||
continue;
|
||||
} else {
|
||||
skip = 0;
|
||||
private int doNext() throws IOException {
|
||||
main:
|
||||
while (true) {
|
||||
for (DocIdSetIterator otherIterator : otherIterators) {
|
||||
// the following assert is the invariant of the loop
|
||||
assert otherIterator.docID() <= doc;
|
||||
// the current doc might already be equal to doc if it broke the loop
|
||||
// at the previous iteration
|
||||
if (otherIterator.docID() < doc) {
|
||||
final int advanced = otherIterator.advance(doc);
|
||||
if (advanced > doc) {
|
||||
doc = lead.advance(advanced);
|
||||
continue main;
|
||||
}
|
||||
}
|
||||
}
|
||||
i++;
|
||||
return doc;
|
||||
}
|
||||
return (lastReturn = target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return cost;
|
||||
return lead.cost();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
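The conjunction iterator is rewritten around a cost-sorted "lead": the cheapest iterator drives nextDoc()/advance(), the remaining iterators only try to catch up to the lead's document, and whenever one of them overshoots, the lead is advanced and the scan restarts. A condensed sketch of that invariant, mirroring the doNext() above:

    // doc is the lead's current document; returns the next doc on which all iterators agree.
    private static int doNext(DocIdSetIterator lead, DocIdSetIterator[] others, int doc) throws IOException {
        main:
        while (true) {
            for (DocIdSetIterator other : others) {
                if (other.docID() < doc) {
                    final int advanced = other.advance(doc);
                    if (advanced > doc) {          // mismatch: move the lead and start over
                        doc = lead.advance(advanced);
                        continue main;
                    }
                }
            }
            return doc;                            // every iterator is positioned on doc
        }
    }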
|
@ -19,18 +19,18 @@
|
|||
|
||||
package org.elasticsearch.common.lucene.docset;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
|
||||
/**
|
||||
* A holder for a {@link DocIdSet} and the {@link AtomicReaderContext} it is associated with.
|
||||
* A holder for a {@link DocIdSet} and the {@link LeafReaderContext} it is associated with.
|
||||
*/
|
||||
public class ContextDocIdSet {
|
||||
|
||||
public final AtomicReaderContext context;
|
||||
public final LeafReaderContext context;
|
||||
public final DocIdSet docSet;
|
||||
|
||||
public ContextDocIdSet(AtomicReaderContext context, DocIdSet docSet) {
|
||||
public ContextDocIdSet(LeafReaderContext context, DocIdSet docSet) {
|
||||
this.context = context;
|
||||
this.docSet = docSet;
|
||||
}
|
||||
|
|
|
@ -19,12 +19,16 @@
|
|||
|
||||
package org.elasticsearch.common.lucene.docset;
|
||||
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.search.BitsFilteredDocIdSet;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.RoaringDocIdSet;
|
||||
import org.apache.lucene.util.SparseFixedBitSet;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -52,44 +56,47 @@ public class DocIdSets {
|
|||
* For example, it does not end up iterating one doc at a time to check for its "value".
|
||||
*/
|
||||
public static boolean isFastIterator(DocIdSet set) {
|
||||
return set instanceof FixedBitSet;
|
||||
// TODO: this is really horrible
|
||||
while (set instanceof BitsFilteredDocIdSet) {
|
||||
set = ((BitsFilteredDocIdSet) set).getDelegate();
|
||||
}
|
||||
return set instanceof BitDocIdSet || set instanceof RoaringDocIdSet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts to a cacheable {@link DocIdSet}
|
||||
* <p/>
|
||||
* Note, we don't use {@link org.apache.lucene.search.DocIdSet#isCacheable()} because execution
|
||||
* might be expensive even if its cacheable (i.e. not going back to the reader to execute). We effectively
|
||||
* always either return an empty {@link DocIdSet} or {@link FixedBitSet} but never <code>null</code>.
|
||||
* This never returns <code>null</code>.
|
||||
*/
|
||||
public static DocIdSet toCacheable(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
|
||||
public static DocIdSet toCacheable(LeafReader reader, @Nullable DocIdSet set) throws IOException {
|
||||
if (set == null || set == DocIdSet.EMPTY) {
|
||||
return DocIdSet.EMPTY;
|
||||
}
|
||||
DocIdSetIterator it = set.iterator();
|
||||
final DocIdSetIterator it = set.iterator();
|
||||
if (it == null) {
|
||||
return DocIdSet.EMPTY;
|
||||
}
|
||||
int doc = it.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
final int firstDoc = it.nextDoc();
|
||||
if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return DocIdSet.EMPTY;
|
||||
}
|
||||
if (set instanceof FixedBitSet) {
|
||||
if (set instanceof BitDocIdSet) {
|
||||
return set;
|
||||
}
|
||||
// TODO: should we use WAH8DocIdSet like Lucene?
|
||||
FixedBitSet fixedBitSet = new FixedBitSet(reader.maxDoc());
|
||||
do {
|
||||
fixedBitSet.set(doc);
|
||||
doc = it.nextDoc();
|
||||
} while (doc != DocIdSetIterator.NO_MORE_DOCS);
|
||||
return fixedBitSet;
|
||||
|
||||
final RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(reader.maxDoc());
|
||||
builder.add(firstDoc);
|
||||
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
|
||||
builder.add(doc);
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a set to bits.
|
||||
*/
|
||||
public static Bits toSafeBits(AtomicReader reader, @Nullable DocIdSet set) throws IOException {
|
||||
public static Bits toSafeBits(LeafReader reader, @Nullable DocIdSet set) throws IOException {
|
||||
if (set == null) {
|
||||
return new Bits.MatchNoBits(reader.maxDoc());
|
||||
}
|
||||
|
@ -101,18 +108,21 @@ public class DocIdSets {
|
|||
if (iterator == null) {
|
||||
return new Bits.MatchNoBits(reader.maxDoc());
|
||||
}
|
||||
return toFixedBitSet(iterator, reader.maxDoc());
|
||||
return toBitSet(iterator, reader.maxDoc());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link FixedBitSet} from an iterator.
|
||||
* Creates a {@link BitSet} from an iterator.
|
||||
*/
|
||||
public static FixedBitSet toFixedBitSet(DocIdSetIterator iterator, int numBits) throws IOException {
|
||||
FixedBitSet set = new FixedBitSet(numBits);
|
||||
int doc;
|
||||
while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
set.set(doc);
|
||||
public static BitSet toBitSet(DocIdSetIterator iterator, int numBits) throws IOException {
|
||||
BitDocIdSet.Builder builder = new BitDocIdSet.Builder(numBits);
|
||||
builder.or(iterator);
|
||||
BitDocIdSet result = builder.build();
|
||||
if (result != null) {
|
||||
return result.bits();
|
||||
} else {
|
||||
return new SparseFixedBitSet(numBits);
|
||||
}
|
||||
return set;
|
||||
}
|
||||
|
||||
}
|
||||
|
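DocIdSets now leans on the Lucene 5 bitset utilities: cacheable sets are built with RoaringDocIdSet, which compresses per 16-bit block, rather than always materializing a FixedBitSet, and toBitSet() goes through BitDocIdSet.Builder so sparse segments end up in a SparseFixedBitSet. A hedged sketch of the caching path, assuming a LeafReader named reader and a fresh, non-empty iterator it:

    RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(reader.maxDoc());
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        builder.add(doc);                 // doc ids must be added in increasing order
    }
    DocIdSet cacheable = builder.build(); // compact representation, safe to cache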
|
|
@ -1,170 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.common.lucene.docset;
|
||||
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FilteredDocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A {@link DocIdSet} that works on a "doc" level by checking if it matches or not.
|
||||
*/
|
||||
public abstract class MatchDocIdSet extends DocIdSet implements Bits {
|
||||
|
||||
private final int maxDoc;
|
||||
private final Bits acceptDocs;
|
||||
|
||||
protected MatchDocIdSet(int maxDoc, @Nullable Bits acceptDocs) {
|
||||
this.maxDoc = maxDoc;
|
||||
this.acceptDocs = acceptDocs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this document match?
|
||||
*/
|
||||
protected abstract boolean matchDoc(int doc);
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
if (acceptDocs == null) {
|
||||
return new NoAcceptDocsIterator(maxDoc);
|
||||
} else if (acceptDocs instanceof FixedBitSet) {
|
||||
return new FixedBitSetIterator(((DocIdSet) acceptDocs).iterator());
|
||||
} else {
|
||||
return new BothIterator(maxDoc, acceptDocs);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
return matchDoc(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
final class NoAcceptDocsIterator extends DocIdSetIterator {
|
||||
|
||||
private final int maxDoc;
|
||||
private int doc = -1;
|
||||
|
||||
NoAcceptDocsIterator(int maxDoc) {
|
||||
this.maxDoc = maxDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() {
|
||||
do {
|
||||
doc++;
|
||||
if (doc >= maxDoc) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
} while (!matchDoc(doc));
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) {
|
||||
for (doc = target; doc < maxDoc; doc++) {
|
||||
if (matchDoc(doc)) {
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return maxDoc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final class FixedBitSetIterator extends FilteredDocIdSetIterator {
|
||||
|
||||
FixedBitSetIterator(DocIdSetIterator innerIter) {
|
||||
super(innerIter);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean match(int doc) {
|
||||
return matchDoc(doc);
|
||||
}
|
||||
}
|
||||
|
||||
final class BothIterator extends DocIdSetIterator {
|
||||
private final int maxDoc;
|
||||
private final Bits acceptDocs;
|
||||
private int doc = -1;
|
||||
|
||||
BothIterator(int maxDoc, Bits acceptDocs) {
|
||||
this.maxDoc = maxDoc;
|
||||
this.acceptDocs = acceptDocs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() {
|
||||
do {
|
||||
doc++;
|
||||
if (doc >= maxDoc) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
} while (!(acceptDocs.get(doc) && matchDoc(doc)));
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) {
|
||||
for (doc = target; doc < maxDoc; doc++) {
|
||||
if (acceptDocs.get(doc) && matchDoc(doc)) {
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return maxDoc;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -73,10 +74,12 @@ public class OrDocIdSet extends DocIdSet {
|
|||
return new IteratorBasedIterator(sets);
|
||||
}
|
||||
|
||||
static class OrBits implements Bits {
|
||||
/** A disjunction between several {@link Bits} instances with short-circuit logic. */
|
||||
public static class OrBits implements Bits {
|
||||
|
||||
private final Bits[] bits;
|
||||
|
||||
OrBits(Bits[] bits) {
|
||||
public OrBits(Bits[] bits) {
|
||||
this.bits = bits;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,12 @@
|
|||
package org.elasticsearch.common.lucene.index;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Filter;
|
||||
|
@ -29,11 +34,8 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
import org.elasticsearch.common.lucene.search.ApplyAcceptedDocsFilter;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
@ -75,10 +77,9 @@ public class FilterableTermsEnum extends TermsEnum {
|
|||
// or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
|
||||
numDocs = reader.maxDoc();
|
||||
}
|
||||
ApplyAcceptedDocsFilter acceptedDocsFilter = filter == null ? null : new ApplyAcceptedDocsFilter(filter);
|
||||
List<AtomicReaderContext> leaves = reader.leaves();
|
||||
List<LeafReaderContext> leaves = reader.leaves();
|
||||
List<Holder> enums = Lists.newArrayListWithExpectedSize(leaves.size());
|
||||
for (AtomicReaderContext context : leaves) {
|
||||
for (LeafReaderContext context : leaves) {
|
||||
Terms terms = context.reader().terms(field);
|
||||
if (terms == null) {
|
||||
continue;
|
||||
|
@ -88,24 +89,20 @@ public class FilterableTermsEnum extends TermsEnum {
|
|||
continue;
|
||||
}
|
||||
Bits bits = null;
|
||||
if (acceptedDocsFilter != null) {
|
||||
if (acceptedDocsFilter.filter() == Queries.MATCH_ALL_FILTER) {
|
||||
bits = context.reader().getLiveDocs();
|
||||
} else {
|
||||
// we want to force apply deleted docs
|
||||
DocIdSet docIdSet = acceptedDocsFilter.getDocIdSet(context, context.reader().getLiveDocs());
|
||||
if (DocIdSets.isEmpty(docIdSet)) {
|
||||
// fully filtered, none matching, no need to iterate on this
|
||||
continue;
|
||||
}
|
||||
bits = DocIdSets.toSafeBits(context.reader(), docIdSet);
|
||||
// Count how many docs are in our filtered set
|
||||
// TODO make this lazy-loaded only for those that need it?
|
||||
DocIdSetIterator iterator = docIdSet.iterator();
|
||||
if (iterator != null) {
|
||||
while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
numDocs++;
|
||||
}
|
||||
if (filter != null) {
|
||||
// we want to force apply deleted docs
|
||||
DocIdSet docIdSet = filter.getDocIdSet(context, context.reader().getLiveDocs());
|
||||
if (DocIdSets.isEmpty(docIdSet)) {
|
||||
// fully filtered, none matching, no need to iterate on this
|
||||
continue;
|
||||
}
|
||||
bits = DocIdSets.toSafeBits(context.reader(), docIdSet);
|
||||
// Count how many docs are in our filtered set
|
||||
// TODO make this lazy-loaded only for those that need it?
|
||||
DocIdSetIterator iterator = docIdSet.iterator();
|
||||
if (iterator != null) {
|
||||
while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
numDocs++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
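With ApplyAcceptedDocsFilter removed, FilterableTermsEnum now asks the raw filter for its DocIdSet against the segment's live docs and reuses that set both as the accepted-docs Bits and for the per-segment document count. A sketch of the simplified per-leaf setup, assuming the surrounding fields of the class above:

    DocIdSet docIdSet = filter.getDocIdSet(context, context.reader().getLiveDocs());
    if (!DocIdSets.isEmpty(docIdSet)) {
        bits = DocIdSets.toSafeBits(context.reader(), docIdSet);
        DocIdSetIterator iterator = docIdSet.iterator();
        if (iterator != null) {
            while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                numDocs++;                // count docs that survive the filter
            }
        }
    }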
@ -210,9 +207,4 @@ public class FilterableTermsEnum extends TermsEnum {
|
|||
public BytesRef next() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MESSAGE);
|
||||
}
|
||||
}
|
|
@ -19,7 +19,8 @@
|
|||
|
||||
package org.elasticsearch.common.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BitsFilteredDocIdSet;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -45,19 +46,19 @@ public class AndFilter extends Filter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
if (filters.size() == 1) {
|
||||
return filters.get(0).getDocIdSet(context, acceptDocs);
|
||||
}
|
||||
DocIdSet[] sets = new DocIdSet[filters.size()];
|
||||
for (int i = 0; i < filters.size(); i++) {
|
||||
DocIdSet set = filters.get(i).getDocIdSet(context, acceptDocs);
|
||||
DocIdSet set = filters.get(i).getDocIdSet(context, null);
|
||||
if (DocIdSets.isEmpty(set)) { // none matching for this filter, we AND, so return EMPTY
|
||||
return null;
|
||||
}
|
||||
sets[i] = set;
|
||||
}
|
||||
return new AndDocIdSet(sets);
|
||||
return BitsFilteredDocIdSet.wrap(new AndDocIdSet(sets), acceptDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
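AndFilter now evaluates each sub-filter without acceptDocs and applies them once around the combined result via BitsFilteredDocIdSet.wrap, which keeps deleted-doc filtering out of every inner DocIdSet. A condensed sketch of that flow, as in the getDocIdSet above:

    DocIdSet[] sets = new DocIdSet[filters.size()];
    for (int i = 0; i < filters.size(); i++) {
        DocIdSet set = filters.get(i).getDocIdSet(context, null);  // no acceptDocs for sub-filters
        if (DocIdSets.isEmpty(set)) {
            return null;                                           // conjunction with an empty set matches nothing
        }
        sets[i] = set;
    }
    return BitsFilteredDocIdSet.wrap(new AndDocIdSet(sets), acceptDocs);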
|
@ -1,217 +0,0 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.common.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* The assumption is that the underlying filter might not apply the accepted docs, so this filter helps to wrap
|
||||
* the actual filter and apply the actual accepted docs.
|
||||
*/
|
||||
// TODO: we can try and be smart, and only apply if if a filter is cached (down the "chain") since that's the only place that acceptDocs are not applied in ES
|
||||
public class ApplyAcceptedDocsFilter extends Filter {
|
||||
|
||||
private final Filter filter;
|
||||
|
||||
public ApplyAcceptedDocsFilter(Filter filter) {
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
DocIdSet docIdSet = filter.getDocIdSet(context, acceptDocs);
|
||||
if (DocIdSets.isEmpty(docIdSet)) {
|
||||
return null;
|
||||
}
|
||||
if (acceptDocs == null) {
|
||||
return docIdSet;
|
||||
}
|
||||
if (acceptDocs == context.reader().getLiveDocs()) {
|
||||
// optimized wrapper for not deleted cases
|
||||
return new NotDeletedDocIdSet(docIdSet, acceptDocs);
|
||||
}
|
||||
// we wrap this to make sure we can unwrap the inner docIDset in #unwrap
|
||||
return new WrappedDocIdSet(BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs), docIdSet);
|
||||
}
|
||||
|
||||
public Filter filter() {
|
||||
return this.filter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return filter.toString();
|
||||
}
|
||||
|
||||
public static DocIdSet unwrap(DocIdSet docIdSet) {
|
||||
if (docIdSet instanceof NotDeletedDocIdSet) {
|
||||
return ((NotDeletedDocIdSet) docIdSet).innerSet;
|
||||
} else if (docIdSet instanceof WrappedDocIdSet) {
|
||||
return ((WrappedDocIdSet) docIdSet).innerSet;
|
||||
}
|
||||
return docIdSet;
|
||||
}
|
||||
|
||||
static class NotDeletedDocIdSet extends DocIdSet {
|
||||
|
||||
private final DocIdSet innerSet;
|
||||
private final Bits liveDocs;
|
||||
|
||||
NotDeletedDocIdSet(DocIdSet innerSet, Bits liveDocs) {
|
||||
this.innerSet = innerSet;
|
||||
this.liveDocs = liveDocs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return innerSet.isCacheable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + innerSet.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
Bits bits = innerSet.bits();
|
||||
if (bits == null) {
|
||||
return null;
|
||||
}
|
||||
return new NotDeleteBits(bits, liveDocs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
if (!DocIdSets.isFastIterator(innerSet) && liveDocs instanceof FixedBitSet) {
|
||||
// might as well iterate over the live docs..., since the iterator is not fast enough
|
||||
// but we can only do that if we have Bits..., in short, we reverse the order...
|
||||
Bits bits = innerSet.bits();
|
||||
if (bits != null) {
|
||||
return new NotDeletedDocIdSetIterator(((FixedBitSet) liveDocs).iterator(), bits);
|
||||
}
|
||||
}
|
||||
DocIdSetIterator iterator = innerSet.iterator();
|
||||
if (iterator == null) {
|
||||
return null;
|
||||
}
|
||||
return new NotDeletedDocIdSetIterator(iterator, liveDocs);
|
||||
}
|
||||
}
|
||||
|
||||
static class NotDeleteBits implements Bits {
|
||||
|
||||
private final Bits bits;
|
||||
private final Bits liveDocs;
|
||||
|
||||
NotDeleteBits(Bits bits, Bits liveDocs) {
|
||||
this.bits = bits;
|
||||
this.liveDocs = liveDocs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
return liveDocs.get(index) && bits.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return bits.length();
|
||||
}
|
||||
}
|
||||
|
||||
static class NotDeletedDocIdSetIterator extends FilteredDocIdSetIterator {
|
||||
|
||||
private final Bits match;
|
||||
|
||||
NotDeletedDocIdSetIterator(DocIdSetIterator innerIter, Bits match) {
|
||||
super(innerIter);
|
||||
this.match = match;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean match(int doc) {
|
||||
return match.get(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((filter == null) ? 0 : filter.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
ApplyAcceptedDocsFilter other = (ApplyAcceptedDocsFilter) obj;
|
||||
if (filter == null) {
|
||||
if (other.filter != null)
|
||||
return false;
|
||||
} else if (!filter.equals(other.filter))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
private static final class WrappedDocIdSet extends DocIdSet {
|
||||
private final DocIdSet delegate;
|
||||
private final DocIdSet innerSet;
|
||||
|
||||
private WrappedDocIdSet(DocIdSet delegate, DocIdSet innerSet) {
|
||||
this.delegate = delegate;
|
||||
this.innerSet = innerSet;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
return delegate.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits bits() throws IOException {
|
||||
return delegate.bits();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCacheable() {
|
||||
return delegate.isCacheable();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.NUM_BYTES_OBJECT_REF + delegate.ramBytesUsed();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -18,10 +18,8 @@
|
|||
*/
|
||||
package org.elasticsearch.common.lucene.search;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.elasticsearch.common.lucene.docset.DocIdSets;
|
||||
|
||||
|
@ -30,13 +28,13 @@ import java.io.IOException;
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public class FilteredCollector extends XCollector {
|
||||
public class FilteredCollector extends SimpleCollector implements XCollector {
|
||||
|
||||
private final Collector collector;
|
||||
|
||||
private final Filter filter;
|
||||
|
||||
private Bits docSet;
|
||||
private LeafCollector leafCollector;
|
||||
|
||||
public FilteredCollector(Collector collector, Filter filter) {
|
||||
this.collector = collector;
|
||||
|
@ -52,24 +50,24 @@ public class FilteredCollector extends XCollector {
|
|||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
collector.setScorer(scorer);
|
||||
leafCollector.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
if (docSet.get(doc)) {
|
||||
collector.collect(doc);
|
||||
leafCollector.collect(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
collector.setNextReader(context);
|
||||
public void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
leafCollector = collector.getLeafCollector(context);
|
||||
docSet = DocIdSets.toSafeBits(context.reader(), filter.getDocIdSet(context, null));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return collector.acceptsDocsOutOfOrder();
|
||||
return leafCollector.acceptsDocsOutOfOrder();
|
||||
}
|
||||
}
|
|
@@ -19,14 +19,15 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lucene.docset.MatchDocIdSet;

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocValuesDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.common.Nullable;

public class LimitFilter extends NoCacheFilter {

private final int limit;

@@ -41,14 +42,14 @@ public class LimitFilter extends NoCacheFilter {
}

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
if (counter > limit) {
return null;
}
return new LimitDocIdSet(context.reader().maxDoc(), acceptDocs, limit);
}

public class LimitDocIdSet extends MatchDocIdSet {
public class LimitDocIdSet extends DocValuesDocIdSet {

private final int limit;

@@ -64,5 +65,10 @@ public class LimitFilter extends NoCacheFilter {
}
return true;
}

@Override
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_INT;
}
}
}

@@ -19,7 +19,8 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

@@ -33,8 +34,8 @@ import java.io.IOException;
public class MatchAllDocsFilter extends Filter {

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return new AllDocIdSet(context.reader().maxDoc());
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
return BitsFilteredDocIdSet.wrap(new AllDocIdSet(context.reader().maxDoc()), acceptDocs);
}

@Override

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

@@ -32,7 +32,7 @@ import java.io.IOException;
public class MatchNoDocsFilter extends Filter {

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
return null;
}

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;

@@ -57,12 +57,12 @@ public final class MatchNoDocsQuery extends Query {
}

@Override
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
return null;
}

@Override
public Explanation explain(final AtomicReaderContext context,
public Explanation explain(final LeafReaderContext context,
final int doc) {
return new ComplexExplanation(false, 0, "MatchNoDocs matches nothing");
}

@@ -156,8 +156,8 @@ public class MultiPhrasePrefixQuery extends Query {
// SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
// instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
TermsEnum termsEnum = null;
List<AtomicReaderContext> leaves = reader.leaves();
for (AtomicReaderContext leaf : leaves) {
List<LeafReaderContext> leaves = reader.leaves();
for (LeafReaderContext leaf : leaves) {
Terms _terms = leaf.reader().terms(field);
if (_terms == null) {
continue;

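The AtomicReaderContext to LeafReaderContext rename above repeats across the whole patch; a minimal sketch of the per-leaf iteration pattern it relies on (class and method names here are illustrative):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.Terms;

    // Walk each segment's LeafReaderContext instead of wrapping the reader
    // into one composite view, which would merge all terms and be expensive.
    final class LeafWalk {
        static long sumDocCount(IndexReader reader, String field) throws IOException {
            long total = 0;
            for (LeafReaderContext leaf : reader.leaves()) {
                Terms terms = leaf.reader().terms(field);
                if (terms != null) {
                    total += terms.getDocCount();
                }
            }
            return total;
        }
    }
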
@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

@@ -39,7 +39,7 @@ public abstract class NoCacheFilter extends Filter {
}

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
return delegate.getDocIdSet(context, acceptDocs);
}

@@ -19,16 +19,16 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;

import java.io.IOException;

/**
*
*/
public class NoopCollector extends Collector {
public class NoopCollector extends SimpleCollector {

public static final NoopCollector NOOP_COLLECTOR = new NoopCollector();

@@ -41,7 +41,7 @@ public class NoopCollector extends Collector {
}

@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
protected void doSetNextReader(LeafReaderContext context) throws IOException {
}

@Override

@@ -19,7 +19,8 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

@@ -45,12 +46,15 @@ public class NotFilter extends Filter {
}

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
DocIdSet set = filter.getDocIdSet(context, acceptDocs);
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
DocIdSet set = filter.getDocIdSet(context, null);
DocIdSet notSet;
if (DocIdSets.isEmpty(set)) {
return new AllDocIdSet(context.reader().maxDoc());
notSet = new AllDocIdSet(context.reader().maxDoc());
} else {
notSet = new NotDocIdSet(set, context.reader().maxDoc());
}
return new NotDocIdSet(set, context.reader().maxDoc());
return BitsFilteredDocIdSet.wrap(notSet, acceptDocs);
}

@Override

@@ -19,7 +19,8 @@
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

@@ -46,13 +47,13 @@ public class OrFilter extends Filter {
}

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
if (filters.size() == 1) {
return filters.get(0).getDocIdSet(context, acceptDocs);
}
List<DocIdSet> sets = new ArrayList<>(filters.size());
for (int i = 0; i < filters.size(); i++) {
DocIdSet set = filters.get(i).getDocIdSet(context, acceptDocs);
DocIdSet set = filters.get(i).getDocIdSet(context, null);
if (DocIdSets.isEmpty(set)) { // none matching for this filter, continue
continue;
}

@@ -61,10 +62,13 @@ public class OrFilter extends Filter {
if (sets.size() == 0) {
return null;
}
DocIdSet set;
if (sets.size() == 1) {
return sets.get(0);
set = sets.get(0);
} else {
set = new OrDocIdSet(sets.toArray(new DocIdSet[sets.size()]));
}
return new OrDocIdSet(sets.toArray(new DocIdSet[sets.size()]));
return BitsFilteredDocIdSet.wrap(set, acceptDocs);
}

@Override

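NotFilter and OrFilter above both switch to the same idiom: compute the filter's set with acceptDocs set to null, and apply accepted docs exactly once at the end. A hedged sketch of that pattern (the abstract class here is illustrative, not part of the patch):

    import java.io.IOException;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.BitsFilteredDocIdSet;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.Bits;

    // Compute the raw set without acceptDocs so derived sets stay independent
    // of deletions, then apply the accepted docs once via BitsFilteredDocIdSet.
    public abstract class WrappingFilter extends Filter {

        protected abstract DocIdSet compute(LeafReaderContext context) throws IOException;

        @Override
        public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
            DocIdSet set = compute(context);                   // acceptDocs deliberately not passed down
            return BitsFilteredDocIdSet.wrap(set, acceptDocs); // applied exactly once here
        }
    }
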
@@ -42,7 +42,7 @@ public class Queries {
// We don't use MatchAllDocsQuery, its slower than the one below ... (much slower)
// NEVER cache this XConstantScore Query it's not immutable and based on #3521
// some code might set a boost on this query.
return new XConstantScoreQuery(MATCH_ALL_FILTER);
return new ConstantScoreQuery(MATCH_ALL_FILTER);
}

/** Return a query that matches no document. */

@@ -74,8 +74,8 @@ public class Queries {
}

public static boolean isConstantMatchAllQuery(Query query) {
if (query instanceof XConstantScoreQuery) {
XConstantScoreQuery scoreQuery = (XConstantScoreQuery) query;
if (query instanceof ConstantScoreQuery) {
ConstantScoreQuery scoreQuery = (ConstantScoreQuery) query;
if (scoreQuery.getFilter() instanceof MatchAllDocsFilter) {
return true;
}

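With XConstantScoreQuery removed, the match-all query is built from the stock Lucene class. A small sketch of what the two changed methods amount to after this hunk (helper class name is illustrative):

    import org.apache.lucene.search.ConstantScoreQuery;
    import org.apache.lucene.search.Query;
    import org.elasticsearch.common.lucene.search.MatchAllDocsFilter;

    // Plain ConstantScoreQuery wrapping the match-all filter, and the
    // instanceof check used by isConstantMatchAllQuery().
    final class MatchAllExample {
        static Query newMatchAll() {
            return new ConstantScoreQuery(new MatchAllDocsFilter());
        }

        static boolean isConstantMatchAll(Query query) {
            return query instanceof ConstantScoreQuery
                    && ((ConstantScoreQuery) query).getFilter() instanceof MatchAllDocsFilter;
        }
    }
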
@@ -18,7 +18,7 @@
*/
package org.elasticsearch.common.lucene.search;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;

@@ -30,7 +30,7 @@ import org.apache.lucene.util.automaton.RegExp;
import java.io.IOException;

/**
* A lazy regexp filter which only builds the automaton on the first call to {@link #getDocIdSet(AtomicReaderContext, Bits)}.
* A lazy regexp filter which only builds the automaton on the first call to {@link #getDocIdSet(LeafReaderContext, Bits)}.
* It is not thread safe (so can't be applied on multiple segments concurrently)
*/
public class RegexpFilter extends Filter {

@@ -65,7 +65,7 @@ public class RegexpFilter extends Filter {
}

@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
return filter.getDocIdSet(context, acceptDocs);
}

@@ -17,21 +17,27 @@ package org.elasticsearch.common.lucene.search;
* limitations under the License.
*/

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.lucene.docset.AllDocIdSet;
import org.elasticsearch.common.lucene.docset.AndDocIdSet;
import org.elasticsearch.common.lucene.docset.DocIdSets;
import org.elasticsearch.common.lucene.docset.NotDocIdSet;
import org.elasticsearch.common.lucene.docset.OrDocIdSet.OrBits;

import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

/**
* Similar to {@link org.apache.lucene.queries.BooleanFilter}.

@@ -42,6 +48,19 @@ import java.util.*;
*/
public class XBooleanFilter extends Filter implements Iterable<FilterClause> {

private static final Comparator<DocIdSetIterator> COST_DESCENDING = new Comparator<DocIdSetIterator>() {
@Override
public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
return Long.compare(o2.cost(), o1.cost());
}
};
private static final Comparator<DocIdSetIterator> COST_ASCENDING = new Comparator<DocIdSetIterator>() {
@Override
public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
return Long.compare(o1.cost(), o2.cost());
}
};

final List<FilterClause> clauses = new ArrayList<>();

/**

@@ -49,9 +68,14 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
* of the filters that have been added.
*/
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
FixedBitSet res = null;
final AtomicReader reader = context.reader();
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
final int maxDoc = context.reader().maxDoc();

// the 0-clauses case is ambiguous because an empty OR filter should return nothing
// while an empty AND filter should return all docs, so we handle this case explicitely
if (clauses.isEmpty()) {
return null;
}

// optimize single case...
if (clauses.size() == 1) {

@@ -59,9 +83,9 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
if (clause.getOccur() == Occur.MUST_NOT) {
if (DocIdSets.isEmpty(set)) {
return new AllDocIdSet(reader.maxDoc());
return new AllDocIdSet(maxDoc);
} else {
return new NotDocIdSet(set, reader.maxDoc());
return new NotDocIdSet(set, maxDoc);
}
}
// SHOULD or MUST, just return the set...

@@ -71,241 +95,177 @@ public class XBooleanFilter extends Filter implements Iterable<FilterClause> {
return set;
}

// first, go over and see if we can shortcut the execution
// and gather Bits if we need to
List<ResultClause> results = new ArrayList<>(clauses.size());
// We have several clauses, try to organize things to make it easier to process
List<DocIdSetIterator> shouldIterators = new ArrayList<>();
List<Bits> shouldBits = new ArrayList<>();
boolean hasShouldClauses = false;
boolean hasNonEmptyShouldClause = false;
boolean hasMustClauses = false;
boolean hasMustNotClauses = false;
for (int i = 0; i < clauses.size(); i++) {
FilterClause clause = clauses.get(i);
DocIdSet set = clause.getFilter().getDocIdSet(context, acceptDocs);
if (clause.getOccur() == Occur.MUST) {
hasMustClauses = true;
if (DocIdSets.isEmpty(set)) {
return null;
}
} else if (clause.getOccur() == Occur.SHOULD) {
hasShouldClauses = true;
if (DocIdSets.isEmpty(set)) {
continue;
}
hasNonEmptyShouldClause = true;
} else if (clause.getOccur() == Occur.MUST_NOT) {
hasMustNotClauses = true;
if (DocIdSets.isEmpty(set)) {
// we mark empty ones as null for must_not, handle it in the next run...
results.add(new ResultClause(null, null, clause));
continue;
}
}

List<DocIdSetIterator> requiredIterators = new ArrayList<>();
List<DocIdSetIterator> excludedIterators = new ArrayList<>();

List<Bits> requiredBits = new ArrayList<>();
List<Bits> excludedBits = new ArrayList<>();

for (FilterClause clause : clauses) {
DocIdSet set = clause.getFilter().getDocIdSet(context, null);
DocIdSetIterator it = null;
Bits bits = null;
if (!DocIdSets.isFastIterator(set)) {
bits = set.bits();
}
results.add(new ResultClause(set, bits, clause));
}

if (hasShouldClauses && !hasNonEmptyShouldClause) {
return null;
}

// now, go over the clauses and apply the "fast" ones first...
hasNonEmptyShouldClause = false;
boolean hasBits = false;
// But first we need to handle the "fast" should clauses, otherwise a should clause can unset docs
// that don't match with a must or must_not clause.
List<ResultClause> fastOrClauses = new ArrayList<>();
for (int i = 0; i < results.size(); i++) {
ResultClause clause = results.get(i);
// we apply bits in based ones (slow) in the second run
if (clause.bits != null) {
hasBits = true;
continue;
}
if (clause.clause.getOccur() == Occur.SHOULD) {
if (hasMustClauses || hasMustNotClauses) {
fastOrClauses.add(clause);
} else if (res == null) {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it != null) {
hasNonEmptyShouldClause = true;
res = new FixedBitSet(reader.maxDoc());
res.or(it);
}
} else {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it != null) {
hasNonEmptyShouldClause = true;
res.or(it);
}
if (DocIdSets.isEmpty(set) == false) {
it = set.iterator();
if (it != null) {
bits = set.bits();
}
}
}

// Now we safely handle the "fast" must and must_not clauses.
for (int i = 0; i < results.size(); i++) {
ResultClause clause = results.get(i);
// we apply bits in based ones (slow) in the second run
if (clause.bits != null) {
hasBits = true;
continue;
}
if (clause.clause.getOccur() == Occur.MUST) {
DocIdSetIterator it = clause.docIdSet.iterator();
switch (clause.getOccur()) {
case SHOULD:
hasShouldClauses = true;
if (it == null) {
// continue, but we recorded that there is at least one should clause
// so that if all iterators are null we know that nothing matches this
// filter since at least one SHOULD clause needs to match
} else if (bits == null || DocIdSets.isFastIterator(set)) {
shouldIterators.add(it);
} else {
shouldBits.add(bits);
}
break;
case MUST:
if (it == null) {
// no documents matched a clause that is compulsory, then nothing matches at all
return null;
}
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res.or(it);
} else if (bits == null || DocIdSets.isFastIterator(set)) {
requiredIterators.add(it);
} else {
res.and(it);
requiredBits.add(bits);
}
} else if (clause.clause.getOccur() == Occur.MUST_NOT) {
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
}
if (clause.docIdSet != null) {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it != null) {
res.andNot(it);
}
break;
case MUST_NOT:
if (it == null) {
// ignore
} else if (bits == null || DocIdSets.isFastIterator(set)) {
excludedIterators.add(it);
} else {
excludedBits.add(bits);
}
break;
default:
throw new AssertionError();
}
}

if (!hasBits) {
if (!fastOrClauses.isEmpty()) {
DocIdSetIterator it = res.iterator();
at_least_one_should_clause_iter:
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
for (ResultClause fastOrClause : fastOrClauses) {
DocIdSetIterator clauseIterator = fastOrClause.iterator();
if (clauseIterator == null) {
continue;
}
if (iteratorMatch(clauseIterator, setDoc)) {
hasNonEmptyShouldClause = true;
continue at_least_one_should_clause_iter;
}
}
res.clear(setDoc);
}
}
// Since BooleanFilter requires that at least one SHOULD clause matches,
// transform the SHOULD clauses into a MUST clause

if (hasShouldClauses && !hasNonEmptyShouldClause) {
if (hasShouldClauses) {
if (shouldIterators.isEmpty() && shouldBits.isEmpty()) {
// we had should clauses, but they all produced empty sets
// yet BooleanFilter requires that at least one clause matches
// so it means we do not match anything
return null;
} else if (shouldIterators.size() == 1 && shouldBits.isEmpty()) {
requiredIterators.add(shouldIterators.get(0));
} else {
return res;
}
}
// apply high-cardinality should clauses first
CollectionUtil.timSort(shouldIterators, COST_DESCENDING);

// we have some clauses with bits, apply them...
// we let the "res" drive the computation, and check Bits for that
List<ResultClause> slowOrClauses = new ArrayList<>();
for (int i = 0; i < results.size(); i++) {
ResultClause clause = results.get(i);
if (clause.bits == null) {
continue;
}
if (clause.clause.getOccur() == Occur.SHOULD) {
if (hasMustClauses || hasMustNotClauses) {
slowOrClauses.add(clause);
} else {
if (res == null) {
DocIdSetIterator it = clause.docIdSet.iterator();
if (it == null) {
continue;
}
hasNonEmptyShouldClause = true;
res = new FixedBitSet(reader.maxDoc());
res.or(it);
BitDocIdSet.Builder shouldBuilder = null;
for (DocIdSetIterator it : shouldIterators) {
if (shouldBuilder == null) {
shouldBuilder = new BitDocIdSet.Builder(maxDoc);
}
shouldBuilder.or(it);
}

if (shouldBuilder != null && shouldBits.isEmpty() == false) {
// we have both iterators and bits, there is no way to compute
// the union efficiently, so we just transform the iterators into
// bits
// add first since these are fast bits
shouldBits.add(0, shouldBuilder.build().bits());
shouldBuilder = null;
}

if (shouldBuilder == null) {
// only bits
assert shouldBits.size() >= 1;
if (shouldBits.size() == 1) {
requiredBits.add(shouldBits.get(0));
} else {
for (int doc = 0; doc < reader.maxDoc(); doc++) {
if (!res.get(doc) && clause.bits.get(doc)) {
hasNonEmptyShouldClause = true;
res.set(doc);
}
}
}
}
} else if (clause.clause.getOccur() == Occur.MUST) {
if (res == null) {
// nothing we can do, just or it...
res = new FixedBitSet(reader.maxDoc());
DocIdSetIterator it = clause.docIdSet.iterator();
if (it == null) {
return null;
}
res.or(it);
} else {
Bits bits = clause.bits;
// use the "res" to drive the iteration
DocIdSetIterator it = res.iterator();
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
if (!bits.get(doc)) {
res.clear(doc);
}
}
}
} else if (clause.clause.getOccur() == Occur.MUST_NOT) {
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
DocIdSetIterator it = clause.docIdSet.iterator();
if (it != null) {
res.andNot(it);
requiredBits.add(new OrBits(shouldBits.toArray(new Bits[shouldBits.size()])));
}
} else {
Bits bits = clause.bits;
// let res drive the iteration
DocIdSetIterator it = res.iterator();
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
if (bits.get(doc)) {
res.clear(doc);
}
}
assert shouldBits.isEmpty();
// only iterators, we can add the merged iterator to the list of required iterators
requiredIterators.add(shouldBuilder.build().iterator());
}
}
}

// From a boolean_logic behavior point of view a should clause doesn't have impact on a bool filter if there
// is already a must or must_not clause. However in the current ES bool filter behaviour at least one should
// clause must match in order for a doc to be a match. What we do here is checking if matched docs match with
// any should filter. TODO: Add an option to have disable minimum_should_match=1 behaviour
if (!slowOrClauses.isEmpty() || !fastOrClauses.isEmpty()) {
DocIdSetIterator it = res.iterator();
at_least_one_should_clause_iter:
for (int setDoc = it.nextDoc(); setDoc != DocIdSetIterator.NO_MORE_DOCS; setDoc = it.nextDoc()) {
for (ResultClause fastOrClause : fastOrClauses) {
DocIdSetIterator clauseIterator = fastOrClause.iterator();
if (clauseIterator == null) {
continue;
}
if (iteratorMatch(clauseIterator, setDoc)) {
hasNonEmptyShouldClause = true;
continue at_least_one_should_clause_iter;
}
}
for (ResultClause slowOrClause : slowOrClauses) {
if (slowOrClause.bits.get(setDoc)) {
hasNonEmptyShouldClause = true;
continue at_least_one_should_clause_iter;
}
}
res.clear(setDoc);
}
}

if (hasShouldClauses && !hasNonEmptyShouldClause) {
return null;
} else {
return res;
assert shouldIterators.isEmpty();
assert shouldBits.isEmpty();
}

// From now on, we don't have to care about SHOULD clauses anymore since we upgraded
// them to required clauses (if necessary)

// cheap iterators first to make intersection faster
CollectionUtil.timSort(requiredIterators, COST_ASCENDING);
CollectionUtil.timSort(excludedIterators, COST_ASCENDING);

// Intersect iterators
BitDocIdSet.Builder res = null;
for (DocIdSetIterator iterator : requiredIterators) {
if (res == null) {
res = new BitDocIdSet.Builder(maxDoc);
res.or(iterator);
} else {
res.and(iterator);
}
}
for (DocIdSetIterator iterator : excludedIterators) {
if (res == null) {
res = new BitDocIdSet.Builder(maxDoc, true);
}
res.andNot(iterator);
}

// Transform the excluded bits into required bits
if (excludedBits.isEmpty() == false) {
Bits excluded;
if (excludedBits.size() == 1) {
excluded = excludedBits.get(0);
} else {
excluded = new OrBits(excludedBits.toArray(new Bits[excludedBits.size()]));
}
requiredBits.add(new NotDocIdSet.NotBits(excluded));
}

// The only thing left to do is to intersect 'res' with 'requiredBits'

// the main doc id set that will drive iteration
DocIdSet main;
if (res == null) {
main = new AllDocIdSet(maxDoc);
} else {
main = res.build();
}

// apply accepted docs and compute the bits to filter with
// accepted docs are added first since they are fast and will help not computing anything on deleted docs
if (acceptDocs != null) {
requiredBits.add(0, acceptDocs);
}
// the random-access filter that we will apply to 'main'
Bits filter;
if (requiredBits.isEmpty()) {
filter = null;
} else if (requiredBits.size() == 1) {
filter = requiredBits.get(0);
} else {
filter = new AndDocIdSet.AndBits(requiredBits.toArray(new Bits[requiredBits.size()]));
}

return BitsFilteredDocIdSet.wrap(main, filter);
}

/**

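The rewritten XBooleanFilter above leans on BitDocIdSet.Builder to union and intersect clause iterators before applying any random-access Bits. A minimal sketch of that builder idiom, mirroring the pattern in the new code (the helper class is illustrative):

    import java.io.IOException;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.BitDocIdSet;

    // Seed the builder with the first required clause via or(), then intersect
    // the remaining clauses via and(), and build() the resulting bitset.
    public final class BuilderExample {
        public static DocIdSet intersect(int maxDoc, Iterable<DocIdSetIterator> required) throws IOException {
            BitDocIdSet.Builder builder = null;
            for (DocIdSetIterator it : required) {
                if (builder == null) {
                    builder = new BitDocIdSet.Builder(maxDoc);
                    builder.or(it);   // first clause seeds the bitset
                } else {
                    builder.and(it);  // remaining clauses are intersected
                }
            }
            return builder == null ? null : builder.build();
        }
    }
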
@@ -26,9 +26,7 @@ import java.io.IOException;
* An extension to {@link Collector} that allows for a callback when
* collection is done.
*/
public abstract class XCollector extends Collector {
public interface XCollector extends Collector {

public void postCollection() throws IOException {

}
public void postCollection() throws IOException;
}

@@ -1,44 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.common.lucene.search;

import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;

/**
* We still need sometimes to exclude deletes, because we don't remove them always with acceptDocs on filters
*/
public class XConstantScoreQuery extends ConstantScoreQuery {

private final Filter actualFilter;

public XConstantScoreQuery(Filter filter) {
super(new ApplyAcceptedDocsFilter(filter));
this.actualFilter = filter;
}

// trick so any external systems still think that its the actual filter we use, and not the
// deleted filter
@Override
public Filter getFilter() {
return this.actualFilter;
}
}

@@ -1,261 +0,0 @@
package org.elasticsearch.common.lucene.search;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.FilteredQuery.FilterStrategy;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.DocIdSets;

import java.io.IOException;
import java.util.Set;


/**
* A query that applies a filter to the results of another query.
* <p/>
* <p>Note: the bits are retrieved from the filter each time this
* query is used in a search - use a CachingWrapperFilter to avoid
* regenerating the bits every time.
*
* @see CachingWrapperFilter
* @since 1.4
*/
// Changes are marked with //CHANGE:
// Delegate to FilteredQuery - this version fixes the bug in LUCENE-4705 and uses ApplyAcceptedDocsFilter internally
public final class XFilteredQuery extends Query {
private final Filter rawFilter;
private final FilteredQuery delegate;
private final FilterStrategy strategy;

/**
* Constructs a new query which applies a filter to the results of the original query.
* {@link Filter#getDocIdSet} will be called every time this query is used in a search.
*
* @param query Query to be filtered, cannot be <code>null</code>.
* @param filter Filter to apply to query results, cannot be <code>null</code>.
*/
public XFilteredQuery(Query query, Filter filter) {
this(query, filter, FilteredQuery.RANDOM_ACCESS_FILTER_STRATEGY);
}

/**
* Expert: Constructs a new query which applies a filter to the results of the original query.
* {@link Filter#getDocIdSet} will be called every time this query is used in a search.
*
* @param query Query to be filtered, cannot be <code>null</code>.
* @param filter Filter to apply to query results, cannot be <code>null</code>.
* @param strategy a filter strategy used to create a filtered scorer.
* @see FilterStrategy
*/
public XFilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
this(new FilteredQuery(query, new ApplyAcceptedDocsFilter(filter), strategy), filter, strategy);
}

private XFilteredQuery(FilteredQuery delegate, Filter filter, FilterStrategy strategy) {
this.delegate = delegate;
// CHANGE: we need to wrap it in post application of accepted docs
this.rawFilter = filter;
this.strategy = strategy;
}

/**
* Returns a Weight that applies the filter to the enclosed query's Weight.
* This is accomplished by overriding the Scorer returned by the Weight.
*/
@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
return delegate.createWeight(searcher);
}

/**
* Rewrites the query. If the wrapped is an instance of
* {@link MatchAllDocsQuery} it returns a {@link ConstantScoreQuery}. Otherwise
* it returns a new {@code FilteredQuery} wrapping the rewritten query.
*/
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query query = delegate.getQuery();
final Query queryRewritten = query.rewrite(reader);

// CHANGE: if we push back to Lucene, would love to have an extension for "isMatchAllQuery"
if (queryRewritten instanceof MatchAllDocsQuery || Queries.isConstantMatchAllQuery(queryRewritten)) {
// Special case: If the query is a MatchAllDocsQuery, we only
// return a CSQ(filter).
final Query rewritten = new ConstantScoreQuery(delegate.getFilter());
// Combine boost of MatchAllDocsQuery and the wrapped rewritten query:
rewritten.setBoost(delegate.getBoost() * queryRewritten.getBoost());
return rewritten;
}

if (queryRewritten != query) {
// rewrite to a new FilteredQuery wrapping the rewritten query
final Query rewritten = new XFilteredQuery(queryRewritten, rawFilter, strategy);
rewritten.setBoost(delegate.getBoost());
return rewritten;
} else {
// nothing to rewrite, we are done!
return this;
}
}

@Override
public void setBoost(float b) {
delegate.setBoost(b);
}

@Override
public float getBoost() {
return delegate.getBoost();
}

/**
* Returns this FilteredQuery's (unfiltered) Query
*/
public final Query getQuery() {
return delegate.getQuery();
}

/**
* Returns this FilteredQuery's filter
*/
public final Filter getFilter() {
// CHANGE: unwrap the accepted docs filter
if (rawFilter instanceof ApplyAcceptedDocsFilter) {
return ((ApplyAcceptedDocsFilter) rawFilter).filter();
}
return rawFilter;
}

// inherit javadoc
@Override
public void extractTerms(Set<Term> terms) {
delegate.extractTerms(terms);
}

/**
* Prints a user-readable version of this query.
*/
@Override
public String toString(String s) {
return delegate.toString(s);
}

/**
* Returns true iff <code>o</code> is equal to this.
*/
@Override
public boolean equals(Object o) {
if (!(o instanceof XFilteredQuery)) {
return false;
} else {
return delegate.equals(((XFilteredQuery)o).delegate);
}
}

/**
* Returns a hash code value for this object.
*/
@Override
public int hashCode() {
return delegate.hashCode();
}

// CHANGE: Add custom random access strategy, allowing to set the threshold
// CHANGE: Add filter first filter strategy
public static final FilterStrategy ALWAYS_RANDOM_ACCESS_FILTER_STRATEGY = new CustomRandomAccessFilterStrategy(0);

public static final CustomRandomAccessFilterStrategy CUSTOM_FILTER_STRATEGY = new CustomRandomAccessFilterStrategy();

/**
* Extends {@link org.apache.lucene.search.FilteredQuery.RandomAccessFilterStrategy}.
* <p/>
* Adds a threshold value, which defaults to -1. When set to -1, it will check if the filter docSet is
* *not* a fast docSet, and if not, it will use {@link FilteredQuery#QUERY_FIRST_FILTER_STRATEGY} (since
* the assumption is that its a "slow" filter and better computed only on whatever matched the query).
* <p/>
* If the threshold value is 0, it always tries to pass "down" the filter as acceptDocs, and it the filter
* can't be represented as Bits (never really), then it uses {@link FilteredQuery#LEAP_FROG_QUERY_FIRST_STRATEGY}.
* <p/>
* If the above conditions are not met, then it reverts to the {@link FilteredQuery.RandomAccessFilterStrategy} logic,
* with the threshold used to control {@link #useRandomAccess(org.apache.lucene.util.Bits, int)}.
*/
public static class CustomRandomAccessFilterStrategy extends FilteredQuery.RandomAccessFilterStrategy {

private final int threshold;

public CustomRandomAccessFilterStrategy() {
this.threshold = -1;
}

public CustomRandomAccessFilterStrategy(int threshold) {
this.threshold = threshold;
}

@Override
public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException {
// CHANGE: If threshold is 0, always pass down the accept docs, don't pay the price of calling nextDoc even...
if (threshold == 0) {
final Bits filterAcceptDocs = docIdSet.bits();
if (filterAcceptDocs != null) {
return weight.scorer(context, filterAcceptDocs);
} else {
return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet);
}
}

// CHANGE: handle "default" value
if (threshold == -1) {
// default value, don't iterate on only apply filter after query if its not a "fast" docIdSet
if (!DocIdSets.isFastIterator(ApplyAcceptedDocsFilter.unwrap(docIdSet))) {
return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, weight, docIdSet);
}
}

return super.filteredScorer(context, weight, docIdSet);
}

/**
* Expert: decides if a filter should be executed as "random-access" or not.
* random-access means the filter "filters" in a similar way as deleted docs are filtered
* in Lucene. This is faster when the filter accepts many documents.
* However, when the filter is very sparse, it can be faster to execute the query+filter
* as a conjunction in some cases.
* <p/>
* The default implementation returns <code>true</code> if the first document accepted by the
* filter is < threshold, if threshold is -1 (the default), then it checks for < 100.
*/
protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
// "default"
if (threshold == -1) {
return firstFilterDoc < 100;
}
//TODO once we have a cost API on filters and scorers we should rethink this heuristic
return firstFilterDoc < threshold;
}
}

@Override
public Query clone() {
return new XFilteredQuery((FilteredQuery) delegate.clone(), rawFilter, strategy);
}

}

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.ElasticsearchIllegalArgumentException;

@@ -43,7 +43,7 @@ public class BoostScoreFunction extends ScoreFunction {
}

@Override
public void setNextReader(AtomicReaderContext context) {
public void setNextReader(LeafReaderContext context) {
// nothing to do here...
}

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;

@@ -48,7 +48,7 @@ public class FieldValueFactorFunction extends ScoreFunction {
}

@Override
public void setNextReader(AtomicReaderContext context) {
public void setNextReader(LeafReaderContext context) {
this.values = this.indexFieldData.load(context).getDoubleValues();
}

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;

@@ -150,7 +150,7 @@ public class FiltersFunctionScoreQuery extends Query {
}

@Override
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
// we ignore scoreDocsInOrder parameter, because we need to score in
// order if documents are scored with a script. The
// ShardLookup depends on in order scoring.

@@ -167,7 +167,7 @@ public class FiltersFunctionScoreQuery extends Query {
}

@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
public Explanation explain(LeafReaderContext context, int doc) throws IOException {

Explanation subQueryExpl = subQueryWeight.explain(context, doc);
if (!subQueryExpl.isMatch()) {

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;

@@ -112,7 +112,7 @@ public class FunctionScoreQuery extends Query {
}

@Override
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
// we ignore scoreDocsInOrder parameter, because we need to score in
// order if documents are scored with a script. The
// ShardLookup depends on in order scoring.

@@ -125,7 +125,7 @@ public class FunctionScoreQuery extends Query {
}

@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Explanation subQueryExpl = subQueryWeight.explain(context, doc);
if (!subQueryExpl.isMatch()) {
return subQueryExpl;

@@ -18,7 +18,7 @@
*/
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.util.StringHelper;
import org.elasticsearch.index.fielddata.AtomicFieldData;

@@ -59,7 +59,7 @@ public class RandomScoreFunction extends ScoreFunction {
}

@Override
public void setNextReader(AtomicReaderContext context) {
public void setNextReader(LeafReaderContext context) {
AtomicFieldData leafData = uidFieldData.load(context);
uidByteData = leafData.getBytesValues();
if (uidByteData == null) throw new NullPointerException("failed to get uid byte data");

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;

/**

@@ -29,7 +29,7 @@ public abstract class ScoreFunction {

private final CombineFunction scoreCombiner;

public abstract void setNextReader(AtomicReaderContext context);
public abstract void setNextReader(LeafReaderContext context);

public abstract double score(int docId, float subQueryScore);

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer;
import org.elasticsearch.script.SearchScript;

@@ -87,7 +87,7 @@ public class ScriptScoreFunction extends ScoreFunction {
}

@Override
public void setNextReader(AtomicReaderContext ctx) {
public void setNextReader(LeafReaderContext ctx) {
script.setNextReader(ctx);
}

@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene.search.function;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.elasticsearch.ElasticsearchIllegalArgumentException;

@@ -53,7 +53,7 @@ public class WeightFactorFunction extends ScoreFunction {
}

@Override
public void setNextReader(AtomicReaderContext context) {
public void setNextReader(LeafReaderContext context) {
scoreFunction.setNextReader(context);
}

@@ -87,7 +87,7 @@ public class WeightFactorFunction extends ScoreFunction {
}

@Override
public void setNextReader(AtomicReaderContext context) {
public void setNextReader(LeafReaderContext context) {

}

@@ -49,11 +49,6 @@ public class OutputStreamIndexOutput extends OutputStream {
out.writeBytes(b, off, len);
}

@Override
public void flush() throws IOException {
out.flush();
}

@Override
public void close() throws IOException {
out.close();

@@ -21,21 +21,18 @@ package org.elasticsearch.common.lucene.uid;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.Numbers;
import org.elasticsearch.common.lucene.uid.Versions.DocIdAndVersion;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;

@@ -51,7 +48,7 @@ import org.elasticsearch.index.mapper.internal.VersionFieldMapper;

final class PerThreadIDAndVersionLookup {

private final AtomicReaderContext[] readerContexts;
private final LeafReaderContext[] readerContexts;
private final TermsEnum[] termsEnums;
private final DocsEnum[] docsEnums;
// Only used for back compat, to lookup a version from payload:

@@ -64,9 +61,9 @@ final class PerThreadIDAndVersionLookup {

public PerThreadIDAndVersionLookup(IndexReader r) throws IOException {

List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
List<LeafReaderContext> leaves = new ArrayList<>(r.leaves());

readerContexts = leaves.toArray(new AtomicReaderContext[leaves.size()]);
readerContexts = leaves.toArray(new LeafReaderContext[leaves.size()]);
termsEnums = new TermsEnum[leaves.size()];
docsEnums = new DocsEnum[leaves.size()];
posEnums = new DocsAndPositionsEnum[leaves.size()];

@@ -78,7 +75,7 @@ final class PerThreadIDAndVersionLookup {
// iterate backwards to optimize for the frequently updated documents
// which are likely to be in the last segments
for(int i=leaves.size()-1;i>=0;i--) {
AtomicReaderContext readerContext = leaves.get(i);
LeafReaderContext readerContext = leaves.get(i);
Fields fields = readerContext.reader().fields();
if (fields != null) {
Terms terms = fields.terms(UidFieldMapper.NAME);

@@ -126,13 +126,13 @@ public class Versions {
private Versions() {
}

/** Wraps an {@link AtomicReaderContext}, a doc ID <b>relative to the context doc base</b> and a version. */
/** Wraps an {@link LeafReaderContext}, a doc ID <b>relative to the context doc base</b> and a version. */
public static class DocIdAndVersion {
public final int docId;
public final long version;
public final AtomicReaderContext context;
public final LeafReaderContext context;

public DocIdAndVersion(int docId, long version, AtomicReaderContext context) {
public DocIdAndVersion(int docId, long version, LeafReaderContext context) {
this.docId = docId;
this.version = version;
this.context = context;

@@ -19,6 +19,10 @@

package org.elasticsearch.common.util;

import java.util.Collections;

import org.apache.lucene.util.Accountable;


abstract class AbstractArray implements BigArray {

@@ -41,4 +45,8 @@ abstract class AbstractArray implements BigArray {

protected abstract void doClose();

@Override
public Iterable<? extends Accountable> getChildResources() {
return Collections.emptyList();
}
}

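The getChildResources override above is the new detailed memory-usage API mentioned in the commit message. A hedged sketch of a resource that actually reports children, using the same signature as the hunk above (class names and byte counts are illustrative only):

    import java.util.Arrays;
    import java.util.Collections;
    import org.apache.lucene.util.Accountable;

    // A resource that owns two child buffers and exposes a per-child
    // memory breakdown through getChildResources().
    final class TrackedResource implements Accountable {

        private final Accountable keys = new Child(1024);
        private final Accountable values = new Child(4096);

        @Override
        public long ramBytesUsed() {
            return keys.ramBytesUsed() + values.ramBytesUsed();
        }

        @Override
        public Iterable<? extends Accountable> getChildResources() {
            return Arrays.asList(keys, values);
        }

        private static final class Child implements Accountable {
            private final long bytes;
            Child(long bytes) { this.bytes = bytes; }

            @Override
            public long ramBytesUsed() { return bytes; }

            @Override
            public Iterable<? extends Accountable> getChildResources() {
                return Collections.emptyList();
            }
        }
    }
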
@@ -22,6 +22,7 @@ import com.google.common.math.LongMath;
import com.google.common.primitives.Ints;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.Nullable;

@@ -171,6 +172,12 @@ public class BloomFilter {
}
}

public static void skipBloom(IndexInput in) throws IOException {
int version = in.readInt(); // we do nothing with this now..., defaults to 0
final int numLongs = in.readInt();
in.seek(in.getFilePointer() + (numLongs * 8) + 4 + 4); // filter + numberOfHashFunctions + hashType
}

public static BloomFilter deserialize(DataInput in) throws IOException {
int version = in.readInt(); // we do nothing with this now..., defaults to 0
int numLongs = in.readInt();

@@ -83,7 +83,7 @@ public class NodeEnvironment extends AbstractComponent {
}
logger.trace("obtaining node lock on {} ...", dir.getAbsolutePath());
try {
NativeFSLockFactory lockFactory = new NativeFSLockFactory(dir);
NativeFSLockFactory lockFactory = new NativeFSLockFactory(dir.toPath());
Lock tmpLock = lockFactory.makeLock("node.lock");
boolean obtained = tmpLock.obtain();
if (obtained) {

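Several hunks in this area move from java.io.File to java.nio.file.Path, as the Lucene store APIs now expect Path. A minimal sketch of the locking pattern shown above, written against the Path-taking constructor used in this patch (the directory location and class name are hypothetical):

    import java.nio.file.Path;
    import java.nio.file.Paths;
    import org.apache.lucene.store.Lock;
    import org.apache.lucene.store.NativeFSLockFactory;

    // Lock factories now take a Path; obtain() still returns whether the
    // lock was acquired, and the lock should be closed when done.
    final class LockExample {
        static boolean tryLock() throws Exception {
            Path dir = Paths.get("/tmp/es-node");   // hypothetical location
            NativeFSLockFactory factory = new NativeFSLockFactory(dir);
            Lock lock = factory.makeLock("node.lock");
            try {
                return lock.obtain();
            } finally {
                lock.close();
            }
        }
    }
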
@@ -18,7 +18,6 @@
*/
package org.elasticsearch.gateway.local.state.meta;

import org.apache.lucene.index.CorruptIndexException;
import org.elasticsearch.ElasticsearchCorruptionException;

/**

@@ -37,12 +36,12 @@ public class CorruptStateException extends ElasticsearchCorruptionException {

/**
* Creates a new {@link CorruptStateException} with the given exceptions stacktrace.
* This constructor copies the stacktrace as well as the message from the given {@link CorruptIndexException}
* This constructor copies the stacktrace as well as the message from the given {@link Throwable}
* into this exception.
*
* @param ex the exception cause
*/
public CorruptStateException(CorruptIndexException ex) {
public CorruptStateException(Throwable ex) {
public CorruptStateException(Throwable ex) {
super(ex);
}
}

@ -22,11 +22,9 @@ import com.google.common.base.Predicate;
|
|||
import com.google.common.collect.Collections2;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.OutputStreamIndexOutput;
|
||||
import org.apache.lucene.store.SimpleFSDirectory;
|
||||
import org.apache.lucene.index.IndexFormatTooNewException;
|
||||
import org.apache.lucene.index.IndexFormatTooOldException;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.ExceptionsHelper;
|
||||
|
@ -125,9 +123,9 @@ public abstract class MetaDataStateFormat<T> {
|
|||
}
|
||||
CodecUtil.writeFooter(out);
|
||||
}
|
||||
IOUtils.fsync(tmpStatePath.toFile(), false); // fsync the state file
|
||||
IOUtils.fsync(tmpStatePath, false); // fsync the state file
|
||||
Files.move(tmpStatePath, finalStatePath, StandardCopyOption.ATOMIC_MOVE);
|
||||
IOUtils.fsync(stateLocation.toFile(), true);
|
||||
IOUtils.fsync(stateLocation, true);
|
||||
for (int i = 1; i < locations.length; i++) {
|
||||
stateLocation = Paths.get(locations[i].getPath(), STATE_DIR_NAME);
|
||||
Files.createDirectories(stateLocation);
|
||||
|
@@ -136,7 +134,7 @@ public abstract class MetaDataStateFormat<T> {
                 try {
                     Files.copy(finalStatePath, tmpPath);
                     Files.move(tmpPath, finalPath, StandardCopyOption.ATOMIC_MOVE); // we are on the same FileSystem / Partition here we can do an atomic move
-                    IOUtils.fsync(stateLocation.toFile(), true); // we just fsync the dir here..
+                    IOUtils.fsync(stateLocation, true); // we just fsync the dir here..
                 } finally {
                     Files.deleteIfExists(tmpPath);
                 }

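Taken together, the two hunks above keep the write path for state files intact while switching IOUtils.fsync to the Path overload: write a temporary file, fsync it, atomically rename it into place, then fsync the parent directory. A minimal sketch of that pattern (not part of the diff; plain java.nio plus the IOUtils.fsync(Path, boolean) overload the hunks switch to, file names and the write method are hypothetical):

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import org.apache.lucene.util.IOUtils;

public class AtomicStateWriteSketch {
    static void write(Path stateDir, String fileName, byte[] payload) throws Exception {
        Path tmp = stateDir.resolve(fileName + ".tmp");
        Path dest = stateDir.resolve(fileName);
        Files.write(tmp, payload);                              // 1. write the temporary file
        IOUtils.fsync(tmp, false);                              // 2. fsync the file itself (false = not a directory)
        Files.move(tmp, dest, StandardCopyOption.ATOMIC_MOVE);  // 3. atomic rename on the same filesystem
        IOUtils.fsync(stateDir, true);                          // 4. fsync the directory so the rename is durable
    }
}
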
@@ -187,7 +185,7 @@ public abstract class MetaDataStateFormat<T> {
                     return fromXContent(parser);
                 }
             }
-        } catch(CorruptIndexException ex) {
+        } catch(CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
             // we trick this into a dedicated exception with the original stacktrace
             throw new CorruptStateException(ex);
         }

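This catch block is why the CorruptStateException constructor was widened to Throwable earlier in the diff: in Lucene 5, IndexFormatTooOldException and IndexFormatTooNewException no longer extend CorruptIndexException, so each has to be caught and wrapped explicitly. A minimal sketch of that wrapping (not part of the diff; the StateReader interface and readAndCheck method are hypothetical):

import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.elasticsearch.gateway.local.state.meta.CorruptStateException;

public class CorruptionWrappingSketch {
    interface StateReader<T> {
        T read() throws IOException;
    }

    static <T> T readAndCheck(StateReader<T> reader) throws IOException {
        try {
            return reader.read();
        } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
            // all three must be listed now that too-old/too-new no longer extend CorruptIndexException;
            // the widened constructor accepts any Throwable
            throw new CorruptStateException(ex);
        }
    }
}
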
@@ -195,7 +193,7 @@ public abstract class MetaDataStateFormat<T> {
     }
 
     protected Directory newDirectory(File dir) throws IOException {
-        return new SimpleFSDirectory(dir);
+        return new SimpleFSDirectory(dir.toPath());
     }
 
     private void cleanupOldFiles(String prefix, String fileName, File[] locations) throws IOException {

@@ -100,20 +100,20 @@ public class Analysis {
         return value != null && "_none_".equals(value);
     }
 
-    public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion, Version version) {
+    public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
         String value = settings.get("stem_exclusion");
         if (value != null) {
             if ("_none_".equals(value)) {
                 return CharArraySet.EMPTY_SET;
             } else {
                 // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
-                return new CharArraySet(version, Strings.commaDelimitedListToSet(value), false);
+                return new CharArraySet(Strings.commaDelimitedListToSet(value), false);
             }
         }
         String[] stemExclusion = settings.getAsArray("stem_exclusion", null);
         if (stemExclusion != null) {
             // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
-            return new CharArraySet(version, Arrays.asList(stemExclusion), false);
+            return new CharArraySet(Arrays.asList(stemExclusion), false);
         } else {
             return defaultStemExclusion;
         }
@@ -153,43 +153,43 @@ public class Analysis {
             .put("_turkish_", TurkishAnalyzer.getDefaultStopSet())
             .immutableMap();
 
-    public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords, ImmutableMap<String, Set<?>> namedWords, Version version, boolean ignoreCase) {
+    public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords, ImmutableMap<String, Set<?>> namedWords, boolean ignoreCase) {
         String value = settings.get(name);
         if (value != null) {
             if ("_none_".equals(value)) {
                 return CharArraySet.EMPTY_SET;
             } else {
-                return resolveNamedWords(Strings.commaDelimitedListToSet(value), namedWords, version, ignoreCase);
+                return resolveNamedWords(Strings.commaDelimitedListToSet(value), namedWords, ignoreCase);
             }
         }
         List<String> pathLoadedWords = getWordList(env, settings, name);
         if (pathLoadedWords != null) {
-            return resolveNamedWords(pathLoadedWords, namedWords, version, ignoreCase);
+            return resolveNamedWords(pathLoadedWords, namedWords, ignoreCase);
         }
         return defaultWords;
     }
 
-    public static CharArraySet parseCommonWords(Environment env, Settings settings, CharArraySet defaultCommonWords, Version version, boolean ignoreCase) {
-        return parseWords(env, settings, "common_words", defaultCommonWords, namedStopWords, version, ignoreCase);
+    public static CharArraySet parseCommonWords(Environment env, Settings settings, CharArraySet defaultCommonWords, boolean ignoreCase) {
+        return parseWords(env, settings, "common_words", defaultCommonWords, namedStopWords, ignoreCase);
     }
 
-    public static CharArraySet parseArticles(Environment env, Settings settings, Version version) {
-        return parseWords(env, settings, "articles", null, null, version, settings.getAsBoolean("articles_case", false));
+    public static CharArraySet parseArticles(Environment env, Settings settings) {
+        return parseWords(env, settings, "articles", null, null, settings.getAsBoolean("articles_case", false));
     }
 
-    public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version) {
-        return parseStopWords(env, settings, defaultStopWords, version, settings.getAsBoolean("stopwords_case", false));
+    public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords) {
+        return parseStopWords(env, settings, defaultStopWords, settings.getAsBoolean("stopwords_case", false));
     }
 
-    public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version, boolean ignoreCase) {
-        return parseWords(env, settings, "stopwords", defaultStopWords, namedStopWords, version, ignoreCase);
+    public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, boolean ignoreCase) {
+        return parseWords(env, settings, "stopwords", defaultStopWords, namedStopWords, ignoreCase);
     }
 
-    private static CharArraySet resolveNamedWords(Collection<String> words, ImmutableMap<String, Set<?>> namedWords, Version version, boolean ignoreCase) {
+    private static CharArraySet resolveNamedWords(Collection<String> words, ImmutableMap<String, Set<?>> namedWords, boolean ignoreCase) {
         if (namedWords == null) {
-            return new CharArraySet(version, words, ignoreCase);
+            return new CharArraySet(words, ignoreCase);
         }
-        CharArraySet setWords = new CharArraySet(version, words.size(), ignoreCase);
+        CharArraySet setWords = new CharArraySet(words.size(), ignoreCase);
         for (String word : words) {
             if (namedWords.containsKey(word)) {
                 setWords.addAll(namedWords.get(word));
@@ -200,12 +200,12 @@ public class Analysis {
         return setWords;
     }
 
-    public static CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix, Version version) {
+    public static CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix) {
         List<String> wordList = getWordList(env, settings, settingsPrefix);
         if (wordList == null) {
             return null;
         }
-        return new CharArraySet(version, wordList, settings.getAsBoolean(settingsPrefix + "_case", false));
+        return new CharArraySet(wordList, settings.getAsBoolean(settingsPrefix + "_case", false));
     }
 
     /**

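The thread running through all of these Analysis hunks is that Lucene 5 analysis classes no longer take a Version argument; CharArraySet in particular is now built from just the words and the case flag. A minimal sketch of the new-style construction (not part of the diff; the word list is made up):

import java.util.Arrays;
import org.apache.lucene.analysis.util.CharArraySet;

public class CharArraySetSketch {
    public static void main(String[] args) {
        // Lucene 4.x style was: new CharArraySet(version, words, ignoreCase)
        // Lucene 5.x style, as used in the hunks above:
        CharArraySet stopWords = new CharArraySet(Arrays.asList("een", "het", "de"), true);
        System.out.println(stopWords.contains("HET")); // true, because ignoreCase = true
    }
}
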
@@ -38,9 +38,9 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
     @Inject
     public ArabicAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        arabicAnalyzer = new ArabicAnalyzer(version,
-                Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        arabicAnalyzer = new ArabicAnalyzer(Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        arabicAnalyzer.setVersion(version);
     }
 
     @Override

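The same mechanical change repeats for each language analyzer provider below: the Version argument is dropped from the analyzer constructor and from the Analysis helpers, and the version is applied afterwards with Analyzer#setVersion. A minimal sketch of the pattern (not part of the diff; the build method is hypothetical and the stem exclusion set is left empty):

import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;

public class AnalyzerVersionSketch {
    static ArabicAnalyzer build(Version version) {
        ArabicAnalyzer analyzer = new ArabicAnalyzer(ArabicAnalyzer.getDefaultStopSet(),
                CharArraySet.EMPTY_SET);      // no Version in the constructor anymore
        analyzer.setVersion(version);         // the version is set on the built analyzer instead
        return analyzer;
    }
}
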
@@ -38,9 +38,9 @@ public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arme
     @Inject
     public ArmenianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new ArmenianAnalyzer(version,
-                Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new ArmenianAnalyzer(Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<Basque
     @Inject
     public BasqueAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new BasqueAnalyzer(version,
-                Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new BasqueAnalyzer(Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bra
     @Inject
     public BrazilianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new BrazilianAnalyzer(version,
-                Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new BrazilianAnalyzer(Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,7 +38,7 @@ public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory
     @Inject
     public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
+        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bul
     @Inject
     public BulgarianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new BulgarianAnalyzer(version,
-                Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new BulgarianAnalyzer(Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<Catal
     @Inject
     public CatalanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new CatalanAnalyzer(version,
-                Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new CatalanAnalyzer(Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -19,7 +19,9 @@
 
 package org.elasticsearch.index.analysis;
 
-import org.apache.lucene.analysis.cn.ChineseAnalyzer;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.inject.assistedinject.Assisted;
 import org.elasticsearch.common.settings.Settings;

@@ -27,20 +29,23 @@ import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
 
 /**
- *
+ * Only for old indexes
  */
-public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider<ChineseAnalyzer> {
+public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardAnalyzer> {
 
-    private final ChineseAnalyzer analyzer;
+    private final StandardAnalyzer analyzer;
 
     @Inject
     public ChineseAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new ChineseAnalyzer();
+        // old index: best effort
+        analyzer = new StandardAnalyzer();
+        analyzer.setVersion(version);
+
     }
 
     @Override
-    public ChineseAnalyzer get() {
+    public StandardAnalyzer get() {
         return this.analyzer;
     }
 }

@@ -38,9 +38,10 @@ public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyz
     @Inject
     public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version);
+        CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet());
 
-        analyzer = new CJKAnalyzer(version, stopWords);
+        analyzer = new CJKAnalyzer(stopWords);
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -44,8 +44,8 @@ public class ClassicTokenizerFactory extends AbstractTokenizerFactory {
     }
 
     @Override
-    public Tokenizer create(Reader reader) {
-        ClassicTokenizer tokenizer = new ClassicTokenizer(version, reader);
+    public Tokenizer create() {
+        ClassicTokenizer tokenizer = new ClassicTokenizer();
         tokenizer.setMaxTokenLength(maxTokenLength);
         return tokenizer;
     }

@@ -48,7 +48,7 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
         super(index, indexSettings, name, settings);
         this.ignoreCase = settings.getAsBoolean("ignore_case", false);
         this.queryMode = settings.getAsBoolean("query_mode", false);
-        this.words = Analysis.parseCommonWords(env, settings, null, version, ignoreCase);
+        this.words = Analysis.parseCommonWords(env, settings, null, ignoreCase);
 
         if (this.words == null) {
             throw new ElasticsearchIllegalArgumentException("mising or empty [common_words] or [common_words_path] configuration for common_grams token filter");
@@ -57,7 +57,7 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
 
     @Override
     public TokenStream create(TokenStream tokenStream) {
-        CommonGramsFilter filter = new CommonGramsFilter(version, tokenStream, words);
+        CommonGramsFilter filter = new CommonGramsFilter(tokenStream, words);
         if (queryMode) {
             return new CommonGramsQueryFilter(filter);
         } else {

@@ -79,8 +79,8 @@ public final class CustomAnalyzer extends Analyzer {
     }
 
     @Override
-    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer tokenizer = tokenizerFactory.create(reader);
+    protected TokenStreamComponents createComponents(String fieldName) {
+        Tokenizer tokenizer = tokenizerFactory.create();
         TokenStream tokenStream = tokenizer;
         for (TokenFilterFactory tokenFilter : tokenFilters) {
             tokenStream = tokenFilter.create(tokenStream);

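This hunk reflects the Lucene 5 Analyzer contract: createComponents no longer receives a Reader, tokenizers are built empty, and Lucene wires the Reader in later via Tokenizer#setReader when a field is analyzed. A minimal sketch of an Analyzer written against that contract (not part of the diff; SketchAnalyzer and its token chain are made up):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;

public class SketchAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new WhitespaceTokenizer();    // no Reader argument anymore
        TokenStream sink = new LowerCaseFilter(source);  // version-less filter constructor
        return new TokenStreamComponents(source, sink);
    }
}
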
@@ -38,9 +38,9 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
     @Inject
     public CzechAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new CzechAnalyzer(version,
-                Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new CzechAnalyzer(Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Danish
     @Inject
     public DanishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new DanishAnalyzer(version,
-                Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new DanishAnalyzer(Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAn
     @Inject
     public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new DutchAnalyzer(version,
-                Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new DutchAnalyzer(Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -21,13 +21,14 @@ package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.nl.DutchStemFilter;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.inject.assistedinject.Assisted;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
+import org.tartarus.snowball.ext.DutchStemmer;
 
 /**
  *
@@ -39,11 +40,12 @@ public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
     @Inject
     public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
+        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
 
     @Override
     public TokenStream create(TokenStream tokenStream) {
-        return new DutchStemFilter(new SetKeywordMarkerFilter(tokenStream, exclusions));
+        tokenStream = new SetKeywordMarkerFilter(tokenStream, exclusions);
+        return new SnowballFilter(tokenStream, new DutchStemmer());
     }
 }

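Here, and again below for FrenchStemTokenFilterFactory, the dedicated DutchStemFilter/FrenchStemFilter is replaced by SnowballFilter with the matching Snowball stemmer, while the keyword-marker step still protects excluded words from stemming. A minimal sketch of that chain (not part of the diff; the helper method and exclusion set are made up):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.tartarus.snowball.ext.DutchStemmer;

public class SnowballStemChainSketch {
    static TokenStream dutchStem(TokenStream in, CharArraySet exclusions) {
        // tokens marked as keywords are skipped by the stemmer
        TokenStream marked = new SetKeywordMarkerFilter(in, exclusions);
        return new SnowballFilter(marked, new DutchStemmer());
    }
}
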
@@ -21,8 +21,7 @@ package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
-import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
-import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
+import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
 import org.apache.lucene.analysis.reverse.ReverseStringFilter;
 import org.apache.lucene.util.Version;
@@ -43,7 +42,9 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
 
     private final int maxGram;
 
-    private final EdgeNGramTokenFilter.Side side;
+    public static final int SIDE_FRONT = 1;
+    public static final int SIDE_BACK = 2;
+    private final int side;
 
     private org.elasticsearch.Version esVersion;
 
@@ -52,29 +53,42 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
         super(index, indexSettings, name, settings);
         this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
-        this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
+        this.side = parseSide(settings.get("side", "front"));
         this.esVersion = org.elasticsearch.Version.indexCreated(indexSettings);
     }
 
+    static int parseSide(String side) {
+        switch(side) {
+            case "front": return SIDE_FRONT;
+            case "back": return SIDE_BACK;
+            default: throw new IllegalArgumentException("invalid side: " + side);
+        }
+    }
+
     @Override
     public TokenStream create(TokenStream tokenStream) {
-        if (version.onOrAfter(Version.LUCENE_43) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
+        TokenStream result = tokenStream;
+
+        // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
+        if (side == SIDE_BACK) {
+            result = new ReverseStringFilter(result);
+        }
+
+        if (version.onOrAfter(Version.LUCENE_4_3) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
             /*
              * We added this in 0.90.2 but 0.90.1 used LUCENE_43 already so we can not rely on the lucene version.
              * Yet if somebody uses 0.90.2 or higher with a prev. lucene version we should also use the deprecated version.
             */
-            final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // always use 4.4 or higher
-            TokenStream result = tokenStream;
-            // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
-            if (side == Side.BACK) {
-                result = new ReverseStringFilter(version, result);
-            }
-            result = new EdgeNGramTokenFilter(version, result, minGram, maxGram);
-            if (side == Side.BACK) {
-                result = new ReverseStringFilter(version, result);
-            }
-            return result;
+            result = new EdgeNGramTokenFilter(result, minGram, maxGram);
         } else {
-            return new EdgeNGramTokenFilter(version, tokenStream, side, minGram, maxGram);
+            result = new Lucene43EdgeNGramTokenFilter(result, minGram, maxGram);
         }
+
+        // side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
+        if (side == SIDE_BACK) {
+            result = new ReverseStringFilter(result);
+        }
+
+        return result;
     }

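The rewritten create(...) above emulates the removed side=back option, as the inline comment says: reverse the tokens, take front edge n-grams, then reverse again, which yields the grams anchored at the end of each token. A minimal sketch of that equivalence (not part of the diff; the helper method is made up):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;

public class BackEdgeNGramSketch {
    static TokenStream backEdgeNGrams(TokenStream in, int minGram, int maxGram) {
        TokenStream reversed = new ReverseStringFilter(in);                       // "fox" -> "xof"
        TokenStream grams = new EdgeNGramTokenFilter(reversed, minGram, maxGram); // front grams of the reversed token
        return new ReverseStringFilter(grams);                                    // "xo" -> "ox", i.e. a back gram of "fox"
    }
}
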
@@ -64,8 +64,8 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
     }
 
     @Override
-    public Tokenizer create(Reader reader) {
-        if (version.onOrAfter(Version.LUCENE_43) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
+    public Tokenizer create() {
+        if (version.onOrAfter(Version.LUCENE_4_3) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
             /*
              * We added this in 0.90.2 but 0.90.1 used LUCENE_43 already so we can not rely on the lucene version.
              * Yet if somebody uses 0.90.2 or higher with a prev. lucene version we should also use the deprecated version.
@@ -76,11 +76,11 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
                         + " To obtain the same behavior as the previous version please use \"edgeNGram\" filter which still supports side=back"
                         + " in combination with a \"keyword\" tokenizer");
             }
-            final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // always use 4.4 or higher
+            final Version version = this.version == Version.LUCENE_4_3 ? Version.LUCENE_4_4 : this.version; // always use 4.4 or higher
             if (matcher == null) {
-                return new EdgeNGramTokenizer(version, reader, minGram, maxGram);
+                return new EdgeNGramTokenizer(minGram, maxGram);
             } else {
-                return new EdgeNGramTokenizer(version, reader, minGram, maxGram) {
+                return new EdgeNGramTokenizer(minGram, maxGram) {
                     @Override
                     protected boolean isTokenChar(int chr) {
                         return matcher.isTokenChar(chr);
@@ -88,7 +88,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
                 };
             }
         } else {
-            return new Lucene43EdgeNGramTokenizer(version, reader, side, minGram, maxGram);
+            return new Lucene43EdgeNGramTokenizer(side, minGram, maxGram);
         }
     }
 }

@@ -39,7 +39,7 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
     @Inject
     public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        this.articles = Analysis.parseArticles(env, settings, version);
+        this.articles = Analysis.parseArticles(env, settings);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Engli
     @Inject
     public EnglishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new EnglishAnalyzer(version,
-                Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new EnglishAnalyzer(Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<Finni
     @Inject
     public FinnishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new FinnishAnalyzer(version,
-                Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new FinnishAnalyzer(Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<French
     @Inject
     public FrenchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new FrenchAnalyzer(version,
-                Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new FrenchAnalyzer(Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -20,9 +20,10 @@
 package org.elasticsearch.index.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.fr.FrenchStemFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.tartarus.snowball.ext.FrenchStemmer;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.inject.assistedinject.Assisted;
 import org.elasticsearch.common.settings.Settings;
@@ -39,11 +40,12 @@ public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
     @Inject
     public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
+        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
 
     @Override
     public TokenStream create(TokenStream tokenStream) {
-        return new FrenchStemFilter(new SetKeywordMarkerFilter(tokenStream, exclusions));
+        tokenStream = new SetKeywordMarkerFilter(tokenStream, exclusions);
+        return new SnowballFilter(tokenStream, new FrenchStemmer());
     }
 }

@@ -38,9 +38,9 @@ public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Gali
     @Inject
     public GalicianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new GalicianAnalyzer(version,
-                Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new GalicianAnalyzer(Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<German
     @Inject
     public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new GermanAnalyzer(version,
-                Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new GermanAnalyzer(Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -39,7 +39,7 @@ public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
     @Inject
     public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version);
+        this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET);
     }
 
     @Override

@@ -37,8 +37,8 @@ public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider<GreekAn
     @Inject
     public GreekAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new GreekAnalyzer(version,
-                Analysis.parseStopWords(env, settings, GreekAnalyzer.getDefaultStopSet(), version));
+        analyzer = new GreekAnalyzer(Analysis.parseStopWords(env, settings, GreekAnalyzer.getDefaultStopSet()));
+        analyzer.setVersion(version);
     }
 
     @Override

@@ -38,9 +38,9 @@ public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider<HindiAn
     @Inject
     public HindiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        analyzer = new HindiAnalyzer(version,
-                Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet(), version),
-                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version));
+        analyzer = new HindiAnalyzer(Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet()),
+                Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
+        analyzer.setVersion(version);
     }
 
     @Override
